随着越来越多的企业和组织开始上云,云上的成本管理(Cost Management)越来越成为人们关注的重点。OpenCost[1] 是 KubeCost[2] 的开源版本,并且最近刚刚加入 CNCF Sandbox[3]。本文将主要介绍 OpenCost 的使用、架构和实现。

背景

参见 Cost Management,主要介绍在当前用云模式下成本管理的重要性,以及越来越多的解决方案。

使用

OpenCost Install

架构

实现

现在最大的问题就是,不知道 KubeCost 怎么算的钱,钱是否准确?

  • BaseCPUPrice
  • BaseMemoryPrice
  • DownloadPricingData

从前端页面展示开始

http://kubecost.test.hulu.com:9090/allocations?idle=separate&window=2023-01-08T00%3A00%3A00Z%2C2023-01-15T00%3A00%3A00Z&agg=team

http://kubecost.test.hulu.com:9090/model/allocation/view?aggregate=team&window=2023-01-08T00%3A00%3A00Z%2C2023-01-15T00%3A00%3A00Z&accumulate=true&shareIdle=false&idle=true&idleByNode=false&shareTenancyCosts=true&shareNamespaces=&shareCost=NaN&shareSplit=weighted&chartType=1&costMetric=1&startIndex=0&maxResults=0&req=1674974173877?req=1674974173877

对应 payload

aggregate: team  
window: 2023-01-08T00:00:00Z,2023-01-15T00:00:00Z  
accumulate: true  
shareIdle: false  
idle: true  
idleByNode: false  
shareTenancyCosts: true  
shareNamespaces:  
shareCost: NaN  
shareSplit: weighted  
chartType: 1  
costMetric: 1  
startIndex: 0  
maxResults: 0  
req: 1674974173877

Response

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
{
    "initialResponse": {
        "graphData": {
            "topResultsGraph": {
                "items": [
                    {
                        "name": "__idle__",
                        "cost": 206.93661,
                        "efficiency": 0
                    },
                    {
                        "name": "__unallocated__",
                        "cost": 203.38568,
                        "efficiency": 0.325487
                    },
                    {
                        "name": "unknown",
                        "cost": 39.255405,
                        "efficiency": 0.029567076
                    },
                    {
                        "name": "data-core",
                        "cost": 25.477795,
                        "efficiency": 0.025389424
                    },
                    {
                        "name": "targeting",
                        "cost": 6.3039703,
                        "efficiency": 0.025157884
                    },
                    {
                        "name": "personalization",
                        "cost": 1.3340744,
                        "efficiency": 0.025484152
                    },
                    {
                        "name": "researcher",
                        "cost": 1.3186032,
                        "efficiency": 0.024420282
                    },
                    {
                        "name": "data-metrics",
                        "cost": 1.3174559,
                        "efficiency": 0.025455672
                    },
                    {
                        "name": "measurement",
                        "cost": 1.292863,
                        "efficiency": 0.025255583
                    },
                    {
                        "name": "segmentation",
                        "cost": 1.1671263,
                        "efficiency": 0.024859745
                    },
                    {
                        "name": "decisioning",
                        "cost": 1.109542,
                        "efficiency": 0.02370197
                    },
                    {
                        "name": "others",
                        "cost": 0.01907245,
                        "efficiency": 0.21839446
                    }
                ]
            }
        },
        "totalRow": {
            "name": "Totals",
            "cpuCost": 302.57953,
            "gpuCost": 0,
            "ramCost": 156.6476,
            "pvCost": 15.842329,
            "networkCost": 0,
            "loadBalancerCost": 13.84875,
            "sharedCost": 0,
            "externalCost": 0,
            "averageCpuUtilization": 0.1897173,
            "averageRamUtilization": 0.37763026,
            "efficiency": 0.21839446,
            "totalCost": 488.9182
        },
        "totalItems": 12
    },
    "tableResults": {
        "startIndex": 0,
        "tableItems": [
            {
                "name": "__idle__",
                "cpuCost": 88.79077,
                "gpuCost": 0,
                "ramCost": 118.14584,
                "pvCost": 0,
                "networkCost": 0,
                "loadBalancerCost": 0,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0,
                "averageRamUtilization": 0,
                "efficiency": 0,
                "totalCost": 206.93661
            },
            {
                "name": "__unallocated__",
                "cpuCost": 142.52498,
                "gpuCost": 0,
                "ramCost": 31.169619,
                "pvCost": 15.842329,
                "networkCost": 0,
                "loadBalancerCost": 13.84875,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0.30877534,
                "averageRamUtilization": 0.40190205,
                "efficiency": 0.325487,
                "totalCost": 203.38568
            },
            {
                "name": "unknown",
                "cpuCost": 35.19112,
                "gpuCost": 0,
                "ramCost": 4.0642867,
                "pvCost": 0,
                "networkCost": 0,
                "loadBalancerCost": 0,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0.00058239116,
                "averageRamUtilization": 0.2805345,
                "efficiency": 0.029567076,
                "totalCost": 39.255405
            },
            {
                "name": "data-core",
                "cpuCost": 23.361794,
                "gpuCost": 0,
                "ramCost": 2.1160026,
                "pvCost": 0,
                "networkCost": 0,
                "loadBalancerCost": 0,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0.0011240747,
                "averageRamUtilization": 0.29329178,
                "efficiency": 0.025389424,
                "totalCost": 25.477795
            },
            {
                "name": "targeting",
                "cpuCost": 5.7804084,
                "gpuCost": 0,
                "ramCost": 0.5235622,
                "pvCost": 0,
                "networkCost": 0,
                "loadBalancerCost": 0,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0.0009608587,
                "averageRamUtilization": 0.29230607,
                "efficiency": 0.025157884,
                "totalCost": 6.3039703
            },
            {
                "name": "personalization",
                "cpuCost": 1.2232746,
                "gpuCost": 0,
                "ramCost": 0.110799745,
                "pvCost": 0,
                "networkCost": 0,
                "loadBalancerCost": 0,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0.0008683359,
                "averageRamUtilization": 0.29725286,
                "efficiency": 0.025484152,
                "totalCost": 1.3340744
            },
            {
                "name": "researcher",
                "cpuCost": 1.2090883,
                "gpuCost": 0,
                "ramCost": 0.10951474,
                "pvCost": 0,
                "networkCost": 0,
                "loadBalancerCost": 0,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0.00064384757,
                "averageRamUtilization": 0.28692204,
                "efficiency": 0.024420282,
                "totalCost": 1.3186032
            },
            {
                "name": "data-metrics",
                "cpuCost": 1.2080364,
                "gpuCost": 0,
                "ramCost": 0.10941946,
                "pvCost": 0,
                "networkCost": 0,
                "loadBalancerCost": 0,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0.00096174324,
                "averageRamUtilization": 0.29587886,
                "efficiency": 0.025455672,
                "totalCost": 1.3174559
            },
            {
                "name": "measurement",
                "cpuCost": 1.1854862,
                "gpuCost": 0,
                "ramCost": 0.10737674,
                "pvCost": 0,
                "networkCost": 0,
                "loadBalancerCost": 0,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0.0009781235,
                "averageRamUtilization": 0.29328936,
                "efficiency": 0.025255583,
                "totalCost": 1.292863
            },
            {
                "name": "segmentation",
                "cpuCost": 1.0701923,
                "gpuCost": 0,
                "ramCost": 0.096933894,
                "pvCost": 0,
                "networkCost": 0,
                "loadBalancerCost": 0,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0.0011511084,
                "averageRamUtilization": 0.2866134,
                "efficiency": 0.024859745,
                "totalCost": 1.1671263
            },
            {
                "name": "decisioning",
                "cpuCost": 1.0172931,
                "gpuCost": 0,
                "ramCost": 0.092248864,
                "pvCost": 0,
                "networkCost": 0,
                "loadBalancerCost": 0,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0.001340874,
                "averageRamUtilization": 0.2702935,
                "efficiency": 0.02370197,
                "totalCost": 1.109542
            },
            {
                "name": "default",
                "cpuCost": 0.017094458,
                "gpuCost": 0,
                "ramCost": 0.0019779932,
                "pvCost": 0,
                "networkCost": 0,
                "loadBalancerCost": 0,
                "sharedCost": 0,
                "externalCost": 0,
                "averageCpuUtilization": 0.0030613337,
                "averageRamUtilization": 0.34745252,
                "efficiency": 0.03877795,
                "totalCost": 0.01907245
            }
        ]
    },
    "warnings": "",
    "errors": ""
}

前端请求响应处理: 参数:

  • aggregate:聚合类型
  • window: 聚合时间窗口
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
/**
 * Small client for the allocation API. A single shared instance is exported
 * as the module default.
 */
class AllocationService {
  // Base URL of the allocation API; taken from the BASE_URL environment
  // variable when set, otherwise the local default below.
  BASE_URL = process.env.BASE_URL || 'http://localhost:9090/allocation';

  /**
   * Sends GET `${BASE_URL}/compute` and resolves to the response body.
   *
   * @param win       value sent as the `window` query parameter
   * @param aggregate value sent as the `aggregate` query parameter
   * @param [options] optional settings; `options.accumulate` is forwarded as
   *                  the `accumulate` query parameter only when it is a boolean
   * @returns {Promise<*>} the `data` property of the axios response
   */
  async fetchAllocation(win, aggregate, options) {
    // Tolerate a missing or null options argument instead of throwing a
    // TypeError while destructuring.
    const { accumulate } = options ?? {};
    const params = {
      window: win,
      aggregate: aggregate,
      step: '1d', // the step is always sent as one day
    };
    if (typeof accumulate === 'boolean') {
      params.accumulate = accumulate;
    }
    const result = await axios.get(`${this.BASE_URL}/compute`, { params });
    return result.data;
  }
}

export default new AllocationService();

问题:前端的请求是怎么传递给后端 cost-model 的呢?这里 /allocation 处理后,进一步转给了 /allocation/compute,但是在前端代码里面没有找到 /allocation/compute 的处理逻辑。在 cost-model 里面找到了对应的 web server handler:https://github.com/opencost/opencost/blob/develop/pkg/costmodel/router.go#L1744

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
// Initialize builds the Accesses value, registers the HTTP routes on a new
// httprouter router, and returns it.
//
// In the code shown here it also points AggAPI at the Accesses itself, calls
// InitializeSettingsPubSub, attempts an initial DownloadPricingData (a failure
// is only logged, never returned), warms the aggregate cache when
// env.IsCacheWarmingEnabled() is true, and starts the MetricsEmitter unless
// env.IsKubecostMetricsPodEnabled() is true.
//
// NOTE(review): this listing is an excerpt. additionalConfigWatchers is not
// used below, and promCli, thanosClient, kubeClientset, k8sCache, clusterMap,
// cloudProvider, confManager, clusterInfoProvider, costModel, metricsEmitter,
// the caches, cacheExpiration and err are not declared here; presumably they
// come from setup code that is not shown. Confirm against the full source.
func Initialize(additionalConfigWatchers ...*watcher.ConfigMapWatcher) *Accesses {
   a := &Accesses{
      Router:              httprouter.New(),
      PrometheusClient:    promCli,
      ThanosClient:        thanosClient,
      KubeClientSet:       kubeClientset,
      ClusterCache:        k8sCache,
      ClusterMap:          clusterMap,
      CloudProvider:       cloudProvider,
      ConfigFileManager:   confManager,
      ClusterInfoProvider: clusterInfoProvider,
      Model:               costModel,
      MetricsEmitter:      metricsEmitter,
      AggregateCache:      aggregateCache,
      CostDataCache:       costDataCache,
      ClusterCostsCache:   clusterCostsCache,
      OutOfClusterCache:   outOfClusterCache,
      SettingsCache:       settingsCache,
      CacheExpiration:     cacheExpiration,
      httpServices:        services.NewCostModelServices(),
   }
   // Use the Accesses instance, itself, as the CostModelAggregator. This is
   // confusing and unconventional, but necessary so that we can swap it
   // out for the ETL-adapted version elsewhere.
   // TODO clean this up once ETL is open-sourced.
   a.AggAPI = a

   // Initialize mechanism for subscribing to settings changes
   a.InitializeSettingsPubSub()
   err = a.CloudProvider.DownloadPricingData()
   if err != nil {
      // Pass the error text as an argument, not as part of the format string,
      // so a '%' inside the message cannot be misread as a formatting verb.
      log.Infof("Failed to download pricing data: %s", err.Error())
   }

   // Warm the aggregate cache unless explicitly set to false
   if env.IsCacheWarmingEnabled() {
      log.Infof("Init: AggregateCostModel cache warming enabled")
      a.warmAggregateCostModelCache()
   } else {
      log.Infof("Init: AggregateCostModel cache warming disabled")
   }

   if !env.IsKubecostMetricsPodEnabled() {
      a.MetricsEmitter.Start()
   }

   // cost model and allocation endpoints
   a.Router.GET("/costDataModel", a.CostDataModel)
   a.Router.GET("/costDataModelRange", a.CostDataModelRange)
   a.Router.GET("/aggregatedCostModel", a.AggregateCostModelHandler)
   a.Router.GET("/allocation/compute", a.ComputeAllocationHandler)
   a.Router.GET("/allocation/compute/summary", a.ComputeAllocationHandlerSummary)
   a.Router.GET("/allNodePricing", a.GetAllNodePricing)
   a.Router.POST("/refreshPricing", a.RefreshPricingData)
   a.Router.GET("/clusterCostsOverTime", a.ClusterCostsOverTime)
   a.Router.GET("/clusterCosts", a.ClusterCosts)
   a.Router.GET("/clusterCostsFromCache", a.ClusterCostsFromCacheHandler)
   a.Router.GET("/validatePrometheus", a.GetPrometheusMetadata)
   a.Router.GET("/managementPlatform", a.ManagementPlatform)
   a.Router.GET("/clusterInfo", a.ClusterInfo)
   a.Router.GET("/clusterInfoMap", a.GetClusterInfoMap)
   a.Router.GET("/serviceAccountStatus", a.GetServiceAccountStatus)
   a.Router.GET("/pricingSourceStatus", a.GetPricingSourceStatus)
   a.Router.GET("/pricingSourceCounts", a.GetPricingSourceCounts)

   // endpoints migrated from server
   a.Router.GET("/allPersistentVolumes", a.GetAllPersistentVolumes)
   a.Router.GET("/allDeployments", a.GetAllDeployments)
   a.Router.GET("/allStorageClasses", a.GetAllStorageClasses)
   a.Router.GET("/allStatefulSets", a.GetAllStatefulSets)
   a.Router.GET("/allNodes", a.GetAllNodes)
   a.Router.GET("/allPods", a.GetAllPods)
   a.Router.GET("/allNamespaces", a.GetAllNamespaces)
   a.Router.GET("/allDaemonSets", a.GetAllDaemonSets)
   a.Router.GET("/pod/:namespace/:name", a.GetPod)
   a.Router.GET("/prometheusRecordingRules", a.PrometheusRecordingRules)
   a.Router.GET("/prometheusConfig", a.PrometheusConfig)
   a.Router.GET("/prometheusTargets", a.PrometheusTargets)
   a.Router.GET("/orphanedPods", a.GetOrphanedPods)
   a.Router.GET("/installNamespace", a.GetInstallNamespace)
   a.Router.GET("/installInfo", a.GetInstallInfo)
   a.Router.GET("/podLogs", a.GetPodLogs)
   a.Router.POST("/serviceKey", a.AddServiceKey)
   a.Router.GET("/helmValues", a.GetHelmValues)
   a.Router.GET("/status", a.Status)

   // prom query proxies
   a.Router.GET("/prometheusQuery", a.PrometheusQuery)
   a.Router.GET("/prometheusQueryRange", a.PrometheusQueryRange)
   a.Router.GET("/thanosQuery", a.ThanosQuery)
   a.Router.GET("/thanosQueryRange", a.ThanosQueryRange)

   // diagnostics
   a.Router.GET("/diagnostics/requestQueue", a.GetPrometheusQueueState)
   a.Router.GET("/diagnostics/prometheusMetrics", a.GetPrometheusMetrics)

   // log level can be read and changed at runtime
   a.Router.GET("/logs/level", a.GetLogLevel)
   a.Router.POST("/logs/level", a.SetLogLevel)

   // let the additional HTTP services register their own routes on the router
   a.httpServices.RegisterAll(a.Router)

   return a
}

ComputeAllocationHandler

https://github.com/opencost/opencost/blob/develop/pkg/costmodel/aggregation.go#L2131

主要作用是解析 HTTP 参数,这里有一个核心数据结构 AllocationSet

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
// AllocationSetRange is a thread-safe slice of AllocationSets. It is meant to
// be used such that the AllocationSets held are consecutive and coherent with
// respect to using the same aggregation properties, UTC offset, and
// resolution. However these rules are not necessarily enforced, so use wisely.
//
// NOTE(review): no lock is visible among the fields shown here, so the
// thread-safety statement above cannot be verified from this excerpt.
type AllocationSetRange struct {  
   Allocations []*AllocationSet  
   FromStore   string // stores the name of the store used to retrieve the data  
}

// AllocationSet stores a set of Allocations, each with a unique name, that share
// a window. An AllocationSet is mutable, so treat it like a threadsafe map.
type AllocationSet struct {  
   // Allocations holds the members of the set; presumably keyed by the
   // Allocation's unique name — confirm against the code that fills it.
   Allocations  map[string]*Allocation  
   // ExternalKeys and IdleKeys presumably mark which keys of Allocations are
   // external / idle allocations — confirm.
   ExternalKeys map[string]bool  
   IdleKeys     map[string]bool  
   FromSource   string // stores the name of the source used to compute the data  
   // Window is the window shared by every Allocation in the set.
   Window       Window  
   // Warnings and Errors are message lists carried along with the set.
   Warnings     []string  
   Errors       []string  
}

// Allocation is a unit of resource allocation and cost for a given window
// of time and for a given kubernetes construct with its associated set of
// properties.
// TODO:CLEANUP consider dropping name in favor of just Allocation and an
// Assets-style key() function for AllocationSet.
//
// NOTE(review): the JSON tags of RAMBytesRequestAverage and
// RAMBytesUsageAverage spell "ramByte..." (singular) while the field names
// use "RAMBytes..."; the tags are kept as-is because they define the
// serialized field names.
type Allocation struct {
   Name                       string                `json:"name"`
   Properties                 *AllocationProperties `json:"properties,omitempty"`
   Window                     Window                `json:"window"`
   Start                      time.Time             `json:"start"`
   End                        time.Time             `json:"end"`
   CPUCoreHours               float64               `json:"cpuCoreHours"`
   CPUCoreRequestAverage      float64               `json:"cpuCoreRequestAverage"`
   CPUCoreUsageAverage        float64               `json:"cpuCoreUsageAverage"`
   CPUCost                    float64               `json:"cpuCost"`
   CPUCostAdjustment          float64               `json:"cpuCostAdjustment"`
   GPUHours                   float64               `json:"gpuHours"`
   GPUCost                    float64               `json:"gpuCost"`
   GPUCostAdjustment          float64               `json:"gpuCostAdjustment"`
   NetworkTransferBytes       float64               `json:"networkTransferBytes"`
   NetworkReceiveBytes        float64               `json:"networkReceiveBytes"`
   NetworkCost                float64               `json:"networkCost"`
   NetworkCrossZoneCost       float64               `json:"networkCrossZoneCost"`   // @bingen:field[version=16]
   NetworkCrossRegionCost     float64               `json:"networkCrossRegionCost"` // @bingen:field[version=16]
   NetworkInternetCost        float64               `json:"networkInternetCost"`    // @bingen:field[version=16]
   NetworkCostAdjustment      float64               `json:"networkCostAdjustment"`
   LoadBalancerCost           float64               `json:"loadBalancerCost"`
   LoadBalancerCostAdjustment float64               `json:"loadBalancerCostAdjustment"`
   PVs                        PVAllocations         `json:"pvs"`
   PVCostAdjustment           float64               `json:"pvCostAdjustment"`
   RAMByteHours               float64               `json:"ramByteHours"`
   RAMBytesRequestAverage     float64               `json:"ramByteRequestAverage"`
   RAMBytesUsageAverage       float64               `json:"ramByteUsageAverage"`
   RAMCost                    float64               `json:"ramCost"`
   RAMCostAdjustment          float64               `json:"ramCostAdjustment"`
   SharedCost                 float64               `json:"sharedCost"`
   ExternalCost               float64               `json:"externalCost"`
   // RawAllocationOnly is a pointer so if it is not present it will be
   // marshalled as null rather than as an object with Go default values.
   RawAllocationOnly *RawAllocationOnlyData `json:"rawAllocationOnly"`
}

以 CPU 为例:

  • 对于每一个 Pod
    • Spec 中有对应的 CPU Request 和 Limit
    • 可以通过 Prometheus 计算出 Pod 实际使用了多少 CPU,以及对应的 CPU 时间
    • 如果有一个基本的 CPU Price
    • 我们就可以算出 CPU Cost 和对应的 CPU 使用率
  • 问题:
    • 不同机型的 CPU Price 是相同的吗
    • __idle__ 所耗费的成本是如何计算出来的
    • __unallocated__ 怎么计算出来的
1
2
3
4
5
// Excerpt: the CPU-related fields of the Allocation struct (not a standalone
// declaration). All five are float64 values serialized under the JSON names
// given in their tags.
CPUCoreHours               float64               `json:"cpuCoreHours"`  
CPUCoreRequestAverage      float64               `json:"cpuCoreRequestAverage"`  
CPUCoreUsageAverage        float64               `json:"cpuCoreUsageAverage"`  
CPUCost                    float64               `json:"cpuCost"`  
CPUCostAdjustment          float64               `json:"cpuCostAdjustment"`

关键计算代码:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// Excerpt from the body of an HTTP handler: window, step, resolution,
// aggregateBy, w, a and the outer err are defined by code that is not shown.
//
// Walk the query window in consecutive step-sized sub-windows, compute one
// AllocationSet per sub-window, and append them in order to asr.
asr := kubecost.NewAllocationSetRange()  
stepStart := *window.Start()  
// Continue while the current sub-window starts before the end of the window.
// NOTE(review): if step were zero or negative, stepStart would never move past
// window.End() — presumably step is validated earlier; confirm.
for window.End().After(stepStart) {  
   // NOTE(review): stepEnd is not clamped to window.End(), so the last
   // sub-window can reach past the requested end when the window length is
   // not a multiple of step.
   stepEnd := stepStart.Add(step)  
   stepWindow := kubecost.NewWindow(&stepStart, &stepEnd)  
  
   as, err := a.Model.ComputeAllocation(*stepWindow.Start(), *stepWindow.End(), resolution)  
   if err != nil {  
      // The first failing sub-window aborts the whole request with an
      // internal-server-error response.
      WriteError(w, InternalServerError(err.Error()))  
      return  
   }  
   asr.Append(as)  
  
   // The next sub-window starts where this one ended.
   stepStart = stepEnd  
}  
  
// Aggregate, if requested  
if len(aggregateBy) > 0 {  
   err = asr.AggregateBy(aggregateBy, nil)  
   if err != nil {  
      WriteError(w, InternalServerError(err.Error()))  
      return  
   }  
}

ComputeAllocation

实际计算函数

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
  
// computeAllocation builds the AllocationSet for the window (start, end).
//
// It first builds a map of pods, then issues a batch of Prometheus queries
// evaluated at end, applies their results to the pod map (resources, labels,
// controllers, services, PVs/PVCs, load balancers, node pricing), and finally
// copies every per-container Allocation into the returned set.
//
// The returned AllocationSet is never nil. When the duration string for
// end-start is empty, or when any Prometheus query fails, the empty set is
// returned together with the error.
func (cm *CostModel) computeAllocation(start, end time.Time, resolution time.Duration) (*kubecost.AllocationSet, error) {
	// 1. Build out Pod map from resolution-tuned, batched Pod start/end query
	// 2. Run and apply the results of the remaining queries to the Pod map
	// 3. Build out AllocationSet from completed Pod map

	// Create a window spanning the requested query
	window := kubecost.NewWindow(&start, &end)

	// Create an empty AllocationSet. For safety, in the case of an error, we
	// should prefer to return this empty set with the error. (In the case of
	// no error, of course we populate the set and return it.)
	allocSet := kubecost.NewAllocationSet(start, end)

	// (1) Build out Pod map

	// Build out a map of Allocations as a mapping from pod-to-container-to-
	// underlying-Allocation instance, starting with (start, end) so that we
	// begin with minutes, from which we compute resource allocation and cost
	// totals from measured rate data.
	podMap := map[podKey]*pod{}

	// clusterStarts and clusterEnds record the earliest start and latest end
	// times, respectively, on a cluster-basis. These are used for unmounted
	// PVs and other "virtual" Allocations so that minutes are maximally
	// accurate during start-up or spin-down of a cluster
	clusterStart := map[string]time.Time{}
	clusterEnd := map[string]time.Time{}

	// If ingesting pod UID, we query kube_pod_container_status_running avg
	// by uid as well as the default values, and all podKeys/pods have their
	// names changed to "<pod_name> <pod_uid>". Because other metrics need
	// to generate keys to match pods but don't have UIDs, podUIDKeyMap
	// stores values of format:
	// default podKey : []{edited podkey 1, edited podkey 2}
	// This is because ingesting UID allows us to catch uncontrolled pods
	// with the same names. However, this will lead to a many-to-one metric
	// to podKey relation, so this map allows us to map the metric's
	// "<pod_name>" key to the edited "<pod_name> <pod_uid>" keys in podMap.
	ingestPodUID := env.IsIngestingPodUID()
	podUIDKeyMap := make(map[podKey][]podKey)

	if ingestPodUID {
		log.Debugf("CostModel.ComputeAllocation: ingesting UID data from KSM metrics...")
	}

	// TODO:CLEANUP remove "max batch" idea and clusterStart/End
	// NOTE: a buildPodMap failure is only logged; processing continues with
	// whatever podMap contains at that point.
	err := cm.buildPodMap(window, resolution, env.GetETLMaxPrometheusQueryDuration(), podMap, clusterStart, clusterEnd, ingestPodUID, podUIDKeyMap)
	if err != nil {
		log.Errorf("CostModel.ComputeAllocation: failed to build pod map: %s", err.Error())
	}
	// (2) Run and apply remaining queries

	// Query for the duration between start and end
	durStr := timeutil.DurationString(end.Sub(start))
	if durStr == "" {
		return allocSet, fmt.Errorf("illegal duration value for %s", kubecost.NewClosedWindow(start, end))
	}

	// Convert resolution duration to a query-ready string
	resStr := timeutil.DurationString(resolution)

	ctx := prom.NewNamedContext(cm.PrometheusClient, prom.AllocationContextName)

	// All queries are issued first and only awaited afterwards, so the
	// requests are in flight together rather than executed one by one.
	queryRAMBytesAllocated := fmt.Sprintf(queryFmtRAMBytesAllocated, durStr, env.GetPromClusterLabel())
	resChRAMBytesAllocated := ctx.QueryAtTime(queryRAMBytesAllocated, end)

	queryRAMRequests := fmt.Sprintf(queryFmtRAMRequests, durStr, env.GetPromClusterLabel())
	resChRAMRequests := ctx.QueryAtTime(queryRAMRequests, end)

	queryRAMUsageAvg := fmt.Sprintf(queryFmtRAMUsageAvg, durStr, env.GetPromClusterLabel())
	resChRAMUsageAvg := ctx.QueryAtTime(queryRAMUsageAvg, end)

	queryRAMUsageMax := fmt.Sprintf(queryFmtRAMUsageMax, durStr, env.GetPromClusterLabel())
	resChRAMUsageMax := ctx.QueryAtTime(queryRAMUsageMax, end)

	queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, durStr, env.GetPromClusterLabel())
	resChCPUCoresAllocated := ctx.QueryAtTime(queryCPUCoresAllocated, end)

	queryCPURequests := fmt.Sprintf(queryFmtCPURequests, durStr, env.GetPromClusterLabel())
	resChCPURequests := ctx.QueryAtTime(queryCPURequests, end)

	queryCPUUsageAvg := fmt.Sprintf(queryFmtCPUUsageAvg, durStr, env.GetPromClusterLabel())
	resChCPUUsageAvg := ctx.QueryAtTime(queryCPUUsageAvg, end)

	queryCPUUsageMax := fmt.Sprintf(queryFmtCPUUsageMax, durStr, env.GetPromClusterLabel())
	resChCPUUsageMax := ctx.QueryAtTime(queryCPUUsageMax, end)

	queryGPUsRequested := fmt.Sprintf(queryFmtGPUsRequested, durStr, env.GetPromClusterLabel())
	resChGPUsRequested := ctx.QueryAtTime(queryGPUsRequested, end)

	queryGPUsAllocated := fmt.Sprintf(queryFmtGPUsAllocated, durStr, env.GetPromClusterLabel())
	resChGPUsAllocated := ctx.QueryAtTime(queryGPUsAllocated, end)

	queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, durStr, env.GetPromClusterLabel())
	resChNodeCostPerCPUHr := ctx.QueryAtTime(queryNodeCostPerCPUHr, end)

	queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, durStr, env.GetPromClusterLabel())
	resChNodeCostPerRAMGiBHr := ctx.QueryAtTime(queryNodeCostPerRAMGiBHr, end)

	queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, durStr, env.GetPromClusterLabel())
	resChNodeCostPerGPUHr := ctx.QueryAtTime(queryNodeCostPerGPUHr, end)

	queryNodeIsSpot := fmt.Sprintf(queryFmtNodeIsSpot, durStr)
	resChNodeIsSpot := ctx.QueryAtTime(queryNodeIsSpot, end)

	queryPVCInfo := fmt.Sprintf(queryFmtPVCInfo, env.GetPromClusterLabel(), durStr, resStr)
	resChPVCInfo := ctx.QueryAtTime(queryPVCInfo, end)

	queryPodPVCAllocation := fmt.Sprintf(queryFmtPodPVCAllocation, durStr, env.GetPromClusterLabel())
	resChPodPVCAllocation := ctx.QueryAtTime(queryPodPVCAllocation, end)

	queryPVCBytesRequested := fmt.Sprintf(queryFmtPVCBytesRequested, durStr, env.GetPromClusterLabel())
	resChPVCBytesRequested := ctx.QueryAtTime(queryPVCBytesRequested, end)

	queryPVActiveMins := fmt.Sprintf(queryFmtPVActiveMins, env.GetPromClusterLabel(), durStr, resStr)
	resChPVActiveMins := ctx.QueryAtTime(queryPVActiveMins, end)

	queryPVBytes := fmt.Sprintf(queryFmtPVBytes, durStr, env.GetPromClusterLabel())
	resChPVBytes := ctx.QueryAtTime(queryPVBytes, end)

	queryPVCostPerGiBHour := fmt.Sprintf(queryFmtPVCostPerGiBHour, durStr, env.GetPromClusterLabel())
	resChPVCostPerGiBHour := ctx.QueryAtTime(queryPVCostPerGiBHour, end)

	queryNetTransferBytes := fmt.Sprintf(queryFmtNetTransferBytes, durStr, env.GetPromClusterLabel())
	resChNetTransferBytes := ctx.QueryAtTime(queryNetTransferBytes, end)

	queryNetReceiveBytes := fmt.Sprintf(queryFmtNetReceiveBytes, durStr, env.GetPromClusterLabel())
	resChNetReceiveBytes := ctx.QueryAtTime(queryNetReceiveBytes, end)

	queryNetZoneGiB := fmt.Sprintf(queryFmtNetZoneGiB, durStr, env.GetPromClusterLabel())
	resChNetZoneGiB := ctx.QueryAtTime(queryNetZoneGiB, end)

	queryNetZoneCostPerGiB := fmt.Sprintf(queryFmtNetZoneCostPerGiB, durStr, env.GetPromClusterLabel())
	resChNetZoneCostPerGiB := ctx.QueryAtTime(queryNetZoneCostPerGiB, end)

	queryNetRegionGiB := fmt.Sprintf(queryFmtNetRegionGiB, durStr, env.GetPromClusterLabel())
	resChNetRegionGiB := ctx.QueryAtTime(queryNetRegionGiB, end)

	queryNetRegionCostPerGiB := fmt.Sprintf(queryFmtNetRegionCostPerGiB, durStr, env.GetPromClusterLabel())
	resChNetRegionCostPerGiB := ctx.QueryAtTime(queryNetRegionCostPerGiB, end)

	queryNetInternetGiB := fmt.Sprintf(queryFmtNetInternetGiB, durStr, env.GetPromClusterLabel())
	resChNetInternetGiB := ctx.QueryAtTime(queryNetInternetGiB, end)

	queryNetInternetCostPerGiB := fmt.Sprintf(queryFmtNetInternetCostPerGiB, durStr, env.GetPromClusterLabel())
	resChNetInternetCostPerGiB := ctx.QueryAtTime(queryNetInternetCostPerGiB, end)

	queryNamespaceLabels := fmt.Sprintf(queryFmtNamespaceLabels, durStr)
	resChNamespaceLabels := ctx.QueryAtTime(queryNamespaceLabels, end)

	queryNamespaceAnnotations := fmt.Sprintf(queryFmtNamespaceAnnotations, durStr)
	resChNamespaceAnnotations := ctx.QueryAtTime(queryNamespaceAnnotations, end)

	queryPodLabels := fmt.Sprintf(queryFmtPodLabels, durStr)
	resChPodLabels := ctx.QueryAtTime(queryPodLabels, end)

	queryPodAnnotations := fmt.Sprintf(queryFmtPodAnnotations, durStr)
	resChPodAnnotations := ctx.QueryAtTime(queryPodAnnotations, end)

	queryServiceLabels := fmt.Sprintf(queryFmtServiceLabels, durStr)
	resChServiceLabels := ctx.QueryAtTime(queryServiceLabels, end)

	queryDeploymentLabels := fmt.Sprintf(queryFmtDeploymentLabels, durStr)
	resChDeploymentLabels := ctx.QueryAtTime(queryDeploymentLabels, end)

	queryStatefulSetLabels := fmt.Sprintf(queryFmtStatefulSetLabels, durStr)
	resChStatefulSetLabels := ctx.QueryAtTime(queryStatefulSetLabels, end)

	queryDaemonSetLabels := fmt.Sprintf(queryFmtDaemonSetLabels, durStr, env.GetPromClusterLabel())
	resChDaemonSetLabels := ctx.QueryAtTime(queryDaemonSetLabels, end)

	queryPodsWithReplicaSetOwner := fmt.Sprintf(queryFmtPodsWithReplicaSetOwner, durStr, env.GetPromClusterLabel())
	resChPodsWithReplicaSetOwner := ctx.QueryAtTime(queryPodsWithReplicaSetOwner, end)

	queryReplicaSetsWithoutOwners := fmt.Sprintf(queryFmtReplicaSetsWithoutOwners, durStr, env.GetPromClusterLabel())
	resChReplicaSetsWithoutOwners := ctx.QueryAtTime(queryReplicaSetsWithoutOwners, end)

	queryJobLabels := fmt.Sprintf(queryFmtJobLabels, durStr, env.GetPromClusterLabel())
	resChJobLabels := ctx.QueryAtTime(queryJobLabels, end)

	queryLBCostPerHr := fmt.Sprintf(queryFmtLBCostPerHr, durStr, env.GetPromClusterLabel())
	resChLBCostPerHr := ctx.QueryAtTime(queryLBCostPerHr, end)

	queryLBActiveMins := fmt.Sprintf(queryFmtLBActiveMins, env.GetPromClusterLabel(), durStr, resStr)
	resChLBActiveMins := ctx.QueryAtTime(queryLBActiveMins, end)

	// Per-query errors returned by Await are intentionally discarded here;
	// they are checked collectively through ctx.HasErrors() below.
	resCPUCoresAllocated, _ := resChCPUCoresAllocated.Await()
	resCPURequests, _ := resChCPURequests.Await()
	resCPUUsageAvg, _ := resChCPUUsageAvg.Await()
	resCPUUsageMax, _ := resChCPUUsageMax.Await()
	resRAMBytesAllocated, _ := resChRAMBytesAllocated.Await()
	resRAMRequests, _ := resChRAMRequests.Await()
	resRAMUsageAvg, _ := resChRAMUsageAvg.Await()
	resRAMUsageMax, _ := resChRAMUsageMax.Await()
	resGPUsRequested, _ := resChGPUsRequested.Await()
	resGPUsAllocated, _ := resChGPUsAllocated.Await()

	resNodeCostPerCPUHr, _ := resChNodeCostPerCPUHr.Await()
	resNodeCostPerRAMGiBHr, _ := resChNodeCostPerRAMGiBHr.Await()
	resNodeCostPerGPUHr, _ := resChNodeCostPerGPUHr.Await()
	resNodeIsSpot, _ := resChNodeIsSpot.Await()

	resPVActiveMins, _ := resChPVActiveMins.Await()
	resPVBytes, _ := resChPVBytes.Await()
	resPVCostPerGiBHour, _ := resChPVCostPerGiBHour.Await()

	resPVCInfo, _ := resChPVCInfo.Await()
	resPVCBytesRequested, _ := resChPVCBytesRequested.Await()
	resPodPVCAllocation, _ := resChPodPVCAllocation.Await()

	resNetTransferBytes, _ := resChNetTransferBytes.Await()
	resNetReceiveBytes, _ := resChNetReceiveBytes.Await()
	resNetZoneGiB, _ := resChNetZoneGiB.Await()
	resNetZoneCostPerGiB, _ := resChNetZoneCostPerGiB.Await()
	resNetRegionGiB, _ := resChNetRegionGiB.Await()
	resNetRegionCostPerGiB, _ := resChNetRegionCostPerGiB.Await()
	resNetInternetGiB, _ := resChNetInternetGiB.Await()
	resNetInternetCostPerGiB, _ := resChNetInternetCostPerGiB.Await()

	resNamespaceLabels, _ := resChNamespaceLabels.Await()
	resNamespaceAnnotations, _ := resChNamespaceAnnotations.Await()
	resPodLabels, _ := resChPodLabels.Await()
	resPodAnnotations, _ := resChPodAnnotations.Await()
	resServiceLabels, _ := resChServiceLabels.Await()
	resDeploymentLabels, _ := resChDeploymentLabels.Await()
	resStatefulSetLabels, _ := resChStatefulSetLabels.Await()
	resDaemonSetLabels, _ := resChDaemonSetLabels.Await()
	resPodsWithReplicaSetOwner, _ := resChPodsWithReplicaSetOwner.Await()
	resReplicaSetsWithoutOwners, _ := resChReplicaSetsWithoutOwners.Await()
	resJobLabels, _ := resChJobLabels.Await()
	resLBCostPerHr, _ := resChLBCostPerHr.Await()
	resLBActiveMins, _ := resChLBActiveMins.Await()

	if ctx.HasErrors() {
		for _, err := range ctx.Errors() {
			log.Errorf("CostModel.ComputeAllocation: query context error %s", err)
		}

		return allocSet, ctx.ErrorCollection()
	}

	// We choose to apply allocation before requests in the cases of RAM and
	// CPU so that we can assert that allocation should always be greater than
	// or equal to request.
	applyCPUCoresAllocated(podMap, resCPUCoresAllocated, podUIDKeyMap)
	applyCPUCoresRequested(podMap, resCPURequests, podUIDKeyMap)
	applyCPUCoresUsedAvg(podMap, resCPUUsageAvg, podUIDKeyMap)
	applyCPUCoresUsedMax(podMap, resCPUUsageMax, podUIDKeyMap)
	applyRAMBytesAllocated(podMap, resRAMBytesAllocated, podUIDKeyMap)
	applyRAMBytesRequested(podMap, resRAMRequests, podUIDKeyMap)
	applyRAMBytesUsedAvg(podMap, resRAMUsageAvg, podUIDKeyMap)
	applyRAMBytesUsedMax(podMap, resRAMUsageMax, podUIDKeyMap)
	applyGPUsAllocated(podMap, resGPUsRequested, resGPUsAllocated, podUIDKeyMap)
	applyNetworkTotals(podMap, resNetTransferBytes, resNetReceiveBytes, podUIDKeyMap)
	applyNetworkAllocation(podMap, resNetZoneGiB, resNetZoneCostPerGiB, podUIDKeyMap, networkCrossZoneCost)
	applyNetworkAllocation(podMap, resNetRegionGiB, resNetRegionCostPerGiB, podUIDKeyMap, networkCrossRegionCost)
	applyNetworkAllocation(podMap, resNetInternetGiB, resNetInternetCostPerGiB, podUIDKeyMap, networkInternetCost)

	// In the case that a two pods with the same name had different containers,
	// we will double-count the containers. There is no way to associate each
	// container with the proper pod from the usage metrics above. This will
	// show up as a pod having two Allocations running for the whole pod runtime.
	// Other than that case, Allocations should be associated with pods by the
	// above functions.
	namespaceLabels := resToNamespaceLabels(resNamespaceLabels)
	podLabels := resToPodLabels(resPodLabels, podUIDKeyMap, ingestPodUID)
	namespaceAnnotations := resToNamespaceAnnotations(resNamespaceAnnotations)
	podAnnotations := resToPodAnnotations(resPodAnnotations, podUIDKeyMap, ingestPodUID)
	applyLabels(podMap, namespaceLabels, podLabels)
	applyAnnotations(podMap, namespaceAnnotations, podAnnotations)

	podDeploymentMap := labelsToPodControllerMap(podLabels, resToDeploymentLabels(resDeploymentLabels))
	podStatefulSetMap := labelsToPodControllerMap(podLabels, resToStatefulSetLabels(resStatefulSetLabels))
	podDaemonSetMap := resToPodDaemonSetMap(resDaemonSetLabels, podUIDKeyMap, ingestPodUID)
	podJobMap := resToPodJobMap(resJobLabels, podUIDKeyMap, ingestPodUID)
	podReplicaSetMap := resToPodReplicaSetMap(resPodsWithReplicaSetOwner, resReplicaSetsWithoutOwners, podUIDKeyMap, ingestPodUID)
	applyControllersToPods(podMap, podDeploymentMap)
	applyControllersToPods(podMap, podStatefulSetMap)
	applyControllersToPods(podMap, podDaemonSetMap)
	applyControllersToPods(podMap, podJobMap)
	applyControllersToPods(podMap, podReplicaSetMap)

	serviceLabels := getServiceLabels(resServiceLabels)
	allocsByService := map[serviceKey][]*kubecost.Allocation{}
	applyServicesToPods(podMap, podLabels, allocsByService, serviceLabels)

	// TODO breakdown network costs?

	// Build out the map of all PVs with class, size and cost-per-hour.
	// Note: this does not record time running, which we may want to
	// include later for increased PV precision. (As long as the PV has
	// a PVC, we get time running there, so this is only inaccurate
	// for short-lived, unmounted PVs.)
	pvMap := map[pvKey]*pv{}
	buildPVMap(resolution, pvMap, resPVCostPerGiBHour, resPVActiveMins)
	applyPVBytes(pvMap, resPVBytes)

	// Build out the map of all PVCs with time running, bytes requested,
	// and connect to the correct PV from pvMap. (If no PV exists, that
	// is noted, but does not result in any allocation/cost.)
	pvcMap := map[pvcKey]*pvc{}
	buildPVCMap(resolution, pvcMap, pvMap, resPVCInfo)
	applyPVCBytesRequested(pvcMap, resPVCBytesRequested)

	// Build out the relationships of pods to their PVCs. This step
	// populates the pvc.Count field so that pvc allocation can be
	// split appropriately among each pod's container allocation.
	podPVCMap := map[podKey][]*pvc{}
	buildPodPVCMap(podPVCMap, pvMap, pvcMap, podMap, resPodPVCAllocation, podUIDKeyMap, ingestPodUID)
	applyPVCsToPods(window, podMap, podPVCMap, pvcMap)

	// Identify PVCs without pods and add pv costs to the unmounted Allocation for the pvc's cluster
	applyUnmountedPVCs(window, podMap, pvcMap)

	// Identify PVs without PVCs and add PV costs to the unmounted Allocation for the PV's cluster
	applyUnmountedPVs(window, podMap, pvMap, pvcMap)

	lbMap := make(map[serviceKey]*lbCost)
	getLoadBalancerCosts(lbMap, resLBCostPerHr, resLBActiveMins, resolution)
	applyLoadBalancersToPods(window, podMap, lbMap, allocsByService)

	// Build out a map of Nodes with resource costs, discounts, and node types
	// for converting resource allocation data to cumulative costs.
	nodeMap := map[nodeKey]*nodePricing{}

	applyNodeCostPerCPUHr(nodeMap, resNodeCostPerCPUHr)
	applyNodeCostPerRAMGiBHr(nodeMap, resNodeCostPerRAMGiBHr)
	applyNodeCostPerGPUHr(nodeMap, resNodeCostPerGPUHr)
	applyNodeSpot(nodeMap, resNodeIsSpot)
	applyNodeDiscount(nodeMap, cm)
	cm.applyNodesToPod(podMap, nodeMap)

	// (3) Build out AllocationSet from Pod map
	for _, pod := range podMap {
		for _, alloc := range pod.Allocations {
			cluster := alloc.Properties.Cluster
			nodeName := alloc.Properties.Node
			namespace := alloc.Properties.Namespace
			podName := alloc.Properties.Pod
			container := alloc.Properties.Container

			// Make sure that the name is correct (node may not be present at this
			// point due to it missing from queryMinutes) then insert.
			alloc.Name = fmt.Sprintf("%s/%s/%s/%s/%s", cluster, nodeName, namespace, podName, container)
			allocSet.Set(alloc)
		}
	}

	return allocSet, nil
}

同样以 CPU 为例:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
// Each query string is built from its format constant with the window
// duration (durStr) and the Prometheus cluster label, then evaluated at the
// window's end time.
queryCPUCoresAllocated := fmt.Sprintf(queryFmtCPUCoresAllocated, durStr, env.GetPromClusterLabel())  
resChCPUCoresAllocated := ctx.QueryAtTime(queryCPUCoresAllocated, end)  
  
queryCPURequests := fmt.Sprintf(queryFmtCPURequests, durStr, env.GetPromClusterLabel())  
resChCPURequests := ctx.QueryAtTime(queryCPURequests, end)  
  
queryCPUUsageAvg := fmt.Sprintf(queryFmtCPUUsageAvg, durStr, env.GetPromClusterLabel())  
resChCPUUsageAvg := ctx.QueryAtTime(queryCPUUsageAvg, end)  
  
queryCPUUsageMax := fmt.Sprintf(queryFmtCPUUsageMax, durStr, env.GetPromClusterLabel())  
resChCPUUsageMax := ctx.QueryAtTime(queryCPUUsageMax, end)

关键指标:

  • queryCPUCoresAllocated: container_cpu_allocation
  • queryCPURequests: kube_pod_container_resource_requests
  • queryCPUUsageAvg: avg(container_cpu_usage_seconds_total)
  • queryCPUUsageMax: max(container_cpu_usage_seconds_total)

这些都是可以从 Prometheus 查询到的。

还有一些不常见的指标,比如 LB 相关,这个是怎么来的呢?

1
2
3
4
5
6
7
8
// Format arguments: range duration, then cluster label.
queryFmtLBCostPerHr              = `avg(avg_over_time(kubecost_load_balancer_cost[%s])) by (namespace, service_name, %s)`  
// Format arguments: cluster label, then the subquery range and resolution.
queryFmtLBActiveMins             = `count(kubecost_load_balancer_cost) by (namespace, service_name, %s)[%s:%s]`

// Both queries are evaluated at the window's end time.
queryLBCostPerHr := fmt.Sprintf(queryFmtLBCostPerHr, durStr, env.GetPromClusterLabel())  
resChLBCostPerHr := ctx.QueryAtTime(queryLBCostPerHr, end)  
  
queryLBActiveMins := fmt.Sprintf(queryFmtLBActiveMins, env.GetPromClusterLabel(), durStr, resStr)  
resChLBActiveMins := ctx.QueryAtTime(queryLBActiveMins, end)

从 Prometheus 查询到这些 metrics 之后,需要通过计算将其转换为我们的 Allocation 数据结构

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
func applyCPUCoresRequested(podMap map[podKey]*pod, resCPUCoresRequested []*prom.QueryResult, podUIDKeyMap map[podKey][]podKey) {  
   for _, res := range resCPUCoresRequested {  
      key, err := resultPodKey(res, env.GetPromClusterLabel(), "namespace")  
      if err != nil {  
         log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU request result missing field: %s", err)  
         continue  
      }  
  
      container, err := res.GetString("container")  
      if err != nil {  
         log.DedupedWarningf(10, "CostModel.ComputeAllocation: CPU request query result missing 'container': %s", key)  
         continue  
      }  
  
      var pods []*pod  
      if thisPod, ok := podMap[key]; !ok {  
         if uidKeys, ok := podUIDKeyMap[key]; ok {  
            for _, uidKey := range uidKeys {  
               thisPod, ok = podMap[uidKey]  
               if ok {  
                  pods = append(pods, thisPod)  
               }  
            }  
         } else {  
            continue  
         }  
      } else {  
         pods = []*pod{thisPod}  
      }  
  
      for _, thisPod := range pods {  
  
         if _, ok := thisPod.Allocations[container]; !ok {  
            thisPod.appendContainer(container)  
         }  
  
         thisPod.Allocations[container].CPUCoreRequestAverage = res.Values[0].Value  
  
         // If CPU allocation is less than requests, set CPUCoreHours to  
         // request level.         if thisPod.Allocations[container].CPUCores() < res.Values[0].Value {  
            thisPod.Allocations[container].CPUCoreHours = res.Values[0].Value * (thisPod.Allocations[container].Minutes() / 60.0)  
         }  
         if thisPod.Allocations[container].CPUCores() > MAX_CPU_CAP {  
            log.Infof("[WARNING] Very large cpu allocation, clamping! to %f", res.Values[0].Value*(thisPod.Allocations[container].Minutes()/60.0))  
            thisPod.Allocations[container].CPUCoreHours = res.Values[0].Value * (thisPod.Allocations[container].Minutes() / 60.0)  
         }  
         node, err := res.GetString("node")  
         if err != nil {  
            log.Warnf("CostModel.ComputeAllocation: CPU request query result missing 'node': %s", key)  
            continue  
         }  
         thisPod.Allocations[container].Properties.Node = node  
      }  
   }  
}

具体成本计算

对于每个 Node,都有其独特的价格,这个也是从 Prometheus 查到的

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
// Format arguments for the three hourly-cost queries: range duration, then
// cluster label. Results are grouped by node, cluster label, instance_type
// and provider_id.
queryFmtNodeCostPerCPUHr         = `avg(avg_over_time(node_cpu_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`  
queryFmtNodeCostPerRAMGiBHr      = `avg(avg_over_time(node_ram_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`  
queryFmtNodeCostPerGPUHr         = `avg(avg_over_time(node_gpu_hourly_cost[%s])) by (node, %s, instance_type, provider_id)`  
// Format argument: range duration only.
queryFmtNodeIsSpot               = `avg_over_time(kubecost_node_is_spot[%s])`


// Each query is evaluated at the window's end time.
queryNodeCostPerCPUHr := fmt.Sprintf(queryFmtNodeCostPerCPUHr, durStr, env.GetPromClusterLabel())  
resChNodeCostPerCPUHr := ctx.QueryAtTime(queryNodeCostPerCPUHr, end)  
  
queryNodeCostPerRAMGiBHr := fmt.Sprintf(queryFmtNodeCostPerRAMGiBHr, durStr, env.GetPromClusterLabel())  
resChNodeCostPerRAMGiBHr := ctx.QueryAtTime(queryNodeCostPerRAMGiBHr, end)  
  
queryNodeCostPerGPUHr := fmt.Sprintf(queryFmtNodeCostPerGPUHr, durStr, env.GetPromClusterLabel())  
resChNodeCostPerGPUHr := ctx.QueryAtTime(queryNodeCostPerGPUHr, end)

那么这些价格指标是谁写入 Prometheus 的呢?答案是 OpenCost 自己:它会把对应的 Metrics 暴露出来,供 Prometheus 抓取。

https://github.com/opencost/opencost/blob/develop/pkg/costmodel/metrics.go#L139

并且在这里暴露出来: https://github.com/opencost/opencost/blob/develop/pkg/costmodel/metrics.go#L539

具体的计算 CPU/RAM/GPU 等成本的方法:

1
// Ask the cost model for node costs, passing in the configured cloud provider.
nodes, err := cmme.Model.GetNodeCost(cmme.CloudProvider)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
// applyNodesToPod converts the resource-hours of every container Allocation
// in podMap into costs, using the pricing of the node the Allocation ran on.
//
// The pricing entry is looked up through cm.getNodePricing by (cluster, node).
// CPU and GPU costs are hours multiplied by the hourly rate; RAM cost is
// byte-hours converted to GiB-hours multiplied by the GiB hourly rate. The
// node's ProviderID is copied onto the Allocation's properties as well.
func (cm *CostModel) applyNodesToPod(podMap map[podKey]*pod, nodeMap map[nodeKey]*nodePricing) {
	for _, p := range podMap {
		for _, a := range p.Allocations {
			pricing := cm.getNodePricing(nodeMap, newNodeKey(a.Properties.Cluster, a.Properties.Node))

			// Bytes -> GiB, kept as three successive divisions.
			ramGiBHours := a.RAMByteHours / 1024 / 1024 / 1024

			a.Properties.ProviderID = pricing.ProviderID
			a.CPUCost = a.CPUCoreHours * pricing.CostPerCPUHr
			a.RAMCost = ramGiBHours * pricing.CostPerRAMGiBHr
			a.GPUCost = a.GPUHours * pricing.CostPerGPUHr
		}
	}
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
// nodePricing describes the resource costs associated with a given node, as  
// well as the source of the information (e.g. prometheus, custom)  
type nodePricing struct {  
   Name            string  
   NodeType        string  
   ProviderID      string  // copied onto each Allocation's properties by applyNodesToPod
   Preemptible     bool  
   CostPerCPUHr    float64  // multiplied by an Allocation's CPUCoreHours to get CPUCost
   CostPerRAMGiBHr float64  // multiplied by an Allocation's RAM GiB-hours to get RAMCost
   CostPerGPUHr    float64  // multiplied by an Allocation's GPUHours to get GPUCost
   Discount        float64  
   Source          string  
}

这些价格可以通过 AWS Pricing API 获取(参见 https://pilotcoresystems.com/insights/how-to-use-aws-price-list-api-examples/ ),对应的 OpenCost 代码在 https://github.com/opencost/opencost/blob/develop/pkg/cloud/awsprovider.go#L762

入口是: https://github.com/opencost/opencost/blob/develop/pkg/cloud/awsprovider.go#L762

基本思路是,可以通过 k8s 获取 Node 相关的 label 信息,包括 region/machine type/CPU count 等,然后通过 AWS Pricing API 下载价格并解析出来。

1
2
3
4
// DownloadPricingData fetches data from the AWS Pricing API
func (aws *AWS) DownloadPricingData() error {
   // Body elided in this excerpt.

}

AggregateBy

OpenCost 会在 AggregateBy 中将 ComputeAllocation 计算出来的 Allocation 进行聚合,聚合维度包括 Idle、Unallocated 以及各种 label。

https://github.com/opencost/opencost/blob/develop/pkg/kubecost/allocation.go#L865

Provider

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// Provider represents a k8s provider.type Provider interface {  
   ClusterInfo() (map[string]string, error)  
   GetAddresses() ([]byte, error)  
   GetDisks() ([]byte, error)  
   NodePricing(Key) (*Node, error)  
   PVPricing(PVKey) (*PV, error)  
   NetworkPricing() (*Network, error)           // TODO: add key interface arg for dynamic price fetching  
   LoadBalancerPricing() (*LoadBalancer, error) // TODO: add key interface arg for dynamic price fetching  
   AllNodePricing() (interface{}, error)  
   DownloadPricingData() error  
   GetKey(map[string]string, *v1.Node) Key  
   GetPVKey(*v1.PersistentVolume, map[string]string, string) PVKey  
   UpdateConfig(r io.Reader, updateType string) (*CustomPricing, error)  
   UpdateConfigFromConfigMap(map[string]string) (*CustomPricing, error)  
   GetConfig() (*CustomPricing, error)  
   GetManagementPlatform() (string, error)  
   GetLocalStorageQuery(time.Duration, time.Duration, bool, bool) string  
   ApplyReservedInstancePricing(map[string]*Node)  
   ServiceAccountStatus() *ServiceAccountStatus  
   PricingSourceStatus() map[string]*PricingSource  
   ClusterManagementPricing() (string, float64, error)  
   CombinedDiscountForNode(string, bool, float64, float64) float64  
   Regions() []string  
}

NodePricing

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
// NodePricing takes in a key from GetKey and returns a Node object for use in building the cost model.func (aws *AWS) NodePricing(k Key) (*Node, error) {  
   aws.DownloadPricingDataLock.RLock()  
   defer aws.DownloadPricingDataLock.RUnlock()  
  
   key := k.Features()  
   usageType := "ondemand"  
   if aws.isPreemptible(key) {  
      usageType = PreemptibleType  
   }  
  
   terms, ok := aws.Pricing[key]  
   if ok {  
      return aws.createNode(terms, usageType, k)  
   } else if _, ok := aws.ValidPricingKeys[key]; ok {  
      aws.DownloadPricingDataLock.RUnlock()  
      err := aws.DownloadPricingData()  
      aws.DownloadPricingDataLock.RLock()  
      if err != nil {  
         return &Node{  
            Cost:             aws.BaseCPUPrice,  
            BaseCPUPrice:     aws.BaseCPUPrice,  
            BaseRAMPrice:     aws.BaseRAMPrice,  
            BaseGPUPrice:     aws.BaseGPUPrice,  
            UsageType:        usageType,  
            UsesBaseCPUPrice: true,  
         }, err  
      }  
      terms, termsOk := aws.Pricing[key]  
      if !termsOk {  
         return &Node{  
            Cost:             aws.BaseCPUPrice,  
            BaseCPUPrice:     aws.BaseCPUPrice,  
            BaseRAMPrice:     aws.BaseRAMPrice,  
            BaseGPUPrice:     aws.BaseGPUPrice,  
            UsageType:        usageType,  
            UsesBaseCPUPrice: true,  
         }, fmt.Errorf("Unable to find any Pricing data for \"%s\"", key)  
      }  
      return aws.createNode(terms, usageType, k)  
   } else { // Fall back to base pricing if we can't find the key. Base pricing is handled at the costmodel level.  
      return nil, fmt.Errorf("Invalid Pricing Key \"%s\"", key)  
  
   }  
}

PVPricing

NetworkPricing

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// NetworkPricing returns the AWS network egress prices. It is a stub that
// reads them from the custom pricing configuration (aws.json) instead of
// fetching them dynamically.
//
// The zone, region and internet egress prices are stored as strings and are
// parsed as 64-bit floats, in that order; the first failure (loading the
// configuration or parsing a value) is returned with a nil *Network.
func (aws *AWS) NetworkPricing() (*Network, error) {
	custom, err := aws.Config.GetCustomPricingData()
	if err != nil {
		return nil, err
	}

	var zoneEgress, regionEgress, internetEgress float64

	if zoneEgress, err = strconv.ParseFloat(custom.ZoneNetworkEgress, 64); err != nil {
		return nil, err
	}
	if regionEgress, err = strconv.ParseFloat(custom.RegionNetworkEgress, 64); err != nil {
		return nil, err
	}
	if internetEgress, err = strconv.ParseFloat(custom.InternetNetworkEgress, 64); err != nil {
		return nil, err
	}

	network := &Network{
		ZoneNetworkEgressCost:     zoneEgress,
		RegionNetworkEgressCost:   regionEgress,
		InternetNetworkEgressCost: internetEgress,
	}
	return network, nil
}

LoadBalancerPricing

总结