Skip to content

Commit 658bee6

Browse files
authoredMar 13, 2025··
Add query to calculate cost of PV change with CloudCost exporter metrics (#35)
This one's a bit more of a challenge than CPU/Memory, due to three problems: 1. CloudCost Exporter does not emit metrics for persistent volumes for Azure (grafana/cloudcost-exporter#236) 2. AWS EBS cost metrics do not have a cluster label (grafana/cloudcost-exporter#450) 3. Persistent volumes in GKE and EKS emit the total hourly cost of the volume, _not_ the hourly cost per GiB, which we used previously to figure out the change in cost. I utilized the Prometheus `or` operator (https://prometheus.io/docs/prometheus/latest/querying/operators/#logical-set-binary-operators) to overcome not having Azure PV costs. Effectively, the query will attempt to find the average cost of PVs for 1. EKS volumes via CloudCost Exporter 2. GKE volumes via CloudCost Exporter 3. Azure volumes via OpenCost. This works because we're only querying one cluster at a time _by name_, and we rely upon the fact that cluster names are unique within Grafana Labs infrastructure. The missing cluster label for EKS cost metrics and persistent volumes not having cluster labels can be overcome by utilizing `kube_persistentvolume_capacity_bytes` metrics emitted by kube-state-metrics. This was tested by looking at an EKS cluster like so: ```shell go run ./cmd/estimator/ \ -use.cloud.cost.exporter.metrics=true -from $PWD/pkg/costmodel/testdata/resource/StatefulSet.json \ -to $PWD/pkg/costmodel/testdata/resource/StatefulSet-more-storage.json \ -http.config.file ~/.config/dev.yaml \ -prometheus.address $PROMETHEUS_ADDRESS \ dev-us-east-0 ``` - relates to #29
1 parent 4b5b827 commit 658bee6

File tree

2 files changed

+176
-1
lines changed

2 files changed

+176
-1
lines changed
 

‎pkg/costmodel/client.go

+29-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,31 @@ avg by (spot) (node_ram_hourly_cost{cluster="%s"}
6767
cloudcost_gcp_gke_instance_memory_usd_per_gib_hour{cluster_name="%s"}
6868
)
6969
`
70-
queryPersistentVolumeCost = "avg_over_time(avg(pv_hourly_cost{cluster=\"%s\"})[24h:1m])"
70+
queryPersistentVolumeCost = `
71+
avg_over_time(
72+
avg(
73+
pv_hourly_cost{cluster="%s"}
74+
)[24h:1m]
75+
)`
76+
cloudcostQueryPersistentVolumeCost = `
77+
avg(
78+
cloudcost_aws_ec2_persistent_volume_usd_per_hour{persistentvolume!="", state="in-use"}
79+
/ on (persistentvolume) group_left() (
80+
kube_persistentvolume_capacity_bytes{cluster="%s"} / 1e9
81+
)
82+
)
83+
or
84+
avg(
85+
cloudcost_gcp_gke_persistent_volume_usd_per_hour{persistentvolume!="", use_status="in-use", cluster_name="%s"}
86+
/ on (persistentvolume) group_left() (
87+
kube_persistentvolume_capacity_bytes{cluster="%s"} / 1e9
88+
)
89+
)
90+
or
91+
avg(
92+
pv_hourly_cost{cluster="%s"}
93+
)
94+
`
7195

7296
queryAverageNodeCount = `
7397
avg_over_time(
@@ -212,6 +236,10 @@ func (c *Client) GetNodeCount(ctx context.Context, cluster string) (int, error)
212236
// GetCostForPersistentVolume returns the average cost per persistent volume for a given cluster
213237
func (c *Client) GetCostForPersistentVolume(ctx context.Context, cluster string) (Cost, error) {
214238
query := fmt.Sprintf(queryPersistentVolumeCost, cluster)
239+
if c.useCloudCostExporterMetrics {
240+
slog.Info("GetCostForPersistentVolume", "cluster", cluster, "message", "using cloudcost exporter metrics")
241+
query = fmt.Sprintf(cloudcostQueryPersistentVolumeCost, cluster, cluster, cluster, cluster)
242+
}
215243
results, err := c.query(ctx, query)
216244
if err != nil {
217245
return Cost{}, err
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
{
2+
"apiVersion": "apps/v1",
3+
"kind": "StatefulSet",
4+
"metadata": {
5+
"creationTimestamp": "2022-10-05T20:38:31Z",
6+
"generation": 17,
7+
"labels": {
8+
"kustomize.toolkit.fluxcd.io/name": "kube-manifests-opencost",
9+
"kustomize.toolkit.fluxcd.io/namespace": "opencost",
10+
"tanka.dev/environment": "85ead74422d749cb54711e74c81bc5d6ed6da54e92b5fa69"
11+
},
12+
"name": "opencost",
13+
"namespace": "opencost",
14+
"resourceVersion": "2386985939",
15+
"uid": "56495ef8-2650-46e8-9528-28759cf47151"
16+
},
17+
"spec": {
18+
"podManagementPolicy": "OrderedReady",
19+
"replicas": 1,
20+
"revisionHistoryLimit": 10,
21+
"selector": {
22+
"matchLabels": {
23+
"name": "opencost"
24+
}
25+
},
26+
"serviceName": "opencost",
27+
"template": {
28+
"metadata": {
29+
"creationTimestamp": null,
30+
"labels": {
31+
"name": "opencost"
32+
}
33+
},
34+
"spec": {
35+
"affinity": {
36+
"nodeAffinity": {
37+
"preferredDuringSchedulingIgnoredDuringExecution": [
38+
{
39+
"preference": {
40+
"matchExpressions": [
41+
{
42+
"key": "cloud.google.com/gke-spot",
43+
"operator": "In",
44+
"values": [
45+
"true"
46+
]
47+
}
48+
]
49+
},
50+
"weight": 100
51+
}
52+
]
53+
}
54+
},
55+
"containers": [
56+
{
57+
"env": [
58+
],
59+
"image": "quay.io/kubecost1/kubecost-cost-model:prod-1.100.0",
60+
"imagePullPolicy": "IfNotPresent",
61+
"name": "opencost",
62+
"ports": [
63+
{
64+
"containerPort": 9003,
65+
"name": "http-metrics",
66+
"protocol": "TCP"
67+
}
68+
],
69+
"resources": {
70+
"limits": {
71+
"cpu": "4",
72+
"memory": "8Gi"
73+
},
74+
"requests": {
75+
"cpu": "1",
76+
"memory": "4Gi"
77+
}
78+
},
79+
"terminationMessagePath": "/dev/termination-log",
80+
"terminationMessagePolicy": "File",
81+
"volumeMounts": [
82+
{
83+
"mountPath": "/var/configs",
84+
"name": "opencost-data"
85+
}
86+
]
87+
}
88+
],
89+
"dnsPolicy": "ClusterFirst",
90+
"restartPolicy": "Always",
91+
"schedulerName": "default-scheduler",
92+
"securityContext": {
93+
"fsGroup": 10001
94+
},
95+
"serviceAccount": "opencost",
96+
"serviceAccountName": "opencost",
97+
"terminationGracePeriodSeconds": 30,
98+
"tolerations": [
99+
{
100+
"effect": "NoSchedule",
101+
"key": "type",
102+
"operator": "Equal",
103+
"value": "spot-node"
104+
}
105+
]
106+
}
107+
},
108+
"updateStrategy": {
109+
"type": "RollingUpdate"
110+
},
111+
"volumeClaimTemplates": [
112+
{
113+
"apiVersion": "v1",
114+
"kind": "PersistentVolumeClaim",
115+
"metadata": {
116+
"creationTimestamp": null,
117+
"name": "opencost-data"
118+
},
119+
"spec": {
120+
"accessModes": [
121+
"ReadWriteOnce"
122+
],
123+
"resources": {
124+
"requests": {
125+
"storage": "320Gi"
126+
}
127+
},
128+
"volumeMode": "Filesystem"
129+
},
130+
"status": {
131+
"phase": "Pending"
132+
}
133+
}
134+
]
135+
},
136+
"status": {
137+
"availableReplicas": 1,
138+
"collisionCount": 0,
139+
"currentReplicas": 1,
140+
"currentRevision": "opencost-6666f8bdb7",
141+
"observedGeneration": 17,
142+
"readyReplicas": 1,
143+
"replicas": 1,
144+
"updateRevision": "opencost-6666f8bdb7",
145+
"updatedReplicas": 1
146+
}
147+
}

0 commit comments

Comments
 (0)
Please sign in to comment.