Skip to content
Open
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ WIP TESTS
| [ ] | Ground Truth: storage byte hours|kube_persistentvolume_capacity_bytes| /allocations | For each aggregate, query prometheus to get the PVC capacity bytes. in the corresponding allocation ensure bytes match | Low |
| [ ] | Ground Truth: PV costs | pv_hourly_cost | /allocations | Query cloud provider pricing to get expected costs for a given PV. ensure that the prom results match expected costs. ensure allocations API call returns PVs that match the costs. fail the tests if no PVs are in the returned allocations data | Medium |
| [ ] | Ground Truth: PV info | kubecost_pv_info | /allocation | For each aggregate, query prometheus to get the PV info for each PV. Then, query allocations and confirm each PV is present in every aggregation and that the info matches. | Low |
| [ ] | Ground Truth: Network Bytes | container_network_receive_bytes_total/container_network_transmit_bytes_total | /allocation | For each aggregate, query prometheus to get thenetwork transmit/receive byte for every container. THen, query allocations and confirm each allocation matches the pod entry, and that for other aggregations, that the sums match. | Low |
| | Ground Truth: Network Bytes | container_network_receive_bytes_total/container_network_transmit_bytes_total | /allocation | For each aggregate, query prometheus to get the network transmit/receive bytes for every container. Then, query allocations and confirm each allocation matches the pod entry, and that for other aggregations, that the sums match. | Low |
| [ ] | Ground Truth: Node Labels | kube_node_labels | /asset | For each aggregate, query prometheus to get the labels for each node. then, query the assets API and confirm the labels match for all the different aggregations | Low |
| [ ] | Ground Truth: Node annotations | kube_node_annotations | /asset | For each aggregate, query prometheus to get the annotations for each node. then, query the assets API and confirm the annotations match for all the different aggregations | Low |
| [ ] | Ground Truth: Labels | kube_pod_labels/kube_namespace_labels | /allocation | For each aggregate, query prometheus to get the labels for each pod and namespace. then, query the allocation API and for each result, confirm the labels in each aggregate contain the expected labels | Medium |
Expand Down
1 change: 1 addition & 0 deletions pkg/prometheus/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ type PrometheusResponse struct {
Result []struct {
Metric struct {
Pod string `json:"pod"`
PodName string `json:"pod_name"`
Namespace string `json:"namespace"`
Container string `json:"container"`
} `json:"metric"`
Expand Down
2 changes: 1 addition & 1 deletion test/integration/prometheus/client_test.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package main
package prometheus

import (
"fmt"
Expand Down
195 changes: 195 additions & 0 deletions test/integration/prometheus/network_costs_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
package prometheus

// Description - Compares network receive/transfer bytes from Prometheus with the /allocation API

import (
// "fmt"
"github.com/opencost/opencost-integration-tests/pkg/api"
"github.com/opencost/opencost-integration-tests/pkg/prometheus"
"github.com/opencost/opencost-integration-tests/pkg/utils"
"slices"
"testing"
"time"
)

const tolerance = 0.05

// TestNetworkCosts checks that the per-namespace network receive and transmit
// byte totals returned by the /allocation API agree, within tolerance, with the
// totals computed directly from Prometheus.
//
// For every test case it:
//  1. queries Prometheus for the increase of
//     container_network_receive_bytes_total and
//     container_network_transmit_bytes_total grouped by (pod, namespace),
//  2. sums the per-pod values into per-namespace totals,
//  3. fetches /allocation with the test case's window/aggregate/accumulate, and
//  4. compares both counters for every namespace seen on either side.
func TestNetworkCosts(t *testing.T) {
	apiObj := api.NewAPI()

	testCases := []struct {
		name       string
		window     string
		aggregate  string
		accumulate string
	}{
		{
			name:       "Yesterday",
			window:     "24h",
			aggregate:  "namespace",
			accumulate: "false",
		},
	}

	t.Logf("testCases: %v", testCases)

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {

			// Any data that is in a "raw allocation only" is not valid in any
			// sort of cumulative Allocation (like one that is added).

			// NetworkCostsAggregate collects, for one namespace, the byte
			// totals seen in Prometheus and in /allocation, plus the pods
			// that contributed to the Prometheus totals.
			type NetworkCostsAggregate struct {
				PromNetworkTransferBytes  float64
				PromNetworkReceiveBytes   float64
				Pods                      []string
				AllocNetworkTransferBytes float64
				AllocNetworkReceiveBytes  float64
			}
			networkCostsNamespaceMap := make(map[string]*NetworkCostsAggregate)

			// entryFor returns the aggregate for namespace, creating a
			// zero-valued one on first use. Starting from zero everywhere
			// means no code path can seed one counter with the other's value.
			entryFor := func(namespace string) *NetworkCostsAggregate {
				entry, ok := networkCostsNamespaceMap[namespace]
				if !ok {
					entry = &NetworkCostsAggregate{}
					networkCostsNamespaceMap[namespace] = entry
				}
				return entry
			}

			// recordPod remembers pod on entry once, no matter how many
			// metrics report it.
			recordPod := func(entry *NetworkCostsAggregate, pod string) {
				if !slices.Contains(entry.Pods, pod) {
					entry.Pods = append(entry.Pods, pod)
				}
			}

			// Both queries are evaluated at the end of the current hour (UTC).
			queryEnd := time.Now().UTC().Truncate(time.Hour).Add(time.Hour)
			endTime := queryEnd.Unix()
			client := prometheus.NewClient()

			// newByteCounterInput builds the query input shared by both
			// counters; only the metric name differs. Intended query shape:
			//   sum(increase(<metric>{pod!=""}[<window>:5m])) by (pod, namespace)
			newByteCounterInput := func(metric string) prometheus.PrometheusInput {
				input := prometheus.PrometheusInput{
					Metric: metric,
				}
				input.IgnoreFilters = map[string][]string{
					"pod": {""},
				}
				input.Function = []string{"increase", "sum"}
				input.QueryWindow = tc.window
				input.QueryResolution = "5m"
				input.AggregateBy = []string{"pod", "namespace"}
				input.Time = &endTime
				return input
			}

			////////////////////////////////////////////////////////////////////////////
			// Network Receive Bytes
			////////////////////////////////////////////////////////////////////////////

			promNetworkReceiveResponse, err := client.RunPromQLQuery(newByteCounterInput("container_network_receive_bytes_total"))
			if err != nil {
				t.Fatalf("Error while calling Prometheus API %v", err)
			}

			////////////////////////////////////////////////////////////////////////////
			// Network Transfer Bytes
			////////////////////////////////////////////////////////////////////////////

			promNetworkTransferResponse, err := client.RunPromQLQuery(newByteCounterInput("container_network_transmit_bytes_total"))
			if err != nil {
				t.Fatalf("Error while calling Prometheus API %v", err)
			}

			// Sum per-pod receive bytes into their namespace.
			for _, receiveItem := range promNetworkReceiveResponse.Data.Result {
				entry := entryFor(receiveItem.Metric.Namespace)
				recordPod(entry, receiveItem.Metric.Pod)
				entry.PromNetworkReceiveBytes += receiveItem.Value.Value
			}

			// Sum per-pod transmit bytes into their namespace. A namespace
			// that only shows up here keeps a receive total of zero.
			for _, transferItem := range promNetworkTransferResponse.Data.Result {
				entry := entryFor(transferItem.Metric.Namespace)
				recordPod(entry, transferItem.Metric.Pod)
				entry.PromNetworkTransferBytes += transferItem.Value.Value
			}

			/////////////////////////////////////////////
			// API Client
			/////////////////////////////////////////////

			apiResponse, err := apiObj.GetAllocation(api.AllocationRequest{
				Window:     tc.window,
				Aggregate:  tc.aggregate,
				Accumulate: tc.accumulate,
			})
			if err != nil {
				t.Fatalf("Error while calling Allocation API %v", err)
			}
			// Stop here on a bad response: the payload below cannot be
			// trusted and indexing an empty Data would panic.
			if apiResponse.Code != 200 {
				t.Fatalf("API returned non-200 code: %v", apiResponse.Code)
			}
			if len(apiResponse.Data) == 0 {
				t.Fatalf("Allocation API returned no allocation sets for window %s", tc.window)
			}

			// Only the first allocation set is compared.
			for namespace, allocationResponseItem := range apiResponse.Data[0] {
				entry := entryFor(namespace)
				entry.AllocNetworkReceiveBytes = allocationResponseItem.NetworkReceiveBytes
				entry.AllocNetworkTransferBytes = allocationResponseItem.NetworkTransferBytes
			}

			// Compare each counter independently so a transmit mismatch
			// cannot mask (or fake) a receive mismatch.
			for namespace, networkCostValues := range networkCostsNamespaceMap {
				t.Logf("Namespace %s", namespace)

				transferOK, transferDiffPercent := utils.AreWithinPercentage(networkCostValues.AllocNetworkTransferBytes, networkCostValues.PromNetworkTransferBytes, tolerance)
				if !transferOK {
					t.Errorf("  - NetworkTransferBytes[Fail]: DifferencePercent: %0.2f, Prometheus: %0.2f, /allocation: %0.2f", transferDiffPercent, networkCostValues.PromNetworkTransferBytes, networkCostValues.AllocNetworkTransferBytes)
				} else {
					t.Logf("  - NetworkTransferBytes[Pass]: ~ %0.2f", networkCostValues.PromNetworkTransferBytes)
				}

				receiveOK, receiveDiffPercent := utils.AreWithinPercentage(networkCostValues.AllocNetworkReceiveBytes, networkCostValues.PromNetworkReceiveBytes, tolerance)
				if !receiveOK {
					t.Errorf("  - NetworkReceiveBytes[Fail]: DifferencePercent: %0.2f, Prometheus: %0.2f, /allocation: %0.2f", receiveDiffPercent, networkCostValues.PromNetworkReceiveBytes, networkCostValues.AllocNetworkReceiveBytes)
				} else {
					t.Logf("  - NetworkReceiveBytes[Pass]: ~ %0.2f", networkCostValues.PromNetworkReceiveBytes)
				}
			}
		})
	}
}
180 changes: 180 additions & 0 deletions test/integration/prometheus/network_internet_costs_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
package prometheus

// Description - Compares Network Internet Costs from Prometheus and Allocation

import (
// "fmt"
"github.com/opencost/opencost-integration-tests/pkg/api"
"github.com/opencost/opencost-integration-tests/pkg/prometheus"
"github.com/opencost/opencost-integration-tests/pkg/utils"
"testing"
"time"
)

const tolerance = 0.05

func TestNetworkInternetCosts(t *testing.T) {
apiObj := api.NewAPI()

testCases := []struct {
name string
window string
aggregate string
accumulate string
}{
{
name: "Yesterday",
window: "24h",
aggregate: "pod",
accumulate: "false",
},
}

t.Logf("testCases: %v", testCases)

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {

// Any data that is in a "raw allocation only" is not valid in any
// sort of cumulative Allocation (like one that is added).

type NetworkCostsAggregate struct {
PromNetworkInternetGiB float64
AllocNetworkInternetGiB float64
}

networkCostsPodMap := make(map[string]*NetworkCostsAggregate)

queryEnd := time.Now().UTC().Truncate(time.Hour).Add(time.Hour)
endTime := queryEnd.Unix()
// Collect Namespace results from Prometheus
client := prometheus.NewClient()

////////////////////////////////////////////////////////////////////////////
// Network Internet GiB

// sum(increase(kubecost_pod_network_egress_bytes_total{internet="true"}[24h:5m])) by (pod_name, namespace) / 1024 / 1024 / 1024
// Apply Division by 1024^3 when you are looping over the response
////////////////////////////////////////////////////////////////////////////

promNetworkInternetInput := prometheus.PrometheusInput{
Metric: "kubecost_pod_network_egress_bytes_total",
}
promNetworkInternetInput.Filters = map[string]string{
"internet": "true",
}
promNetworkInternetInput.Function = []string{"increase", "sum"}
promNetworkInternetInput.QueryWindow = tc.window
promNetworkInternetInput.QueryResolution = "5m"
promNetworkInternetInput.AggregateBy = []string{"pod_name", "namespace"}
promNetworkInternetInput.Time = &endTime

promNetworkInternetResponse, err := client.RunPromQLQuery(promNetworkInternetInput)
if err != nil {
t.Fatalf("Error while calling Prometheus API %v", err)
}

////////////////////////////////////////////////////////////////////////////
// Network Internet price per GiB

// avg(avg_over_time(kubecost_network_internet_egress_cost{%s}[%s])) by (%s)
////////////////////////////////////////////////////////////////////////////

promNetworkInternetCostInput := prometheus.PrometheusInput{
Metric: "kubecost_network_internet_egress_cost",
}
promNetworkInternetCostInput.Function = []string{"avg_over_time", "avg"}
promNetworkInternetCostInput.QueryWindow = tc.window
promNetworkInternetCostInput.Time = &endTime

promNetworkInternetCostResponse, err := client.RunPromQLQuery(promNetworkInternetCostInput)
if err != nil {
t.Fatalf("Error while calling Prometheus API %v", err)
}

// --------------------------------
// Network Internet Cost for all Pods
// --------------------------------

networkInternetCost := promNetworkInternetCostResponse.Data.Result[0].Value.Value

// --------------------------------
// Assign Network Costs to Pods and Cumulate based on Namespace
// --------------------------------

// Form a key based on namespace and pod name

for _, promNetworkInternetItem := range promNetworkInternetResponse.Data.Result {
// namespace := promNetworkInternetItem.Metric.Namespace
pod := promNetworkInternetItem.Metric.PodName
gib := promNetworkInternetItem.Value.Value


networkCostsPodMap[pod] = &NetworkCostsAggregate{
PromNetworkInternetGiB: (gib / 1024 / 1024 / 1024) * networkInternetCost,
AllocNetworkInternetGiB: 0.0,
}

// networkCostsNamespace, ok := networkCostsPodMap[namespace]
// if !ok {
// networkCostsPodMap[pod] = &NetworkCostsAggregate{
// PromNetworkInternetGiB: (gib / 1024 / 1024 / 1024) * networkInternetCost,
// AllocNetworkInternetGiB: 0.0,
// }
// continue
// }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this commented?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted the option to drill down to a pod level if needed. do you think we need to go down further down?

}


/////////////////////////////////////////////
// API Client
/////////////////////////////////////////////

// Why doesn't allocation work on Namespace aggregate?
apiResponse, err := apiObj.GetAllocation(api.AllocationRequest{
Window: tc.window,
Aggregate: tc.aggregate,
Accumulate: tc.accumulate,
})

if err != nil {
t.Fatalf("Error while calling Allocation API %v", err)
}
if apiResponse.Code != 200 {
t.Errorf("API returned non-200 code")
}

for pod, allocationResponseItem := range apiResponse.Data[0] {
networkCostsPod, ok := networkCostsPodMap[pod]
if !ok {
networkCostsPodMap[pod] = &NetworkCostsAggregate{
PromNetworkInternetGiB: 0.0,
AllocNetworkInternetGiB: allocationResponseItem.NetworkInternetCost,
}
continue
}
networkCostsPod.AllocNetworkInternetGiB = allocationResponseItem.NetworkInternetCost
}

validCostsSeen := false
negligilbleCost := 0.01 // 1 Cent of a Dollar
for pod, networkCostValues := range networkCostsPodMap {
if networkCostValues.AllocNetworkInternetGiB < negligilbleCost {
continue
} else {
validCostsSeen = true
}
t.Logf("Pod %s", pod)
withinRange, diff_percent := utils.AreWithinPercentage(networkCostValues.AllocNetworkInternetGiB, networkCostValues.PromNetworkInternetGiB, tolerance)
if !withinRange {
t.Errorf(" - NetworkInternetCost[Fail]: DifferencePercent: %0.2f, Prometheus: %0.9f, /allocation: %0.9f", diff_percent, networkCostValues.PromNetworkInternetGiB, networkCostValues.AllocNetworkInternetGiB)
} else {
t.Logf(" - NetworkInternetCost[Pass]: ~ %0.5f", networkCostValues.PromNetworkInternetGiB)
}
}
if !validCostsSeen {
t.Errorf("NetWork Internet Costs for all Pods are below 1 cent and hence cannot be considered as costs from resource usage and validated.")
}
})
}
}
Loading
Loading