Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
* [FEATURE] Querier: Support for configuring query optimizers and enabling XFunctions in the Thanos engine. #6873
* [FEATURE] Query Frontend: Add support for the /api/v1/format_query API for formatting queries. #6893
* [FEATURE] Query Frontend: Add support for /api/v1/parse_query API (experimental) to parse a PromQL expression and return it as a JSON-formatted AST (abstract syntax tree). #6978
* [ENHANCEMENT] Overrides Exporter: Expose all fields that can be converted to float64. #6979
* [ENHANCEMENT] Ingester: Add `cortex_ingester_tsdb_wal_replay_unknown_refs_total` and `cortex_ingester_tsdb_wbl_replay_unknown_refs_total` metrics to track unknown series references during wal/wbl replaying. #6945
* [ENHANCEMENT] Ruler: Emit an error message when the rule synchronization fails. #6902
* [ENHANCEMENT] Querier: Support snappy and zstd response compression for `-querier.response-compression` flag. #6848
Expand Down
60 changes: 54 additions & 6 deletions pkg/util/validation/exporter.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package validation

import (
"reflect"
"strings"
"time"

"github.com/prometheus/client_golang/prometheus"
)

Expand Down Expand Up @@ -31,12 +35,56 @@ func (oe *OverridesExporter) Describe(ch chan<- *prometheus.Desc) {
// Collect sends one gauge per tenant and per exported limit on ch. Each gauge
// uses oe.description and carries the limit name and the tenant ID as label
// values, in that order.
//
// Every value comes from ExtractNumericalValues, so no limit is also sent by
// hand here: sending e.g. "ingestion_rate" both explicitly and through the
// extracted map would produce two metrics with identical label values for the
// same tenant.
func (oe *OverridesExporter) Collect(ch chan<- prometheus.Metric) {
	for tenant, limits := range oe.tenantLimits.AllByUserID() {
		for limitName, value := range ExtractNumericalValues(limits) {
			ch <- prometheus.MustNewConstMetric(oe.description, prometheus.GaugeValue, value, limitName, tenant)
		}
	}
}

// ExtractNumericalValues returns the numeric and boolean fields of l as
// float64 values keyed by the name in each field's `yaml` struct tag.
//
// Fields with no yaml tag, with the tag "-", or with a tag that carries only
// options and no name are skipped. Fields of kind int, int64, uint, uint64 and
// float64 are converted to float64; a field whose type prints as
// "model.Duration" is reported in seconds; booleans become 1 (true) or
// 0 (false). Fields of any other kind are ignored.
//
// A nil l yields an empty, non-nil map.
func ExtractNumericalValues(l *Limits) map[string]float64 {
	if l == nil {
		// Elem on a nil pointer gives the zero reflect.Value, and calling
		// Type on that panics, so bail out before reflecting.
		return map[string]float64{}
	}

	v := reflect.ValueOf(l).Elem()
	t := v.Type()
	metrics := make(map[string]float64, v.NumField())

	for i := 0; i < v.NumField(); i++ {
		tag := t.Field(i).Tag.Get("yaml")
		if tag == "" || tag == "-" {
			// The field has no yaml name or is explicitly excluded.
			continue
		}

		// Drop tag options such as ",omitempty"; a tag made only of options
		// (for example ",inline") leaves no name to export under.
		name, _, _ := strings.Cut(tag, ",")
		if name == "" {
			continue
		}

		field := v.Field(i)
		switch field.Kind() {
		case reflect.Int, reflect.Int64:
			if field.Type().String() == "model.Duration" {
				// model.Duration is exported in seconds.
				metrics[name] = time.Duration(field.Int()).Seconds()
			} else {
				metrics[name] = float64(field.Int())
			}
		case reflect.Uint, reflect.Uint64:
			metrics[name] = float64(field.Uint())
		case reflect.Float64:
			metrics[name] = field.Float()
		case reflect.Bool:
			// true is exported as 1, false as 0.
			if field.Bool() {
				metrics[name] = 1.0
			} else {
				metrics[name] = 0.0
			}
		default:
			// Strings, slices, maps, structs and every other kind have no
			// single numeric representation and are not exported.
		}
	}
	return metrics
}
158 changes: 158 additions & 0 deletions pkg/util/validation/exporter_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
package validation

import (
"flag"
"strings"
"testing"
"time"

"github.com/prometheus/client_golang/prometheus/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestOverridesExporter_noConfig(t *testing.T) {
Expand All @@ -21,10 +25,164 @@ func TestOverridesExporter_withConfig(t *testing.T) {
MaxQueriersPerTenant: 5,
},
}
fs := flag.NewFlagSet("test", flag.ContinueOnError)
tenantLimits["tenant-a"].RegisterFlags(fs)

exporter := NewOverridesExporter(newMockTenantLimits(tenantLimits))

// There should be at least a few metrics generated by receiving an override configuration map
count := testutil.CollectAndCount(exporter, "cortex_overrides")
assert.Greater(t, count, 0)
require.NoError(t, testutil.CollectAndCompare(exporter, strings.NewReader(`
# HELP cortex_overrides Resource limit overrides applied to tenants
# TYPE cortex_overrides gauge
cortex_overrides{limit_name="accept_ha_samples",user="tenant-a"} 0
cortex_overrides{limit_name="accept_mixed_ha_samples",user="tenant-a"} 0
cortex_overrides{limit_name="alertmanager_max_alerts_count",user="tenant-a"} 0
cortex_overrides{limit_name="alertmanager_max_alerts_size_bytes",user="tenant-a"} 0
cortex_overrides{limit_name="alertmanager_max_config_size_bytes",user="tenant-a"} 0
cortex_overrides{limit_name="alertmanager_max_dispatcher_aggregation_groups",user="tenant-a"} 0
cortex_overrides{limit_name="alertmanager_max_silences_count",user="tenant-a"} 0
cortex_overrides{limit_name="alertmanager_max_silences_size_bytes",user="tenant-a"} 0
cortex_overrides{limit_name="alertmanager_max_template_size_bytes",user="tenant-a"} 0
cortex_overrides{limit_name="alertmanager_max_templates_count",user="tenant-a"} 0
cortex_overrides{limit_name="alertmanager_notification_rate_limit",user="tenant-a"} 0
cortex_overrides{limit_name="alertmanager_receivers_firewall_block_private_addresses",user="tenant-a"} 0
cortex_overrides{limit_name="compactor_blocks_retention_period",user="tenant-a"} 0
cortex_overrides{limit_name="compactor_partition_index_size_bytes",user="tenant-a"} 6.8719476736e+10
cortex_overrides{limit_name="compactor_partition_series_count",user="tenant-a"} 0
cortex_overrides{limit_name="compactor_tenant_shard_size",user="tenant-a"} 0
cortex_overrides{limit_name="creation_grace_period",user="tenant-a"} 600
cortex_overrides{limit_name="enable_native_histograms",user="tenant-a"} 0
cortex_overrides{limit_name="enforce_metadata_metric_name",user="tenant-a"} 1
cortex_overrides{limit_name="enforce_metric_name",user="tenant-a"} 1
cortex_overrides{limit_name="ha_max_clusters",user="tenant-a"} 0
cortex_overrides{limit_name="ingestion_burst_size",user="tenant-a"} 50000
cortex_overrides{limit_name="ingestion_rate",user="tenant-a"} 25000
cortex_overrides{limit_name="ingestion_tenant_shard_size",user="tenant-a"} 0
cortex_overrides{limit_name="max_cache_freshness",user="tenant-a"} 60
cortex_overrides{limit_name="max_downloaded_bytes_per_request",user="tenant-a"} 0
cortex_overrides{limit_name="max_exemplars",user="tenant-a"} 0
cortex_overrides{limit_name="max_fetched_chunk_bytes_per_query",user="tenant-a"} 0
cortex_overrides{limit_name="max_fetched_chunks_per_query",user="tenant-a"} 2e+06
cortex_overrides{limit_name="max_fetched_data_bytes_per_query",user="tenant-a"} 0
cortex_overrides{limit_name="max_fetched_series_per_query",user="tenant-a"} 0
cortex_overrides{limit_name="max_global_metadata_per_metric",user="tenant-a"} 0
cortex_overrides{limit_name="max_global_metadata_per_user",user="tenant-a"} 0
cortex_overrides{limit_name="max_global_native_histogram_series_per_user",user="tenant-a"} 0
cortex_overrides{limit_name="max_global_series_per_metric",user="tenant-a"} 0
cortex_overrides{limit_name="max_global_series_per_user",user="tenant-a"} 0
cortex_overrides{limit_name="max_label_name_length",user="tenant-a"} 1024
cortex_overrides{limit_name="max_label_names_per_series",user="tenant-a"} 30
cortex_overrides{limit_name="max_label_value_length",user="tenant-a"} 2048
cortex_overrides{limit_name="max_labels_size_bytes",user="tenant-a"} 0
cortex_overrides{limit_name="max_metadata_length",user="tenant-a"} 1024
cortex_overrides{limit_name="max_metadata_per_metric",user="tenant-a"} 10
cortex_overrides{limit_name="max_metadata_per_user",user="tenant-a"} 8000
cortex_overrides{limit_name="max_native_histogram_buckets",user="tenant-a"} 0
cortex_overrides{limit_name="max_native_histogram_sample_size_bytes",user="tenant-a"} 0
cortex_overrides{limit_name="max_native_histogram_series_per_user",user="tenant-a"} 0
cortex_overrides{limit_name="max_outstanding_requests_per_tenant",user="tenant-a"} 100
cortex_overrides{limit_name="max_queriers_per_tenant",user="tenant-a"} 0
cortex_overrides{limit_name="max_query_length",user="tenant-a"} 0
cortex_overrides{limit_name="max_query_lookback",user="tenant-a"} 0
cortex_overrides{limit_name="max_query_parallelism",user="tenant-a"} 14
cortex_overrides{limit_name="max_query_response_size",user="tenant-a"} 0
cortex_overrides{limit_name="max_series_per_metric",user="tenant-a"} 50000
cortex_overrides{limit_name="max_series_per_user",user="tenant-a"} 5e+06
cortex_overrides{limit_name="native_histogram_ingestion_burst_size",user="tenant-a"} 0
cortex_overrides{limit_name="native_histogram_ingestion_rate",user="tenant-a"} 1.7976931348623157e+308
cortex_overrides{limit_name="out_of_order_time_window",user="tenant-a"} 0
cortex_overrides{limit_name="parquet_converter_enabled",user="tenant-a"} 0
cortex_overrides{limit_name="parquet_converter_tenant_shard_size",user="tenant-a"} 0
cortex_overrides{limit_name="parquet_max_fetched_chunk_bytes",user="tenant-a"} 0
cortex_overrides{limit_name="parquet_max_fetched_data_bytes",user="tenant-a"} 0
cortex_overrides{limit_name="parquet_max_fetched_row_count",user="tenant-a"} 0
cortex_overrides{limit_name="query_partial_data",user="tenant-a"} 0
cortex_overrides{limit_name="query_vertical_shard_size",user="tenant-a"} 0
cortex_overrides{limit_name="reject_old_samples",user="tenant-a"} 0
cortex_overrides{limit_name="reject_old_samples_max_age",user="tenant-a"} 1.2096e+06
cortex_overrides{limit_name="ruler_evaluation_delay_duration",user="tenant-a"} 0
cortex_overrides{limit_name="ruler_max_rule_groups_per_tenant",user="tenant-a"} 0
cortex_overrides{limit_name="ruler_max_rules_per_rule_group",user="tenant-a"} 0
cortex_overrides{limit_name="ruler_query_offset",user="tenant-a"} 0
cortex_overrides{limit_name="ruler_tenant_shard_size",user="tenant-a"} 0
cortex_overrides{limit_name="rules_partial_data",user="tenant-a"} 0
cortex_overrides{limit_name="store_gateway_tenant_shard_size",user="tenant-a"} 0
`), "cortex_overrides"))
}

// TestExtractNumericalValues runs ExtractNumericalValues on a Limits value on
// which RegisterFlags has been called, and checks which yaml names end up in
// the result and with what value.
func TestExtractNumericalValues(t *testing.T) {
	defaults := &Limits{}
	defaults.RegisterFlags(flag.NewFlagSet("test", flag.ContinueOnError))
	got := ExtractNumericalValues(defaults)

	// equals compares the entry stored under key (the zero value when the key
	// is missing) with want.
	equals := func(key string, want float64) func(*testing.T) {
		return func(t *testing.T) {
			require.Equal(t, want, got[key])
		}
	}
	// exported requires key to be present and its value to equal want.
	exported := func(key string, want float64) func(*testing.T) {
		return func(t *testing.T) {
			actual, found := got[key]
			require.True(t, found)
			require.Equal(t, want, actual)
		}
	}
	// skipped requires key to be absent from the result.
	skipped := func(key string, msgAndArgs ...interface{}) func(*testing.T) {
		return func(t *testing.T) {
			_, found := got[key]
			require.False(t, found, msgAndArgs...)
		}
	}

	cases := []struct {
		name  string
		check func(*testing.T)
	}{
		{"float64 should be converted", equals("ingestion_rate", defaults.IngestionRate)},
		{"int should be converted", equals("ingestion_burst_size", float64(defaults.IngestionBurstSize))},
		{"int64 should be converted", equals("max_query_response_size", float64(defaults.MaxQueryResponseSize))},
		{"string shouldn't be converted", skipped("ingestion_rate_strategy", "string should be not converted")},
		{"bool should be converted, default value false converted to 0", exported("accept_ha_samples", 0.0)},
		{"bool should be converted, default value true converted to 1", exported("enforce_metric_name", 1.0)},
		{"flagext.StringSlice shouldn't be converted", skipped("drop_labels")},
		{"model.Duration should be converted", exported("reject_old_samples_max_age", time.Duration(defaults.RejectOldSamplesMaxAge).Seconds())},
		{"[]*relabel.Config shouldn't be converted", skipped("metric_relabel_configs")},
		{"[]string shouldn't be converted", skipped("promote_resource_attributes")},
		{"[]LimitsPerLabelSet shouldn't be converted", skipped("limits_per_label_set")},
		{"QueryPriority shouldn't be converted", skipped("query_priority")},
		{"QueryRejection shouldn't be converted", skipped("query_rejection")},
		{"labels.Labels shouldn't be converted", skipped("ruler_external_labels")},
		{"flagext.CIDRSliceCSV shouldn't be converted", skipped("alertmanager_receivers_firewall_block_cidr_networks")},
		{"NotificationRateLimitMap shouldn't be converted", skipped("alertmanager_notification_rate_limit_per_integration")},
		{"DisabledRuleGroups shouldn't be converted", skipped("disabled_rule_groups")},
	}

	for _, tc := range cases {
		t.Run(tc.name, tc.check)
	}
}
Loading