|
| 1 | +package metrics |
| 2 | + |
| 3 | +import ( |
| 4 | + "sync" |
| 5 | + "time" |
| 6 | + |
| 7 | + compbasemetrics "k8s.io/component-base/metrics" |
| 8 | + "k8s.io/component-base/metrics/legacyregistry" |
| 9 | + klog "k8s.io/klog/v2" |
| 10 | +) |
| 11 | + |
// InferenceModelComponent is the name used as the Subsystem of the inference
// model metrics declared in this file.
const InferenceModelComponent = "inference_model"
| 15 | + |
| 16 | +var ( |
| 17 | + requestCounter = compbasemetrics.NewCounterVec( |
| 18 | + &compbasemetrics.CounterOpts{ |
| 19 | + Subsystem: InferenceModelComponent, |
| 20 | + Name: "request_total", |
| 21 | + Help: "Counter of inference model requests broken out for each model and target model.", |
| 22 | + StabilityLevel: compbasemetrics.ALPHA, |
| 23 | + }, |
| 24 | + []string{"model_name", "target_model_name"}, |
| 25 | + ) |
| 26 | + |
| 27 | + requestLatencies = compbasemetrics.NewHistogramVec( |
| 28 | + &compbasemetrics.HistogramOpts{ |
| 29 | + Subsystem: InferenceModelComponent, |
| 30 | + Name: "request_duration_seconds", |
| 31 | + Help: "Inference model response latency distribution in seconds for each model and target model.", |
| 32 | + Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3, |
| 33 | + 4, 5, 6, 8, 10, 15, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600}, |
| 34 | + StabilityLevel: compbasemetrics.ALPHA, |
| 35 | + }, |
| 36 | + []string{"model_name", "target_model_name"}, |
| 37 | + ) |
| 38 | + |
| 39 | + requestSizes = compbasemetrics.NewHistogramVec( |
| 40 | + &compbasemetrics.HistogramOpts{ |
| 41 | + Subsystem: InferenceModelComponent, |
| 42 | + Name: "request_sizes", |
| 43 | + Help: "Inference model requests size distribution in bytes for each model and target model.", |
| 44 | + // Use buckets ranging from 1000 bytes (1KB) to 10^9 bytes (1GB). |
| 45 | + Buckets: []float64{ |
| 46 | + 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, // More fine-grained up to 64KB |
| 47 | + 131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608, // Exponential up to 8MB |
| 48 | + 16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824, // Exponential up to 1GB |
| 49 | + }, |
| 50 | + StabilityLevel: compbasemetrics.ALPHA, |
| 51 | + }, |
| 52 | + []string{"model_name", "target_model_name"}, |
| 53 | + ) |
| 54 | +) |
| 55 | + |
| 56 | +var registerMetrics sync.Once |
| 57 | + |
| 58 | +// Register all metrics. |
| 59 | +func Register() { |
| 60 | + registerMetrics.Do(func() { |
| 61 | + legacyregistry.MustRegister(requestCounter) |
| 62 | + legacyregistry.MustRegister(requestLatencies) |
| 63 | + legacyregistry.MustRegister(requestSizes) |
| 64 | + }) |
| 65 | +} |
| 66 | + |
| 67 | +// RecordRequstCounter records the number of requests. |
| 68 | +func RecordRequestCounter(modelName, targetModelName string) { |
| 69 | + requestCounter.WithLabelValues(modelName, targetModelName).Inc() |
| 70 | +} |
| 71 | + |
| 72 | +// RecordRequestSizes records the request sizes. |
| 73 | +func RecordRequestSizes(modelName, targetModelName string, reqSize int) { |
| 74 | + requestSizes.WithLabelValues(modelName, targetModelName).Observe(float64(reqSize)) |
| 75 | +} |
| 76 | + |
| 77 | +// RecordRequstLatencies records duration of request. |
| 78 | +func RecordRequestLatencies(modelName, targetModelName string, received time.Time, complete time.Time) bool { |
| 79 | + if !complete.After(received) { |
| 80 | + klog.Errorf("request latency value error for model name %v, target model name %v: complete time %v is before received time %v", modelName, targetModelName, complete, received) |
| 81 | + return false |
| 82 | + } |
| 83 | + elapsedSeconds := complete.Sub(received).Seconds() |
| 84 | + requestLatencies.WithLabelValues(modelName, targetModelName).Observe(elapsedSeconds) |
| 85 | + return true |
| 86 | +} |
0 commit comments