Skip to content

Commit c966806

Browse files
committed
add request metrics
Signed-off-by: Jie WU <[email protected]> add request metrics Signed-off-by: Jie WU <[email protected]> rename api and metrics fix go mod Adding metrics handler Signed-off-by: Jie WU <[email protected]> Adding metrics handler Signed-off-by: Jie WU <[email protected]> add request metrics rename api and metrics fix mod Updated request metrics to be handled in server processing loop Signed-off-by: Jie WU <[email protected]> Updated request metrics to be handled in server processing loop Signed-off-by: Jie WU <[email protected]> fix go mod Signed-off-by: Jie WU <[email protected]> fix go mod Signed-off-by: Jie WU <[email protected]>
1 parent f1dda9f commit c966806

12 files changed

+518
-2
lines changed

go.mod

+4-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ require (
1111
github.com/jhump/protoreflect v1.17.0
1212
github.com/onsi/ginkgo/v2 v2.22.2
1313
github.com/onsi/gomega v1.36.2
14+
github.com/prometheus/client_golang v1.20.4
1415
github.com/prometheus/client_model v0.6.1
1516
github.com/prometheus/common v0.61.0
1617
github.com/stretchr/testify v1.10.0
@@ -21,6 +22,7 @@ require (
2122
k8s.io/apimachinery v0.31.4
2223
k8s.io/client-go v0.31.4
2324
k8s.io/code-generator v0.31.4
25+
k8s.io/component-base v0.31.4
2426
k8s.io/klog/v2 v2.130.1
2527
sigs.k8s.io/controller-runtime v0.19.3
2628
sigs.k8s.io/structured-merge-diff/v4 v4.5.0
@@ -35,6 +37,7 @@ require (
3537
github.com/Masterminds/sprig/v3 v3.2.3 // indirect
3638
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 // indirect
3739
github.com/beorn7/perks v1.0.1 // indirect
40+
github.com/blang/semver/v4 v4.0.0 // indirect
3841
github.com/bufbuild/protocompile v0.14.1 // indirect
3942
github.com/cespare/xxhash/v2 v2.3.0 // indirect
4043
github.com/cncf/xds/go v0.0.0-20240905190251-b4127c9b8d78 // indirect
@@ -63,6 +66,7 @@ require (
6366
github.com/josharian/intern v1.0.0 // indirect
6467
github.com/json-iterator/go v1.1.12 // indirect
6568
github.com/klauspost/compress v1.17.9 // indirect
69+
github.com/kylelemons/godebug v1.1.0 // indirect
6670
github.com/mailru/easyjson v0.7.7 // indirect
6771
github.com/mitchellh/copystructure v1.0.0 // indirect
6872
github.com/mitchellh/reflectwalk v1.0.1 // indirect
@@ -72,7 +76,6 @@ require (
7276
github.com/pkg/errors v0.9.1 // indirect
7377
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
7478
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
75-
github.com/prometheus/client_golang v1.20.4 // indirect
7679
github.com/prometheus/procfs v0.15.1 // indirect
7780
github.com/shopspring/decimal v1.2.0 // indirect
7881
github.com/spf13/cast v1.4.1 // indirect

go.sum

+4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafo
1515
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
1616
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
1717
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
18+
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
19+
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
1820
github.com/bojand/ghz v0.120.0 h1:6F4wsmZVwFg5UnD+/R+IABWk6sKE/0OKIBdUQUZnOdo=
1921
github.com/bojand/ghz v0.120.0/go.mod h1:HfECuBZj1v02XObGnRuoZgyB1PR24/25dIYiJIMjJnE=
2022
github.com/bufbuild/protocompile v0.14.1 h1:iA73zAf/fyljNjQKwYzUHD6AD4R8KMasmwa/FBatYVw=
@@ -278,6 +280,8 @@ k8s.io/client-go v0.31.4 h1:t4QEXt4jgHIkKKlx06+W3+1JOwAFU/2OPiOo7H92eRQ=
278280
k8s.io/client-go v0.31.4/go.mod h1:kvuMro4sFYIa8sulL5Gi5GFqUPvfH2O/dXuKstbaaeg=
279281
k8s.io/code-generator v0.31.4 h1:Vu+8fKz+239rKiVDHFVHgjQ162cg5iUQPtTyQbwXeQw=
280282
k8s.io/code-generator v0.31.4/go.mod h1:yMDt13Kn7m4MMZ4LxB1KBzdZjEyxzdT4b4qXq+lnI90=
283+
k8s.io/component-base v0.31.4 h1:wCquJh4ul9O8nNBSB8N/o8+gbfu3BVQkVw9jAUY/Qtw=
284+
k8s.io/component-base v0.31.4/go.mod h1:G4dgtf5BccwiDT9DdejK0qM6zTK0jwDGEKnCmb9+u/s=
281285
k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70 h1:NGrVE502P0s0/1hudf8zjgwki1X/TByhmAoILTarmzo=
282286
k8s.io/gengo/v2 v2.0.0-20240228010128-51d4e06bde70/go.mod h1:VH3AT8AaQOqiGjMF9p0/IM1Dj+82ZwjfxUP1IxaHE+8=
283287
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=

pkg/ext-proc/handlers/request.go

+2
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ func (s *Server) HandleRequestBody(reqCtx *RequestContext, req *extProcPb.Proces
7676
klog.V(3).Infof("Selected target model %v in target pod: %v\n", llmReq.ResolvedTargetModel, targetPod)
7777

7878
reqCtx.Model = llmReq.Model
79+
reqCtx.ResolvedTargetModel = llmReq.ResolvedTargetModel
80+
reqCtx.RequestSize = len(v.RequestBody.Body)
7981
reqCtx.TargetPod = targetPod
8082

8183
// Insert "target-pod" to instruct Envoy to route requests to the specified target pod.

pkg/ext-proc/handlers/server.go

+15-1
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@ package handlers
22

33
import (
44
"io"
5+
"time"
56

67
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
78
envoyTypePb "github.com/envoyproxy/go-control-plane/envoy/type/v3"
89
"google.golang.org/grpc/codes"
910
"google.golang.org/grpc/status"
1011
"inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1"
1112
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
13+
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics"
1214
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling"
1315
klog "k8s.io/klog/v2"
1416
)
@@ -75,22 +77,30 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
7577
var resp *extProcPb.ProcessingResponse
7678
switch v := req.Request.(type) {
7779
case *extProcPb.ProcessingRequest_RequestHeaders:
80+
reqCtx.RequestReceivedTimestamp = time.Now()
7881
resp = HandleRequestHeaders(reqCtx, req)
7982
klog.V(3).Infof("Request context after HandleRequestHeaders: %+v", reqCtx)
8083
case *extProcPb.ProcessingRequest_RequestBody:
8184
resp, err = s.HandleRequestBody(reqCtx, req)
85+
if err == nil {
86+
metrics.RecordRequestCounter(reqCtx.Model, reqCtx.ResolvedTargetModel)
87+
metrics.RecordRequestSizes(reqCtx.Model, reqCtx.ResolvedTargetModel, reqCtx.RequestSize)
88+
}
8289
klog.V(3).Infof("Request context after HandleRequestBody: %+v", reqCtx)
8390
case *extProcPb.ProcessingRequest_ResponseHeaders:
8491
resp, err = s.HandleResponseHeaders(reqCtx, req)
8592
klog.V(3).Infof("Request context after HandleResponseHeaders: %+v", reqCtx)
8693
case *extProcPb.ProcessingRequest_ResponseBody:
8794
resp, err = s.HandleResponseBody(reqCtx, req)
95+
reqCtx.ResponseCompleteTimestamp = time.Now()
96+
if err == nil {
97+
metrics.RecordRequestLatencies(reqCtx.Model, reqCtx.ResolvedTargetModel, reqCtx.RequestReceivedTimestamp, reqCtx.ResponseCompleteTimestamp)
98+
}
8899
klog.V(3).Infof("Request context after HandleResponseBody: %+v", reqCtx)
89100
default:
90101
klog.Errorf("Unknown Request type %+v", v)
91102
return status.Error(codes.Unknown, "unknown request type")
92103
}
93-
94104
if err != nil {
95105
klog.Errorf("failed to process request: %v", err)
96106
switch status.Code(err) {
@@ -123,5 +133,9 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
123133
type RequestContext struct {
124134
TargetPod backend.Pod
125135
Model string
136+
ResolvedTargetModel string
137+
RequestReceivedTimestamp time.Time
138+
ResponseCompleteTimestamp time.Time
139+
RequestSize int
126140
Response Response
127141
}

pkg/ext-proc/main.go

+5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
2020
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend/vllm"
2121
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/handlers"
22+
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics"
2223
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling"
2324
"k8s.io/apimachinery/pkg/runtime"
2425
"k8s.io/apimachinery/pkg/types"
@@ -33,6 +34,8 @@ var (
3334
"port",
3435
9002,
3536
"gRPC port")
37+
metricsPort = flag.Int(
38+
"metricsPort", 9090, "metrics port")
3639
targetPodHeader = flag.String(
3740
"targetPodHeader",
3841
"target-pod",
@@ -104,6 +107,8 @@ func main() {
104107
klog.Fatalf("failed to listen: %v", err)
105108
}
106109

110+
metrics.Register()
111+
go metrics.StartMetricsHandler(*metricsPort)
107112
datastore := backend.NewK8sDataStore()
108113

109114
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{

pkg/ext-proc/metrics/metrics.go

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
package metrics
2+
3+
import (
4+
"sync"
5+
"time"
6+
7+
compbasemetrics "k8s.io/component-base/metrics"
8+
"k8s.io/component-base/metrics/legacyregistry"
9+
klog "k8s.io/klog/v2"
10+
)
11+
12+
const (
13+
InferenceModelComponent = "inference_model"
14+
)
15+
16+
var (
17+
requestCounter = compbasemetrics.NewCounterVec(
18+
&compbasemetrics.CounterOpts{
19+
Subsystem: InferenceModelComponent,
20+
Name: "request_total",
21+
Help: "Counter of inference model requests broken out for each model and target model.",
22+
StabilityLevel: compbasemetrics.ALPHA,
23+
},
24+
[]string{"model_name", "target_model_name"},
25+
)
26+
27+
requestLatencies = compbasemetrics.NewHistogramVec(
28+
&compbasemetrics.HistogramOpts{
29+
Subsystem: InferenceModelComponent,
30+
Name: "request_duration_seconds",
31+
Help: "Inference model response latency distribution in seconds for each model and target model.",
32+
Buckets: []float64{0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
33+
4, 5, 6, 8, 10, 15, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600},
34+
StabilityLevel: compbasemetrics.ALPHA,
35+
},
36+
[]string{"model_name", "target_model_name"},
37+
)
38+
39+
requestSizes = compbasemetrics.NewHistogramVec(
40+
&compbasemetrics.HistogramOpts{
41+
Subsystem: InferenceModelComponent,
42+
Name: "request_sizes",
43+
Help: "Inference model requests size distribution in bytes for each model and target model.",
44+
// Use buckets ranging from 1000 bytes (1KB) to 10^9 bytes (1GB).
45+
Buckets: []float64{
46+
64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, // More fine-grained up to 64KB
47+
131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608, // Exponential up to 8MB
48+
16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824, // Exponential up to 1GB
49+
},
50+
StabilityLevel: compbasemetrics.ALPHA,
51+
},
52+
[]string{"model_name", "target_model_name"},
53+
)
54+
)
55+
56+
var registerMetrics sync.Once
57+
58+
// Register all metrics.
59+
func Register() {
60+
registerMetrics.Do(func() {
61+
legacyregistry.MustRegister(requestCounter)
62+
legacyregistry.MustRegister(requestLatencies)
63+
legacyregistry.MustRegister(requestSizes)
64+
})
65+
}
66+
67+
// RecordRequstCounter records the number of requests.
68+
func RecordRequestCounter(modelName, targetModelName string) {
69+
requestCounter.WithLabelValues(modelName, targetModelName).Inc()
70+
}
71+
72+
// RecordRequestSizes records the request sizes.
73+
func RecordRequestSizes(modelName, targetModelName string, reqSize int) {
74+
requestSizes.WithLabelValues(modelName, targetModelName).Observe(float64(reqSize))
75+
}
76+
77+
// RecordRequstLatencies records duration of request.
78+
func RecordRequestLatencies(modelName, targetModelName string, received time.Time, complete time.Time) bool {
79+
if !complete.After(received) {
80+
klog.Errorf("request latency value error for model name %v, target model name %v: complete time %v is before received time %v", modelName, targetModelName, complete, received)
81+
return false
82+
}
83+
elapsedSeconds := complete.Sub(received).Seconds()
84+
requestLatencies.WithLabelValues(modelName, targetModelName).Observe(elapsedSeconds)
85+
return true
86+
}
+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package metrics
2+
3+
import (
4+
"net"
5+
"net/http"
6+
"strconv"
7+
8+
"github.com/prometheus/client_golang/prometheus/promhttp"
9+
"k8s.io/component-base/metrics/legacyregistry"
10+
"k8s.io/klog/v2"
11+
)
12+
13+
func StartMetricsHandler(port int) {
14+
klog.Info("Starting metrics HTTP handler ...")
15+
16+
mux := http.NewServeMux()
17+
mux.Handle("/metrics", promhttp.HandlerFor(
18+
legacyregistry.DefaultGatherer,
19+
promhttp.HandlerOpts{},
20+
))
21+
22+
server := &http.Server{
23+
Addr: net.JoinHostPort("", strconv.Itoa(port)),
24+
Handler: mux,
25+
}
26+
if err := server.ListenAndServe(); err != http.ErrServerClosed {
27+
klog.Fatalf("failed to start metrics HTTP handler: %v", err)
28+
}
29+
}

0 commit comments

Comments
 (0)