tensorflow · jeongukjae · Jun 15, 2023
diff --git a/tensorflow_serving/servables/tensorflow/classifier.cc b/tensorflow_serving/servables/tensorflow/classifier.cc
@@ -74,8 +74,10 @@ class SavedModelTensorFlowClassifier : public ClassifierInterface {
         run_options_, request.input(), input_tensor_name, output_tensor_names,
         session_, &outputs, &num_examples, thread_pool_options_,
         &runtime_latency));
-    RecordRuntimeLatency(request.model_spec().name(), /*api=*/"Classify",
-                         /*runtime=*/"TF1", runtime_latency);
+    RecordRuntimeLatency(request.model_spec().name(),
+                         /*signature_name=*/"tensorflow/serving/classify",
+                         /*api=*/"Classify", /*runtime=*/"TF1",
+                         runtime_latency);
 
     TRACELITERAL("ConvertToClassificationResult");
     return PostProcessClassificationResult(

diff --git a/tensorflow_serving/servables/tensorflow/predict_util.cc b/tensorflow_serving/servables/tensorflow/predict_util.cc
@@ -104,7 +104,8 @@ Status RunPredict(
                                   output_tensor_names, {}, &outputs,
                                   &run_metadata, thread_pool_options));
   const uint64_t end_microseconds = EnvTime::NowMicros();
-  RecordRuntimeLatency(request.model_spec().name(), /*api=*/"Predict",
+  RecordRuntimeLatency(request.model_spec().name(),
+                       /*signature_name=*/signature_name, /*api=*/"Predict",
                        /*runtime=*/"TF1",
                        end_microseconds - start_microseconds);
 

diff --git a/tensorflow_serving/servables/tensorflow/regressor.cc b/tensorflow_serving/servables/tensorflow/regressor.cc
@@ -73,8 +73,9 @@ class SavedModelTensorFlowRegressor : public RegressorInterface {
         run_options_, request.input(), input_tensor_name, output_tensor_names,
         session_, &outputs, &num_examples, thread_pool_options_,
         &runtime_latency));
-    RecordRuntimeLatency(request.model_spec().name(), /*api=*/"Regress",
-                         /*runtime=*/"TF1", runtime_latency);
+    RecordRuntimeLatency(request.model_spec().name(),
+                         /*signature_name=*/"tensorflow/serving/regress",
+                         /*api=*/"Regress", /*runtime=*/"TF1", runtime_latency);
 
     TRACELITERAL("ConvertToRegressionResult");
     return PostProcessRegressionResult(*signature_, num_examples,

diff --git a/tensorflow_serving/servables/tensorflow/util.cc b/tensorflow_serving/servables/tensorflow/util.cc
@@ -60,11 +60,12 @@ auto* model_request_status_count_total = monitoring::Counter<2>::New(
     "/tensorflow/serving/request_count", "The total number of requests.",
     "model_name", "status");
 
-auto* runtime_latency = monitoring::Sampler<3>::New(
+auto* runtime_latency = monitoring::Sampler<4>::New(
     {
         "/tensorflow/serving/runtime_latency",
         "Distribution of wall time (in microseconds) for Tensorflow runtime.",
         "model_name",
+        "signature_name",
         "API",
         "runtime",
     },  // Scale of 10, power of 1.8 with bucket count 33 (~20 minutes).
@@ -341,9 +342,11 @@ Status EstimateResourceFromPathUsingDiskState(const string& path,
   return OkStatus();
 }
 
-void RecordRuntimeLatency(const string& model_name, const string& api,
+void RecordRuntimeLatency(const string& model_name,
+                          const string& signature_name, const string& api,
                           const string& runtime, int64_t latency_usec) {
-  runtime_latency->GetCell(model_name, api, runtime)->Add(latency_usec);
+  runtime_latency->GetCell(model_name, signature_name, api, runtime)->Add(
+      latency_usec);  // Labels follow the sampler's declared order.
 }
 
 void RecordRequestLatency(const string& model_name, const string& api,

diff --git a/tensorflow_serving/servables/tensorflow/util.h b/tensorflow_serving/servables/tensorflow/util.h
@@ -115,7 +115,8 @@ Status EstimateResourceFromPathUsingDiskState(const string& path,
                                               ResourceAllocation* estimate);
 
 // Update metrics for runtime latency.
-void RecordRuntimeLatency(const string& model_name, const string& api,
+void RecordRuntimeLatency(const string& model_name,  // Strings are labels.
+                          const string& signature_name, const string& api,
                           const string& runtime, int64_t latency_usec);
 
 // Update metrics for request latency.