Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions tensorflow_serving/servables/tensorflow/classifier.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,10 @@ class SavedModelTensorFlowClassifier : public ClassifierInterface {
run_options_, request.input(), input_tensor_name, output_tensor_names,
session_, &outputs, &num_examples, thread_pool_options_,
&runtime_latency));
RecordRuntimeLatency(request.model_spec().name(), /*api=*/"Classify",
/*runtime=*/"TF1", runtime_latency);
RecordRuntimeLatency(request.model_spec().name(),
/*signature_name=*/"tensorflow/serving/classify",
/*api=*/"Classify", /*runtime=*/"TF1",
runtime_latency);

TRACELITERAL("ConvertToClassificationResult");
return PostProcessClassificationResult(
Expand Down
3 changes: 2 additions & 1 deletion tensorflow_serving/servables/tensorflow/predict_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ Status RunPredict(
output_tensor_names, {}, &outputs,
&run_metadata, thread_pool_options));
const uint64_t end_microseconds = EnvTime::NowMicros();
RecordRuntimeLatency(request.model_spec().name(), /*api=*/"Predict",
RecordRuntimeLatency(request.model_spec().name(),
/*signature_name=*/signature_name, /*api=*/"Predict",
/*runtime=*/"TF1",
end_microseconds - start_microseconds);

Expand Down
5 changes: 3 additions & 2 deletions tensorflow_serving/servables/tensorflow/regressor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,9 @@ class SavedModelTensorFlowRegressor : public RegressorInterface {
run_options_, request.input(), input_tensor_name, output_tensor_names,
session_, &outputs, &num_examples, thread_pool_options_,
&runtime_latency));
RecordRuntimeLatency(request.model_spec().name(), /*api=*/"Regress",
/*runtime=*/"TF1", runtime_latency);
RecordRuntimeLatency(request.model_spec().name(),
/*signature_name=*/"tensorflow/serving/regress",
/*api=*/"Regress", /*runtime=*/"TF1", runtime_latency);

TRACELITERAL("ConvertToRegressionResult");
return PostProcessRegressionResult(*signature_, num_examples,
Expand Down
9 changes: 6 additions & 3 deletions tensorflow_serving/servables/tensorflow/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,12 @@ auto* model_request_status_count_total = monitoring::Counter<2>::New(
"/tensorflow/serving/request_count", "The total number of requests.",
"model_name", "status");

auto* runtime_latency = monitoring::Sampler<3>::New(
auto* runtime_latency = monitoring::Sampler<4>::New(
{
"/tensorflow/serving/runtime_latency",
"Distribution of wall time (in microseconds) for Tensorflow runtime.",
"model_name",
"signature_name",
"API",
"runtime",
}, // Scale of 10, power of 1.8 with bucket count 33 (~20 minutes).
Expand Down Expand Up @@ -341,9 +342,11 @@ Status EstimateResourceFromPathUsingDiskState(const string& path,
return OkStatus();
}

void RecordRuntimeLatency(const string& model_name, const string& api,
void RecordRuntimeLatency(const string& model_name,
const string& signature_name, const string& api,
const string& runtime, int64_t latency_usec) {
runtime_latency->GetCell(model_name, api, runtime)->Add(latency_usec);
runtime_latency->GetCell(model_name, signature_name, api, runtime)->Add(
latency_usec);
}

void RecordRequestLatency(const string& model_name, const string& api,
Expand Down
3 changes: 2 additions & 1 deletion tensorflow_serving/servables/tensorflow/util.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,8 @@ Status EstimateResourceFromPathUsingDiskState(const string& path,
ResourceAllocation* estimate);

// Update metrics for runtime latency.
void RecordRuntimeLatency(const string& model_name, const string& api,
void RecordRuntimeLatency(const string& model_name,
const string& signature_name, const string& api,
const string& runtime, int64_t latency_usec);

// Update metrics for request latency.
Expand Down