docs: add TensorFlow Serving and TorchServe sample YAMLs
Conventions from past docs that we are hewing to here:
- if the file is valid YAML, it ends with .yaml
- if the file is a diff with '+' lines illustrating how you should modify your existing setup, it ends with .snippet so users don't blindly apply it (see the illustrative sketch below)
yqlu committed Feb 24, 2025
1 parent affaca0 commit f0e896b
Showing 8 changed files with 230 additions and 0 deletions.
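
To illustrate the .snippet convention described above, here is a hedged sketch of merging the single '+' line from examples/tf-serving/exporter.yaml.snippet into an existing container spec by hand, rather than applying the file directly; the placeholders ($MODEL_NAME, $PATH_TO_MONITORING_CONFIG) are the same ones used in the snippet, and the surrounding fields are abbreviated.

containers:
  - name: tfserve-server
    image: 'tensorflow/serving:2.13.1-gpu'
    command:
      - tensorflow_model_server
      - '--model_name=$MODEL_NAME'
      - '--rest_api_port=8000'
      # merged in by hand from the '+' line of exporter.yaml.snippet
      - '--monitoring_config_file=$PATH_TO_MONITORING_CONFIG'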
3 changes: 3 additions & 0 deletions examples/tf-serving/README.md
@@ -0,0 +1,3 @@
# TensorFlow Serving sample manifests

Please refer to the [Google Cloud documentation](https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/tf-serving) for how to use these manifests.
63 changes: 63 additions & 0 deletions examples/tf-serving/exporter.yaml.snippet
@@ -0,0 +1,63 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: apps/v1
kind: Deployment
metadata:
  name: tfserve-deployment
  labels:
    app: tfserve-server
spec:
  selector:
    matchLabels:
      app: tfserve
  replicas: 1
  template:
    metadata:
      labels:
        app: tfserve
      annotations:
        gke-gcsfuse/volumes: 'true'
    spec:
      nodeSelector:
        cloud.google.com/gke-accelerator: nvidia-l4
      containers:
        - name: tfserve-server
          image: 'tensorflow/serving:2.13.1-gpu'
          command:
            - tensorflow_model_server
            - '--model_name=$MODEL_NAME'
            - '--model_base_path=/data/tfserve-model-repository/$MODEL_NAME'
            - '--rest_api_port=8000'
+           - '--monitoring_config_file=$PATH_TO_MONITORING_CONFIG'
          ports:
            - name: http
              containerPort: 8000
            - name: grpc
              containerPort: 8500
          resources:
            ...
          volumeMounts:
            - name: gcs-fuse-csi-vol
              mountPath: /data
              readOnly: false
      serviceAccountName: $K8S_SA_NAME
      volumes:
        - name: gcs-fuse-csi-vol
          csi:
            driver: gcsfuse.csi.storage.gke.io
            readOnly: false
            volumeAttributes:
              bucketName: $GSBUCKET
              mountOptions: implicit-dirs
4 changes: 4 additions & 0 deletions examples/tf-serving/monitoring_config.txt
@@ -0,0 +1,4 @@
prometheus_config {
  enable: true,
  path: "/monitoring/prometheus/metrics"
}
30 changes: 30 additions & 0 deletions examples/tf-serving/pod-monitoring.yaml
@@ -0,0 +1,30 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: tfserve
  labels:
    app.kubernetes.io/name: tfserve
    app.kubernetes.io/part-of: google-cloud-managed-prometheus
spec:
  endpoints:
    - port: 8000
      scheme: http
      interval: 30s
      path: /monitoring/prometheus/metrics
  selector:
    matchLabels:
      app: tfserve
3 changes: 3 additions & 0 deletions examples/torchserve/README.md
@@ -0,0 +1,3 @@
# TorchServe sample manifests

Please refer to the [Google Cloud documentation](https://cloud.google.com/stackdriver/docs/managed-prometheus/exporters/torchserve) for how to use these manifests.
23 changes: 23 additions & 0 deletions examples/torchserve/config.properties.snippet
@@ -0,0 +1,23 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
+ metrics_address=http://0.0.0.0:8082
+ metrics_mode=prometheus
number_of_netty_threads=32
job_queue_size=1000
install_py_dep_per_model=true
model_store=/home/model-server/model-store
load_models=all
72 changes: 72 additions & 0 deletions examples/torchserve/exporter.yaml.snippet
@@ -0,0 +1,72 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: apps/v1
kind: Deployment
metadata:
  name: t5-inference
  labels:
    model: t5
    version: v1.0
    machine: gpu
spec:
  replicas: 1
  selector:
    matchLabels:
      model: t5
      version: v1.0
      machine: gpu
  template:
    metadata:
      labels:
        model: t5
        version: v1.0
        machine: gpu
    spec:
      nodeSelector:
        cloud.google.com/gke-accelerator: nvidia-l4
      containers:
        - name: inference
          ...
          args: ["torchserve", "--start", "--foreground"]
          resources:
            ...
          ports:
            - containerPort: 8080
              name: http
            - containerPort: 8081
              name: management
+           - containerPort: 8082
+             name: metrics
---
apiVersion: v1
kind: Service
metadata:
  name: t5-inference
  labels:
    model: t5
    version: v1.0
    machine: gpu
spec:
  ...
  ports:
    - port: 8080
      name: http
      targetPort: http
    - port: 8081
      name: management
      targetPort: management
+   - port: 8082
+     name: metrics
+     targetPort: metrics
32 changes: 32 additions & 0 deletions examples/torchserve/pod-monitoring.yaml
@@ -0,0 +1,32 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: torchserve
  labels:
    app.kubernetes.io/name: torchserve
    app.kubernetes.io/part-of: google-cloud-managed-prometheus
spec:
  endpoints:
    - port: 8082
      scheme: http
      interval: 30s
      path: /metrics
  selector:
    matchLabels:
      model: t5
      version: v1.0
      machine: gpu
