From 80ce52e098b201d8823acbeb85547072101c3152 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Mon, 1 Dec 2025 11:21:33 +0530 Subject: [PATCH 01/17] Update --- .../model_ready_check/check_model_ready.py | 48 ++++++++ .../model_ready_check/test.sh | 110 ++++++++++++++++++ qa/L0_backend_python/test.sh | 2 +- 3 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 qa/L0_backend_python/model_ready_check/check_model_ready.py create mode 100644 qa/L0_backend_python/model_ready_check/test.sh diff --git a/qa/L0_backend_python/model_ready_check/check_model_ready.py b/qa/L0_backend_python/model_ready_check/check_model_ready.py new file mode 100644 index 0000000000..70dc5aac2d --- /dev/null +++ b/qa/L0_backend_python/model_ready_check/check_model_ready.py @@ -0,0 +1,48 @@ +import unittest +import tritonclient.http as httpclient +import tritonclient.grpc as grpcclient + +class ModelReadyTest(unittest.TestCase): + def setUp(self): + self.model_name = "identity_fp32" + self.url_http = "localhost:8000" + self.url_grpc = "localhost:8001" + self.client_http = httpclient.InferenceServerClient(url=self.url_http) + self.client_grpc = grpcclient.InferenceServerClient(url=self.url_grpc) + + def test_model_ready(self): + print(f"\nTesting if model '{self.model_name}' is READY ...") + + # Check HTTP + try: + is_ready = self.client_http.is_model_ready(self.model_name) + self.assertTrue(is_ready, f"[HTTP] Model {self.model_name} should be READY but is NOT") + except Exception as e: + self.fail(f"[HTTP] Unexpected error: {str(e)}") + + # Check gRPC + try: + is_ready = self.client_grpc.is_model_ready(self.model_name) + self.assertTrue(is_ready, f"[gRPC] Model {self.model_name} should be READY but is NOT") + except Exception as e: + self.fail(f"[gRPC] Unexpected error: {str(e)}") + + def test_model_not_ready(self): + print(f"\nTesting if model '{self.model_name}' is NOT READY ...") + + # Check HTTP + try: + is_ready = self.client_http.is_model_ready(self.model_name) + self.assertFalse(is_ready, f"[HTTP] Model {self.model_name} should be NOT READY but is READY.") + except Exception as e: + self.fail(f"[HTTP] Unexpected error: {str(e)}") + + # Check gRPC + try: + is_ready = self.client_grpc.is_model_ready(self.model_name) + self.assertFalse(is_ready, f"[gRPC] Model {self.model_name} should be NOT READY but is READY") + except Exception as e: + self.fail(f"[gRPC] Unexpected error: {str(e)}") + +if __name__ == "__main__": + unittest.main() diff --git a/qa/L0_backend_python/model_ready_check/test.sh b/qa/L0_backend_python/model_ready_check/test.sh new file mode 100644 index 0000000000..9e7e53ebf6 --- /dev/null +++ b/qa/L0_backend_python/model_ready_check/test.sh @@ -0,0 +1,110 @@ +#!/bin/bash +# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +CLIENT_LOG="./model_ready_check_client.log" +TEST_RESULT_FILE='test_results.txt' +source ../common.sh +source ../../common/util.sh + +SERVER_ARGS="--model-repository=${MODELDIR}/model_ready_check/models --backend-directory=${BACKEND_DIR} --log-verbose=1" +SERVER_LOG="./model_ready_check_server.log" + +RET=0 +rm -fr *.log ./models + +mkdir -p models/identity_fp32/1/ +cp ../../python_models/identity_fp32/model.py ./models/identity_fp32/1/model.py +cp ../../python_models/identity_fp32/config.pbtxt ./models/identity_fp32/config.pbtxt + +# +# Test Model Ready Check (TRITONBACKEND_ModelInstanceReady) +# Test with different signals to simulate various crash/exit scenarios: +# 11 (SIGSEGV) - Segmentation fault / crash +# 9 (SIGKILL) - Force kill +# +for SIGNAL in 11 9; do + echo "----------------------------------------------------------------" + echo "Testing Model Ready Check with Signal $SIGNAL" + echo "----------------------------------------------------------------" + + run_server + if [ "$SERVER_PID" == "0" ]; then + cat $SERVER_LOG + echo -e "\n***\n*** Failed to start $SERVER\n***" + exit 1 + fi + + set +e + + # 1. Verify model is initially ready + echo "Checking Initial Readiness..." + python3 -m unittest check_model_ready.ModelReadyTest.test_model_ready + if [ $? -ne 0 ]; then + echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Initial readiness check failed \n***" + RET=1 + kill_server + exit 1 + fi + + # 2. Find the stub process PID + stub_pid=$(pgrep -f "triton_python_backend_stub*") + + if [ -z "$stub_pid" ]; then + echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Could not find stub process \n***" + RET=1 + kill_server + else + echo "Found stub process: $stub_pid" + + # 3. Kill the stub process + echo "Killing stub with signal $SIGNAL..." + kill -$SIGNAL $stub_pid + sleep 1 + + # 4. Verify model is now NOT ready + echo "Checking Not Ready Status..." + python3 -m unittest check_model_ready.ModelReadyTest.test_model_not_ready + if [ $? -ne 0 ]; then + echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Model reported ready after kill \n***" + RET=1 + else + echo "***\n Model Ready Check Passed for Signal $SIGNAL" + fi + fi + + set -e + kill_server +done + +if [ $RET -eq 0 ]; then + echo -e "\n***\n*** Test Passed\n***" +else + echo -e "\n***\n*** Test FAILED\n***" +fi + +exit $RET + diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index 58b92590e1..7edcb41906 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -522,7 +522,7 @@ SUBTESTS="lifecycle argument_validation logging custom_metrics parameters" # [DLIS-6123] Disable examples test for Windows since it requires updates to the example clients if [[ ${TEST_WINDOWS} == 0 ]]; then # TODO: Reimplement restart on decoupled data pipeline and enable restart. - SUBTESTS+=" model_control examples request_rescheduling" + SUBTESTS+=" model_control examples request_rescheduling model_ready_check" fi for TEST in ${SUBTESTS}; do # Run each subtest in a separate virtual environment to avoid conflicts From 748feba3cdf2648fee8505de59adb91c504b86e8 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 2 Dec 2025 15:39:34 +0530 Subject: [PATCH 02/17] Fix pre-commit --- .../model_ready_check/check_model_ready.py | 54 ++++++++++++++++--- .../model_ready_check/test.sh | 2 +- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/qa/L0_backend_python/model_ready_check/check_model_ready.py b/qa/L0_backend_python/model_ready_check/check_model_ready.py index 70dc5aac2d..88313979b9 100644 --- a/qa/L0_backend_python/model_ready_check/check_model_ready.py +++ b/qa/L0_backend_python/model_ready_check/check_model_ready.py @@ -1,6 +1,35 @@ +#!/bin/bash +# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + import unittest -import tritonclient.http as httpclient + import tritonclient.grpc as grpcclient +import tritonclient.http as httpclient + class ModelReadyTest(unittest.TestCase): def setUp(self): @@ -12,37 +41,48 @@ def setUp(self): def test_model_ready(self): print(f"\nTesting if model '{self.model_name}' is READY ...") - + # Check HTTP try: is_ready = self.client_http.is_model_ready(self.model_name) - self.assertTrue(is_ready, f"[HTTP] Model {self.model_name} should be READY but is NOT") + self.assertTrue( + is_ready, f"[HTTP] Model {self.model_name} should be READY but is NOT" + ) except Exception as e: self.fail(f"[HTTP] Unexpected error: {str(e)}") # Check gRPC try: is_ready = self.client_grpc.is_model_ready(self.model_name) - self.assertTrue(is_ready, f"[gRPC] Model {self.model_name} should be READY but is NOT") + self.assertTrue( + is_ready, f"[gRPC] Model {self.model_name} should be READY but is NOT" + ) except Exception as e: self.fail(f"[gRPC] Unexpected error: {str(e)}") def test_model_not_ready(self): print(f"\nTesting if model '{self.model_name}' is NOT READY ...") - + # Check HTTP try: is_ready = self.client_http.is_model_ready(self.model_name) - self.assertFalse(is_ready, f"[HTTP] Model {self.model_name} should be NOT READY but is READY.") + self.assertFalse( + is_ready, + f"[HTTP] Model {self.model_name} should be NOT READY but is READY.", + ) except Exception as e: self.fail(f"[HTTP] Unexpected error: {str(e)}") # Check gRPC try: is_ready = self.client_grpc.is_model_ready(self.model_name) - self.assertFalse(is_ready, f"[gRPC] Model {self.model_name} should be NOT READY but is READY") + self.assertFalse( + is_ready, + f"[gRPC] Model {self.model_name} should be NOT READY but is READY", + ) except Exception as e: self.fail(f"[gRPC] Unexpected error: {str(e)}") + if __name__ == "__main__": unittest.main() diff --git a/qa/L0_backend_python/model_ready_check/test.sh b/qa/L0_backend_python/model_ready_check/test.sh index 9e7e53ebf6..67c2784dce 100644 --- a/qa/L0_backend_python/model_ready_check/test.sh +++ b/qa/L0_backend_python/model_ready_check/test.sh @@ -60,7 +60,7 @@ for SIGNAL in 11 9; do set +e - # 1. Verify model is initially ready + # 1. Verify model is initially ready echo "Checking Initial Readiness..." python3 -m unittest check_model_ready.ModelReadyTest.test_model_ready if [ $? -ne 0 ]; then From 0f998e59bfe05a0685688986df52e33ed35c19d0 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 2 Dec 2025 21:46:46 +0530 Subject: [PATCH 03/17] Update --- qa/L0_backend_python/model_ready_check/test.sh | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/qa/L0_backend_python/model_ready_check/test.sh b/qa/L0_backend_python/model_ready_check/test.sh index 67c2784dce..540a0faa84 100644 --- a/qa/L0_backend_python/model_ready_check/test.sh +++ b/qa/L0_backend_python/model_ready_check/test.sh @@ -42,14 +42,11 @@ cp ../../python_models/identity_fp32/config.pbtxt ./models/identity_fp32/config. # # Test Model Ready Check (TRITONBACKEND_ModelInstanceReady) -# Test with different signals to simulate various crash/exit scenarios: +# Test with different signals to simulate various crash/exit scenarios # 11 (SIGSEGV) - Segmentation fault / crash # 9 (SIGKILL) - Force kill -# for SIGNAL in 11 9; do - echo "----------------------------------------------------------------" - echo "Testing Model Ready Check with Signal $SIGNAL" - echo "----------------------------------------------------------------" + echo -e "\n***\n*** Testing Model Ready Check with Signal $SIGNAL\n***" run_server if [ "$SERVER_PID" == "0" ]; then @@ -72,7 +69,7 @@ for SIGNAL in 11 9; do # 2. Find the stub process PID stub_pid=$(pgrep -f "triton_python_backend_stub*") - + if [ -z "$stub_pid" ]; then echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Could not find stub process \n***" RET=1 From 59b5be241cc9d1104d74ab390b5f139cd632c71c Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 2 Dec 2025 21:49:50 +0530 Subject: [PATCH 04/17] Fix pre-commit --- qa/L0_backend_python/model_ready_check/check_model_ready.py | 0 qa/L0_backend_python/model_ready_check/test.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 qa/L0_backend_python/model_ready_check/check_model_ready.py mode change 100644 => 100755 qa/L0_backend_python/model_ready_check/test.sh diff --git a/qa/L0_backend_python/model_ready_check/check_model_ready.py b/qa/L0_backend_python/model_ready_check/check_model_ready.py old mode 100644 new mode 100755 diff --git a/qa/L0_backend_python/model_ready_check/test.sh b/qa/L0_backend_python/model_ready_check/test.sh old mode 100644 new mode 100755 From 0194ef64ebe2fb14a8089020a066974dc25e3f61 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 2 Dec 2025 21:57:02 +0530 Subject: [PATCH 05/17] Update qa/L0_backend_python/model_ready_check/check_model_ready.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- qa/L0_backend_python/model_ready_check/check_model_ready.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qa/L0_backend_python/model_ready_check/check_model_ready.py b/qa/L0_backend_python/model_ready_check/check_model_ready.py index 88313979b9..f93dcfce73 100755 --- a/qa/L0_backend_python/model_ready_check/check_model_ready.py +++ b/qa/L0_backend_python/model_ready_check/check_model_ready.py @@ -1,4 +1,3 @@ -#!/bin/bash # Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without From d4b929a77e766c6fbe799fe43ee924a9fd853bfb Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 2 Dec 2025 21:57:44 +0530 Subject: [PATCH 06/17] Update qa/L0_backend_python/model_ready_check/check_model_ready.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- qa/L0_backend_python/model_ready_check/check_model_ready.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_backend_python/model_ready_check/check_model_ready.py b/qa/L0_backend_python/model_ready_check/check_model_ready.py index f93dcfce73..d73606aeb2 100755 --- a/qa/L0_backend_python/model_ready_check/check_model_ready.py +++ b/qa/L0_backend_python/model_ready_check/check_model_ready.py @@ -77,7 +77,7 @@ def test_model_not_ready(self): is_ready = self.client_grpc.is_model_ready(self.model_name) self.assertFalse( is_ready, - f"[gRPC] Model {self.model_name} should be NOT READY but is READY", + f"[gRPC] Model {self.model_name} should be NOT READY but is READY.", ) except Exception as e: self.fail(f"[gRPC] Unexpected error: {str(e)}") From b04956f6cad6f86b532698d44fdb73d1a46a5992 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 2 Dec 2025 21:57:52 +0530 Subject: [PATCH 07/17] Update qa/L0_backend_python/model_ready_check/check_model_ready.py Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- qa/L0_backend_python/model_ready_check/check_model_ready.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_backend_python/model_ready_check/check_model_ready.py b/qa/L0_backend_python/model_ready_check/check_model_ready.py index d73606aeb2..e27ac277ef 100755 --- a/qa/L0_backend_python/model_ready_check/check_model_ready.py +++ b/qa/L0_backend_python/model_ready_check/check_model_ready.py @@ -67,7 +67,7 @@ def test_model_not_ready(self): is_ready = self.client_http.is_model_ready(self.model_name) self.assertFalse( is_ready, - f"[HTTP] Model {self.model_name} should be NOT READY but is READY.", + f"[HTTP] Model {self.model_name} should be NOT READY but is READY", ) except Exception as e: self.fail(f"[HTTP] Unexpected error: {str(e)}") From a563c53c066538bbf631f6483a3a8cb42a1fd1bc Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 2 Dec 2025 21:58:03 +0530 Subject: [PATCH 08/17] Update qa/L0_backend_python/model_ready_check/test.sh Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- qa/L0_backend_python/model_ready_check/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_backend_python/model_ready_check/test.sh b/qa/L0_backend_python/model_ready_check/test.sh index 540a0faa84..2bf8db7210 100755 --- a/qa/L0_backend_python/model_ready_check/test.sh +++ b/qa/L0_backend_python/model_ready_check/test.sh @@ -89,7 +89,7 @@ for SIGNAL in 11 9; do echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Model reported ready after kill \n***" RET=1 else - echo "***\n Model Ready Check Passed for Signal $SIGNAL" + echo -e "\n***\n Model Ready Check Passed for Signal $SIGNAL" fi fi From 65cd1eb6a7eec77fabd2c46ab210cf955f162e6c Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 2 Dec 2025 21:58:36 +0530 Subject: [PATCH 09/17] Update qa/L0_backend_python/model_ready_check/test.sh Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- qa/L0_backend_python/model_ready_check/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_backend_python/model_ready_check/test.sh b/qa/L0_backend_python/model_ready_check/test.sh index 2bf8db7210..78152a5556 100755 --- a/qa/L0_backend_python/model_ready_check/test.sh +++ b/qa/L0_backend_python/model_ready_check/test.sh @@ -68,7 +68,7 @@ for SIGNAL in 11 9; do fi # 2. Find the stub process PID - stub_pid=$(pgrep -f "triton_python_backend_stub*") + stub_pid=$(pgrep -f "triton_python_backend_stub") if [ -z "$stub_pid" ]; then echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Could not find stub process \n***" From f262812de400c2d3b874e4848ea5290f0fca5db7 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Tue, 2 Dec 2025 22:01:57 +0530 Subject: [PATCH 10/17] Update --- qa/L0_backend_python/model_ready_check/check_model_ready.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 qa/L0_backend_python/model_ready_check/check_model_ready.py diff --git a/qa/L0_backend_python/model_ready_check/check_model_ready.py b/qa/L0_backend_python/model_ready_check/check_model_ready.py old mode 100755 new mode 100644 From 7c1ac8178950a4fbd380d5d88644832bca6e40e8 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Fri, 5 Dec 2025 21:21:52 +0530 Subject: [PATCH 11/17] Update --- .../model_ready_check/test.sh | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/qa/L0_backend_python/model_ready_check/test.sh b/qa/L0_backend_python/model_ready_check/test.sh index 78152a5556..1538d6e1df 100755 --- a/qa/L0_backend_python/model_ready_check/test.sh +++ b/qa/L0_backend_python/model_ready_check/test.sh @@ -25,20 +25,19 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -CLIENT_LOG="./model_ready_check_client.log" TEST_RESULT_FILE='test_results.txt' source ../common.sh source ../../common/util.sh SERVER_ARGS="--model-repository=${MODELDIR}/model_ready_check/models --backend-directory=${BACKEND_DIR} --log-verbose=1" -SERVER_LOG="./model_ready_check_server.log" RET=0 rm -fr *.log ./models -mkdir -p models/identity_fp32/1/ -cp ../../python_models/identity_fp32/model.py ./models/identity_fp32/1/model.py -cp ../../python_models/identity_fp32/config.pbtxt ./models/identity_fp32/config.pbtxt +MODEL_NAME="identity_fp32" +mkdir -p models/$MODEL_NAME/1/ +cp ../../python_models/$MODEL_NAME/model.py ./models/$MODEL_NAME/1/model.py +cp ../../python_models/$MODEL_NAME/config.pbtxt ./models/$MODEL_NAME/config.pbtxt # # Test Model Ready Check (TRITONBACKEND_ModelInstanceReady) @@ -47,6 +46,8 @@ cp ../../python_models/identity_fp32/config.pbtxt ./models/identity_fp32/config. # 9 (SIGKILL) - Force kill for SIGNAL in 11 9; do echo -e "\n***\n*** Testing Model Ready Check with Signal $SIGNAL\n***" + SERVER_LOG="./model_ready_check_signal_${SIGNAL}_server.log" + CLIENT_LOG="./model_ready_check_${SIGNAL}_client.log" run_server if [ "$SERVER_PID" == "0" ]; then @@ -59,7 +60,7 @@ for SIGNAL in 11 9; do # 1. Verify model is initially ready echo "Checking Initial Readiness..." - python3 -m unittest check_model_ready.ModelReadyTest.test_model_ready + python3 -m unittest check_model_ready.ModelReadyTest.test_model_ready >> ${CLIENT_LOG} 2>&1 if [ $? -ne 0 ]; then echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Initial readiness check failed \n***" RET=1 @@ -84,12 +85,19 @@ for SIGNAL in 11 9; do # 4. Verify model is now NOT ready echo "Checking Not Ready Status..." - python3 -m unittest check_model_ready.ModelReadyTest.test_model_not_ready + python3 -m unittest check_model_ready.ModelReadyTest.test_model_not_ready >> ${CLIENT_LOG} 2>&1 if [ $? -ne 0 ]; then echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Model reported ready after kill \n***" RET=1 else - echo -e "\n***\n Model Ready Check Passed for Signal $SIGNAL" + # 5. Verify correct error message in logs + if grep -q "Stub process '${MODEL_NAME}_0_0' is not alive" $SERVER_LOG; then + echo -e "\n***\n Model Ready Check Passed for Signal $SIGNAL \n***" + else + echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Expected error message not found in the server logs \n***" + cat $SERVER_LOG + RET=1 + fi fi fi From 25072c1dea086143fc4e6a9cb8e02836ddff30db Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Fri, 5 Dec 2025 22:05:13 +0530 Subject: [PATCH 12/17] Update test --- .../model_ready_check/test.sh | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/qa/L0_backend_python/model_ready_check/test.sh b/qa/L0_backend_python/model_ready_check/test.sh index 1538d6e1df..3559b849f3 100755 --- a/qa/L0_backend_python/model_ready_check/test.sh +++ b/qa/L0_backend_python/model_ready_check/test.sh @@ -45,7 +45,7 @@ cp ../../python_models/$MODEL_NAME/config.pbtxt ./models/$MODEL_NAME/config.pbtx # 11 (SIGSEGV) - Segmentation fault / crash # 9 (SIGKILL) - Force kill for SIGNAL in 11 9; do - echo -e "\n***\n*** Testing Model Ready Check with Signal $SIGNAL\n***" + echo -e "\n***\n*** Testing model_ready_check with Signal $SIGNAL\n***" SERVER_LOG="./model_ready_check_signal_${SIGNAL}_server.log" CLIENT_LOG="./model_ready_check_${SIGNAL}_client.log" @@ -58,43 +58,45 @@ for SIGNAL in 11 9; do set +e - # 1. Verify model is initially ready + # Verify model is initially ready echo "Checking Initial Readiness..." python3 -m unittest check_model_ready.ModelReadyTest.test_model_ready >> ${CLIENT_LOG} 2>&1 if [ $? -ne 0 ]; then - echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Initial readiness check failed \n***" + echo -e "\n***\n*** Test model_ready_check Failed (Signal $SIGNAL): Initial readiness check failed \n***" RET=1 kill_server exit 1 fi - # 2. Find the stub process PID + # Find the stub process PID stub_pid=$(pgrep -f "triton_python_backend_stub") if [ -z "$stub_pid" ]; then - echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Could not find stub process \n***" + echo -e "\n***\n*** Test model_ready_check Failed (Signal $SIGNAL): Could not find stub process \n***" RET=1 kill_server else echo "Found stub process: $stub_pid" - # 3. Kill the stub process + # Kill the stub process echo "Killing stub with signal $SIGNAL..." kill -$SIGNAL $stub_pid sleep 1 - # 4. Verify model is now NOT ready + # Verify model is now NOT ready echo "Checking Not Ready Status..." python3 -m unittest check_model_ready.ModelReadyTest.test_model_not_ready >> ${CLIENT_LOG} 2>&1 if [ $? -ne 0 ]; then - echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Model reported ready after kill \n***" + echo -e "\n***\n*** Test model_ready_check Failed (Signal $SIGNAL): Model reported ready after kill \n***" RET=1 else - # 5. Verify correct error message in logs - if grep -q "Stub process '${MODEL_NAME}_0_0' is not alive" $SERVER_LOG; then - echo -e "\n***\n Model Ready Check Passed for Signal $SIGNAL \n***" + # Verify correct error message in logs + # Expect 2 occurrences: HTTP and gRPC checks + error_count=$(grep -c "Model '${MODEL_NAME}' version 1 is not ready: Stub process '${MODEL_NAME}_0_0' is not healthy." $SERVER_LOG) + if [ "$error_count" -eq 2 ]; then + echo -e "\n***\n Test model_ready_check Passed for Signal $SIGNAL \n***" else - echo -e "\n***\n*** Model Ready Check Failed (Signal $SIGNAL): Expected error message not found in the server logs \n***" + echo -e "\n***\n*** Test model_ready_check Failed (Signal $SIGNAL): Expected 2 error messages, found $error_count \n***" cat $SERVER_LOG RET=1 fi From 0b29cb433aa0124f146de2e67a5fe80d2f3e8fe2 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Mon, 8 Dec 2025 14:00:59 +0530 Subject: [PATCH 13/17] Update test name --- .../check_model_ready.py | 0 .../test.sh | 18 +++++++++--------- qa/L0_backend_python/test.sh | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) rename qa/L0_backend_python/{model_ready_check => model_readiness}/check_model_ready.py (100%) rename qa/L0_backend_python/{model_ready_check => model_readiness}/test.sh (80%) mode change 100755 => 100644 diff --git a/qa/L0_backend_python/model_ready_check/check_model_ready.py b/qa/L0_backend_python/model_readiness/check_model_ready.py similarity index 100% rename from qa/L0_backend_python/model_ready_check/check_model_ready.py rename to qa/L0_backend_python/model_readiness/check_model_ready.py diff --git a/qa/L0_backend_python/model_ready_check/test.sh b/qa/L0_backend_python/model_readiness/test.sh old mode 100755 new mode 100644 similarity index 80% rename from qa/L0_backend_python/model_ready_check/test.sh rename to qa/L0_backend_python/model_readiness/test.sh index 3559b849f3..e3ba5dda3d --- a/qa/L0_backend_python/model_ready_check/test.sh +++ b/qa/L0_backend_python/model_readiness/test.sh @@ -29,7 +29,7 @@ TEST_RESULT_FILE='test_results.txt' source ../common.sh source ../../common/util.sh -SERVER_ARGS="--model-repository=${MODELDIR}/model_ready_check/models --backend-directory=${BACKEND_DIR} --log-verbose=1" +SERVER_ARGS="--model-repository=${MODELDIR}/model_readiness/models --backend-directory=${BACKEND_DIR} --log-verbose=1" RET=0 rm -fr *.log ./models @@ -45,9 +45,9 @@ cp ../../python_models/$MODEL_NAME/config.pbtxt ./models/$MODEL_NAME/config.pbtx # 11 (SIGSEGV) - Segmentation fault / crash # 9 (SIGKILL) - Force kill for SIGNAL in 11 9; do - echo -e "\n***\n*** Testing model_ready_check with Signal $SIGNAL\n***" - SERVER_LOG="./model_ready_check_signal_${SIGNAL}_server.log" - CLIENT_LOG="./model_ready_check_${SIGNAL}_client.log" + echo -e "\n***\n*** Testing model_readiness with Signal $SIGNAL\n***" + SERVER_LOG="./model_readiness_signal_${SIGNAL}_server.log" + CLIENT_LOG="./model_readiness_${SIGNAL}_client.log" run_server if [ "$SERVER_PID" == "0" ]; then @@ -62,7 +62,7 @@ for SIGNAL in 11 9; do echo "Checking Initial Readiness..." python3 -m unittest check_model_ready.ModelReadyTest.test_model_ready >> ${CLIENT_LOG} 2>&1 if [ $? -ne 0 ]; then - echo -e "\n***\n*** Test model_ready_check Failed (Signal $SIGNAL): Initial readiness check failed \n***" + echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Initial readiness check failed \n***" RET=1 kill_server exit 1 @@ -72,7 +72,7 @@ for SIGNAL in 11 9; do stub_pid=$(pgrep -f "triton_python_backend_stub") if [ -z "$stub_pid" ]; then - echo -e "\n***\n*** Test model_ready_check Failed (Signal $SIGNAL): Could not find stub process \n***" + echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Could not find stub process \n***" RET=1 kill_server else @@ -87,16 +87,16 @@ for SIGNAL in 11 9; do echo "Checking Not Ready Status..." python3 -m unittest check_model_ready.ModelReadyTest.test_model_not_ready >> ${CLIENT_LOG} 2>&1 if [ $? -ne 0 ]; then - echo -e "\n***\n*** Test model_ready_check Failed (Signal $SIGNAL): Model reported ready after kill \n***" + echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Model reported ready after kill \n***" RET=1 else # Verify correct error message in logs # Expect 2 occurrences: HTTP and gRPC checks error_count=$(grep -c "Model '${MODEL_NAME}' version 1 is not ready: Stub process '${MODEL_NAME}_0_0' is not healthy." $SERVER_LOG) if [ "$error_count" -eq 2 ]; then - echo -e "\n***\n Test model_ready_check Passed for Signal $SIGNAL \n***" + echo -e "\n***\n Test model_readiness Passed for Signal $SIGNAL \n***" else - echo -e "\n***\n*** Test model_ready_check Failed (Signal $SIGNAL): Expected 2 error messages, found $error_count \n***" + echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Expected 2 error messages, found $error_count \n***" cat $SERVER_LOG RET=1 fi diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh index 7edcb41906..73e825ebf2 100755 --- a/qa/L0_backend_python/test.sh +++ b/qa/L0_backend_python/test.sh @@ -522,7 +522,7 @@ SUBTESTS="lifecycle argument_validation logging custom_metrics parameters" # [DLIS-6123] Disable examples test for Windows since it requires updates to the example clients if [[ ${TEST_WINDOWS} == 0 ]]; then # TODO: Reimplement restart on decoupled data pipeline and enable restart. - SUBTESTS+=" model_control examples request_rescheduling model_ready_check" + SUBTESTS+=" model_control examples request_rescheduling model_readiness" fi for TEST in ${SUBTESTS}; do # Run each subtest in a separate virtual environment to avoid conflicts From e525825fcd0f4e396be50ba751b7265cd7981197 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Mon, 8 Dec 2025 14:13:42 +0530 Subject: [PATCH 14/17] Update test name --- qa/L0_backend_python/model_readiness/test.sh | 4 ++-- .../{check_model_ready.py => test_model_readiness.py} | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) mode change 100644 => 100755 qa/L0_backend_python/model_readiness/test.sh rename qa/L0_backend_python/model_readiness/{check_model_ready.py => test_model_readiness.py} (96%) mode change 100644 => 100755 diff --git a/qa/L0_backend_python/model_readiness/test.sh b/qa/L0_backend_python/model_readiness/test.sh old mode 100644 new mode 100755 index e3ba5dda3d..979d43e24b --- a/qa/L0_backend_python/model_readiness/test.sh +++ b/qa/L0_backend_python/model_readiness/test.sh @@ -60,7 +60,7 @@ for SIGNAL in 11 9; do # Verify model is initially ready echo "Checking Initial Readiness..." - python3 -m unittest check_model_ready.ModelReadyTest.test_model_ready >> ${CLIENT_LOG} 2>&1 + python3 -m unittest test_model_readiness.ModelReadinessTest.test_is_model_ready >> ${CLIENT_LOG} 2>&1 if [ $? -ne 0 ]; then echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Initial readiness check failed \n***" RET=1 @@ -85,7 +85,7 @@ for SIGNAL in 11 9; do # Verify model is now NOT ready echo "Checking Not Ready Status..." - python3 -m unittest check_model_ready.ModelReadyTest.test_model_not_ready >> ${CLIENT_LOG} 2>&1 + python3 -m unittest test_model_readiness.ModelReadinessTest.test_is_model_not_ready >> ${CLIENT_LOG} 2>&1 if [ $? -ne 0 ]; then echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Model reported ready after kill \n***" RET=1 diff --git a/qa/L0_backend_python/model_readiness/check_model_ready.py b/qa/L0_backend_python/model_readiness/test_model_readiness.py old mode 100644 new mode 100755 similarity index 96% rename from qa/L0_backend_python/model_readiness/check_model_ready.py rename to qa/L0_backend_python/model_readiness/test_model_readiness.py index e27ac277ef..5463f969b2 --- a/qa/L0_backend_python/model_readiness/check_model_ready.py +++ b/qa/L0_backend_python/model_readiness/test_model_readiness.py @@ -30,7 +30,7 @@ import tritonclient.http as httpclient -class ModelReadyTest(unittest.TestCase): +class ModelReadinessTest(unittest.TestCase): def setUp(self): self.model_name = "identity_fp32" self.url_http = "localhost:8000" @@ -38,7 +38,7 @@ def setUp(self): self.client_http = httpclient.InferenceServerClient(url=self.url_http) self.client_grpc = grpcclient.InferenceServerClient(url=self.url_grpc) - def test_model_ready(self): + def test_is_model_ready(self): print(f"\nTesting if model '{self.model_name}' is READY ...") # Check HTTP @@ -59,7 +59,7 @@ def test_model_ready(self): except Exception as e: self.fail(f"[gRPC] Unexpected error: {str(e)}") - def test_model_not_ready(self): + def test_is_model_not_ready(self): print(f"\nTesting if model '{self.model_name}' is NOT READY ...") # Check HTTP From a85a459a60db04833f4637cbd98d11f4cd1c2605 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Mon, 8 Dec 2025 14:16:15 +0530 Subject: [PATCH 15/17] Fix pre-commit --- qa/L0_backend_python/model_readiness/test_model_readiness.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 qa/L0_backend_python/model_readiness/test_model_readiness.py diff --git a/qa/L0_backend_python/model_readiness/test_model_readiness.py b/qa/L0_backend_python/model_readiness/test_model_readiness.py old mode 100755 new mode 100644 From 839a813dbac84620ab43b0aac3f598b560de30fc Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Mon, 8 Dec 2025 14:17:55 +0530 Subject: [PATCH 16/17] Update --- qa/L0_backend_python/model_readiness/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/L0_backend_python/model_readiness/test.sh b/qa/L0_backend_python/model_readiness/test.sh index 979d43e24b..1cf9a2e5ec 100755 --- a/qa/L0_backend_python/model_readiness/test.sh +++ b/qa/L0_backend_python/model_readiness/test.sh @@ -40,7 +40,7 @@ cp ../../python_models/$MODEL_NAME/model.py ./models/$MODEL_NAME/1/model.py cp ../../python_models/$MODEL_NAME/config.pbtxt ./models/$MODEL_NAME/config.pbtxt # -# Test Model Ready Check (TRITONBACKEND_ModelInstanceReady) +# Test Model Readiness (TRITONBACKEND_ModelInstanceReady) # Test with different signals to simulate various crash/exit scenarios # 11 (SIGSEGV) - Segmentation fault / crash # 9 (SIGKILL) - Force kill From 56e4c8b204a7fb9b71198267e06ca2303f32ebac Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Mon, 8 Dec 2025 21:49:46 +0530 Subject: [PATCH 17/17] Update --- qa/L0_backend_python/model_readiness/test.sh | 4 ++-- .../model_readiness/test_model_readiness.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/qa/L0_backend_python/model_readiness/test.sh b/qa/L0_backend_python/model_readiness/test.sh index 1cf9a2e5ec..cc87aeacd8 100755 --- a/qa/L0_backend_python/model_readiness/test.sh +++ b/qa/L0_backend_python/model_readiness/test.sh @@ -60,7 +60,7 @@ for SIGNAL in 11 9; do # Verify model is initially ready echo "Checking Initial Readiness..." - python3 -m unittest test_model_readiness.ModelReadinessTest.test_is_model_ready >> ${CLIENT_LOG} 2>&1 + python3 -m unittest test_model_readiness.TestModelReadiness.test_model_ready >> ${CLIENT_LOG} 2>&1 if [ $? -ne 0 ]; then echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Initial readiness check failed \n***" RET=1 @@ -85,7 +85,7 @@ for SIGNAL in 11 9; do # Verify model is now NOT ready echo "Checking Not Ready Status..." - python3 -m unittest test_model_readiness.ModelReadinessTest.test_is_model_not_ready >> ${CLIENT_LOG} 2>&1 + python3 -m unittest test_model_readiness.TestModelReadiness.test_model_not_ready >> ${CLIENT_LOG} 2>&1 if [ $? -ne 0 ]; then echo -e "\n***\n*** Test model_readiness Failed (Signal $SIGNAL): Model reported ready after kill \n***" RET=1 diff --git a/qa/L0_backend_python/model_readiness/test_model_readiness.py b/qa/L0_backend_python/model_readiness/test_model_readiness.py index 5463f969b2..65d1c81d8a 100644 --- a/qa/L0_backend_python/model_readiness/test_model_readiness.py +++ b/qa/L0_backend_python/model_readiness/test_model_readiness.py @@ -30,7 +30,7 @@ import tritonclient.http as httpclient -class ModelReadinessTest(unittest.TestCase): +class TestModelReadiness(unittest.TestCase): def setUp(self): self.model_name = "identity_fp32" self.url_http = "localhost:8000" @@ -38,7 +38,7 @@ def setUp(self): self.client_http = httpclient.InferenceServerClient(url=self.url_http) self.client_grpc = grpcclient.InferenceServerClient(url=self.url_grpc) - def test_is_model_ready(self): + def test_model_ready(self): print(f"\nTesting if model '{self.model_name}' is READY ...") # Check HTTP @@ -59,7 +59,7 @@ def test_is_model_ready(self): except Exception as e: self.fail(f"[gRPC] Unexpected error: {str(e)}") - def test_is_model_not_ready(self): + def test_model_not_ready(self): print(f"\nTesting if model '{self.model_name}' is NOT READY ...") # Check HTTP