From b7bcacda9c1daa479a14ad8912f96240555d8137 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Mon, 1 Dec 2025 11:21:58 +0530 Subject: [PATCH 1/5] Update --- src/python_be.cc | 27 +++++++++++++++++++++++++++ src/stub_launcher.cc | 27 +++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/src/python_be.cc b/src/python_be.cc index c152e035..bcead239 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -2416,6 +2416,33 @@ TRITONBACKEND_ModelInstanceExecute( return nullptr; } +TRITONBACKEND_ISPEC TRITONSERVER_Error* +TRITONBACKEND_ModelInstanceReady(TRITONBACKEND_ModelInstance* instance) +{ + void* vstate; + RETURN_IF_ERROR(TRITONBACKEND_ModelInstanceState(instance, &vstate)); + ModelInstanceState* instance_state = + reinterpret_cast<ModelInstanceState*>(vstate); + + if (!instance_state->Stub()->StubActive()) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + (std::string("Stub process '") + instance_state->Name() + + "' is not alive") + .c_str()); + } + + if (!instance_state->IsStubProcessAlive()) { + return TRITONSERVER_ErrorNew( + TRITONSERVER_ERROR_INTERNAL, + (std::string("Stub process '") + instance_state->Name() + + "' is not healthy (unresponsive).") + .c_str()); + } + + return nullptr; +} + TRITONBACKEND_ISPEC TRITONSERVER_Error* TRITONBACKEND_ModelInstanceFinalize(TRITONBACKEND_ModelInstance* instance) { diff --git a/src/stub_launcher.cc b/src/stub_launcher.cc index 32f5d1bd..f7b26b28 100644 --- a/src/stub_launcher.cc +++ b/src/stub_launcher.cc @@ -743,7 +743,24 @@ StubLauncher::StubActive() GetExitCodeProcess(stub_pid_.hProcess, &ec); return (ec == STILL_ACTIVE); #else - return (stub_pid_ != 0); + if (stub_pid_ == 0) { + return false; + } + + int status; + pid_t return_pid = waitpid(stub_pid_, &status, WNOHANG); + if (return_pid == -1) { + // If waitpid fails, it likely means the process no longer exists (ECHILD) + stub_pid_ = 0; + return false; + } else if (return_pid == stub_pid_) { + // Process has exited and has 
been reaped + stub_pid_ = 0; + return false; + } + + // return_pid == 0 means the process is still running + return true; #endif } @@ -824,9 +841,11 @@ StubLauncher::KillStubProcess() CloseHandle(stub_pid_.hProcess); CloseHandle(stub_pid_.hThread); #else - kill(stub_pid_, SIGKILL); - WaitForStubProcess(); - stub_pid_ = 0; + if (stub_pid_ != 0) { + kill(stub_pid_, SIGKILL); + WaitForStubProcess(); + stub_pid_ = 0; + } #endif } From 353540b0aca06c163c354be5fc1b969ab9ca5188 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Mon, 1 Dec 2025 14:31:32 +0530 Subject: [PATCH 2/5] Fix pre-commit errors --- src/python_be.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/python_be.cc b/src/python_be.cc index bcead239..9706fcae 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -2426,10 +2426,9 @@ TRITONBACKEND_ModelInstanceReady(TRITONBACKEND_ModelInstance* instance) if (!instance_state->Stub()->StubActive()) { return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - (std::string("Stub process '") + instance_state->Name() + - "' is not alive") - .c_str()); + TRITONSERVER_ERROR_INTERNAL, (std::string("Stub process '") + + instance_state->Name() + "' is not alive") + .c_str()); } if (!instance_state->IsStubProcessAlive()) { From 30a0f99946115525d69c13180bfa61664f230411 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Fri, 5 Dec 2025 17:31:17 +0530 Subject: [PATCH 3/5] Update --- src/python_be.cc | 2 ++ src/stub_launcher.cc | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/python_be.cc b/src/python_be.cc index 9706fcae..93da8aae 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -2424,6 +2424,7 @@ TRITONBACKEND_ModelInstanceReady(TRITONBACKEND_ModelInstance* instance) ModelInstanceState* instance_state = reinterpret_cast<ModelInstanceState*>(vstate); + // Check if the stub process is running if (!instance_state->Stub()->StubActive()) { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INTERNAL, 
(std::string("Stub process '") + @@ -2431,6 +2432,7 @@ TRITONBACKEND_ModelInstanceReady(TRITONBACKEND_ModelInstance* instance) .c_str()); } + // Check if the stub process is responsive if (!instance_state->IsStubProcessAlive()) { return TRITONSERVER_ErrorNew( TRITONSERVER_ERROR_INTERNAL, diff --git a/src/stub_launcher.cc b/src/stub_launcher.cc index f7b26b28..cbe039a4 100644 --- a/src/stub_launcher.cc +++ b/src/stub_launcher.cc @@ -751,11 +751,16 @@ StubLauncher::StubActive() pid_t return_pid = waitpid(stub_pid_, &status, WNOHANG); if (return_pid == -1) { // If waitpid fails, it likely means the process no longer exists (ECHILD) - stub_pid_ = 0; + if (errno != ECHILD) { + LOG_MESSAGE( + TRITONSERVER_LOG_VERBOSE, + (std::string("waitpid failed for stub process ") + + std::to_string(stub_pid_) + ": " + strerror(errno)) + .c_str()); + } return false; } else if (return_pid == stub_pid_) { // Process has exited and has been reaped - stub_pid_ = 0; return false; } From dd066f365d645abc870c4f2382c23fe567fd7e85 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Fri, 5 Dec 2025 19:31:13 +0530 Subject: [PATCH 4/5] Update --- src/python_be.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/python_be.cc b/src/python_be.cc index 93da8aae..ae14b052 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -2427,9 +2427,10 @@ TRITONBACKEND_ModelInstanceReady(TRITONBACKEND_ModelInstance* instance) // Check if the stub process is running if (!instance_state->Stub()->StubActive()) { return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, (std::string("Stub process '") + - instance_state->Name() + "' is not alive") - .c_str()); + TRITONSERVER_ERROR_INTERNAL, + (std::string("Stub process '") + instance_state->Name() + + "' is not healthy.") + .c_str()); } // Check if the stub process is responsive @@ -2437,7 +2438,7 @@ TRITONBACKEND_ModelInstanceReady(TRITONBACKEND_ModelInstance* instance) return TRITONSERVER_ErrorNew( 
TRITONSERVER_ERROR_INTERNAL, (std::string("Stub process '") + instance_state->Name() + - "' is not healthy (unresponsive).") + "' is not healthy.") .c_str()); } From b0e23d2a3402e17f1b80d6c0c9ea7eea747390d4 Mon Sep 17 00:00:00 2001 From: Sai Kiran Polisetty Date: Sun, 7 Dec 2025 17:47:31 +0530 Subject: [PATCH 5/5] Test removing IsStubProcessAlive() check to fix CI --- src/python_be.cc | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/python_be.cc b/src/python_be.cc index ae14b052..94e3509d 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -2433,15 +2433,6 @@ TRITONBACKEND_ModelInstanceReady(TRITONBACKEND_ModelInstance* instance) .c_str()); } - // Check if the stub process is responsive - if (!instance_state->IsStubProcessAlive()) { - return TRITONSERVER_ErrorNew( - TRITONSERVER_ERROR_INTERNAL, - (std::string("Stub process '") + instance_state->Name() + - "' is not healthy.") - .c_str()); - } - return nullptr; }