From 67bc8767ca4f4acf174c964ea98f300bb71a5707 Mon Sep 17 00:00:00 2001 From: Jaeic Lee <2484055+jaeiclee@users.noreply.github.com> Date: Wed, 18 Mar 2026 04:20:51 +0900 Subject: [PATCH] Add auto-unload timeout for idle models (env var only) Implements automatic unloading of models after a configurable idle period via "LEMONADE_GLOBAL_AUTO_UNLOAD_TIMER" env variable. - Uses condition-variable-based timers for non-blocking cancellation, preventing deadlocks between timer threads and the model loading mutex. - Timers are cancelled on all eviction paths (LRU, NPU exclusivity, nuclear option) to prevent orphaned timers. - Added the new env variable in the documentation. --- docs/server/lemonade-server-cli.md | 1 + src/cpp/include/lemon/cli_parser.h | 3 + src/cpp/include/lemon/router.h | 46 +++- src/cpp/include/lemon/server.h | 3 +- src/cpp/include/lemon/wrapped_server.h | 10 + src/cpp/server/cli_parser.cpp | 13 + src/cpp/server/main.cpp | 2 +- src/cpp/server/router.cpp | 316 +++++++++++++++++++++++-- src/cpp/server/server.cpp | 17 +- 9 files changed, 382 insertions(+), 29 deletions(-) diff --git a/docs/server/lemonade-server-cli.md b/docs/server/lemonade-server-cli.md index d9c314533..3af574583 100644 --- a/docs/server/lemonade-server-cli.md +++ b/docs/server/lemonade-server-cli.md @@ -93,6 +93,7 @@ These settings can also be provided via environment variables that Lemonade Serv | `LEMONADE_DISABLE_MODEL_FILTERING` | Set to `1` to disable hardware-based model filtering (e.g., RAM amount, NPU availability) and show all models regardless of system capabilities | | `LEMONADE_ENABLE_DGPU_GTT` | Set to `1` to include GTT for hardware-based model filtering | | `LEMONADE_GLOBAL_TIMEOUT` | Global default timeout for HTTP requests, inference, and readiness checks in seconds | +| `LEMONADE_GLOBAL_AUTO_UNLOAD_TIMER` | Automatically unload models after this many seconds of idle time. Set to a positive integer to enable, or `0` to disable. Default: `0` (disabled) | #### Custom Backend Binaries diff --git a/src/cpp/include/lemon/cli_parser.h b/src/cpp/include/lemon/cli_parser.h index 882492cfb..eef6a6a61 100644 --- a/src/cpp/include/lemon/cli_parser.h +++ b/src/cpp/include/lemon/cli_parser.h @@ -20,6 +20,9 @@ struct ServerConfig { // Multi-model support: Max loaded models per type slot int max_loaded_models = 1; + + // Auto-unload support: Global default timer timeout in seconds after inactivity (0 = disabled) + long global_auto_unload_timer = 0; }; struct TrayConfig { diff --git a/src/cpp/include/lemon/router.h b/src/cpp/include/lemon/router.h index 414b3dbd7..4c6d3b2ca 100644 --- a/src/cpp/include/lemon/router.h +++ b/src/cpp/include/lemon/router.h @@ -5,6 +5,11 @@ #include #include #include +#include +#include +#include +#include +#include #include #include #include "wrapped_server.h" @@ -21,7 +26,8 @@ class Router { const std::string& log_level, ModelManager* model_manager, int max_loaded_models, - BackendManager* backend_manager); + BackendManager* backend_manager, + long auto_unload_timeout); // Auto-unload timeout in seconds (0 = disabled) ~Router(); @@ -88,7 +94,33 @@ class Router { // Update prompt_tokens field from usage void update_prompt_tokens(int prompt_tokens); + // Auto-unload support: Get/set timeout at runtime + long get_auto_unload_timeout() const { return auto_unload_timeout_sec_; } + void set_auto_unload_timeout(long timeout_seconds); + + // Auto-unload support: Reset timer for a specific model (called on each request) + void reset_auto_unload_timer(const std::string& model_name); + private: + // Auto-unload timer entry structure + struct ModelTimerEntry { + // Shared state between Router and timer thread, kept alive by shared_ptr + // so the thread can safely detect cancellation even after entry is erased from map + struct State { + std::mutex mtx; + std::condition_variable cv; + bool cancelled{false}; // Set by cancel(), wakes the timer thread + bool shutdown{false}; // Set during unload to prevent rescheduling + std::chrono::seconds timeout_duration{0}; + }; + std::shared_ptr state; + std::thread timer_thread; + + void start_timer(std::chrono::seconds duration, std::function on_timeout); + void cancel(); // Non-blocking: marks cancelled + notifies cv, no thread join + bool is_active() const { return state != nullptr && !state->cancelled; } + }; + // Multi-model support: Manage multiple WrappedServers std::vector> loaded_servers_; @@ -106,6 +138,12 @@ class Router { bool is_loading_ = false; // True when a load operation is in progress std::condition_variable load_cv_; // Signals when load completes + // Auto-unload support + long auto_unload_timeout_sec_ = 0; // Global timeout in seconds (0 = disabled) + mutable std::mutex timers_mutex_; // Protects model_timers_ + std::unordered_map> model_timers_; + std::atomic unload_in_progress_{false}; // Prevents new timer creation during unload + // Helper methods for multi-model management WrappedServer* find_server_by_model_name(const std::string& model_name) const; WrappedServer* get_most_recent_server() const; @@ -116,10 +154,14 @@ class Router { WrappedServer* find_npu_server_by_recipe(const std::string& recipe) const; WrappedServer* find_flm_server_by_type(ModelType type) const; void evict_all_npu_servers(); - void evict_server(WrappedServer* server); + bool evict_server(WrappedServer* server); // Returns true if evicted, false if skipped (still busy) void evict_all_servers(); std::unique_ptr create_backend_server(const ModelInfo& model_info); + // Auto-unload helper methods + void start_auto_unload_timer(const std::string& model_name, long timeout_seconds); + void cancel_auto_unload_timer(const std::string& model_name); + // Generic inference wrapper that handles locking and busy state template auto execute_inference(const json& request, Func&& inference_func) -> decltype(inference_func(nullptr)); diff --git a/src/cpp/include/lemon/server.h b/src/cpp/include/lemon/server.h index b16675d39..5899a8221 100644 --- a/src/cpp/include/lemon/server.h +++ b/src/cpp/include/lemon/server.h @@ -30,7 +30,8 @@ class Server { int max_loaded_models, const std::string& extra_models_dir, bool no_broadcast, - long http_timeout); + long http_timeout, + long auto_unload_timeout); ~Server(); diff --git a/src/cpp/include/lemon/wrapped_server.h b/src/cpp/include/lemon/wrapped_server.h index 75bd1729c..fa414024a 100644 --- a/src/cpp/include/lemon/wrapped_server.h +++ b/src/cpp/include/lemon/wrapped_server.h @@ -96,6 +96,16 @@ class WrappedServer : public ICompletionServer { } } + // Wait with timeout - returns true if not busy, false if timeout expired + bool wait_until_not_busy_with_timeout(std::chrono::seconds timeout) const { + std::unique_lock lock(busy_mutex_); + if (!is_busy_) { + return true; + } + bool result = busy_cv_.wait_for(lock, timeout, [this] { return !is_busy_; }); + return result; + } + // Multi-model support: Model metadata void set_model_metadata(const std::string& model_name, const std::string& checkpoint, ModelType type, DeviceType device, const RecipeOptions& recipe_options) { diff --git a/src/cpp/server/cli_parser.cpp b/src/cpp/server/cli_parser.cpp index 61e360055..eb55934b9 100644 --- a/src/cpp/server/cli_parser.cpp +++ b/src/cpp/server/cli_parser.cpp @@ -77,6 +77,19 @@ static void add_serve_options(CLI::App* serve, ServerConfig& config) { return "Value must be a positive integer or -1 for unlimited (got '" + val + "')"; } }); + + // Auto-unload support: Global default timer timeout in seconds (0 = disabled, via env var) + if (auto env_val = std::getenv("LEMONADE_GLOBAL_AUTO_UNLOAD_TIMER")) { + try { + config.global_auto_unload_timer = std::stol(env_val); + if (config.global_auto_unload_timer < 0) { + config.global_auto_unload_timer = 0; + } + } catch (...) { + config.global_auto_unload_timer = 0; + } + } + RecipeOptions::add_cli_options(*serve, config.recipe_options); } diff --git a/src/cpp/server/main.cpp b/src/cpp/server/main.cpp index 29296d5ad..5be4f0609 100644 --- a/src/cpp/server/main.cpp +++ b/src/cpp/server/main.cpp @@ -79,7 +79,7 @@ int main(int argc, char** argv) { Server server(config.port, config.host, config.log_level, config.recipe_options, config.max_loaded_models, config.extra_models_dir, config.no_broadcast, - config.global_timeout); + config.global_timeout, config.global_auto_unload_timer); // Register signal handler for Ctrl+C g_server_instance = &server; diff --git a/src/cpp/server/router.cpp b/src/cpp/server/router.cpp index 99b9334de..6040494f4 100644 --- a/src/cpp/server/router.cpp +++ b/src/cpp/server/router.cpp @@ -15,15 +15,23 @@ namespace lemon { Router::Router(const json& default_options, const std::string& log_level, ModelManager* model_manager, - int max_loaded_models, BackendManager* backend_manager) + int max_loaded_models, BackendManager* backend_manager, + long auto_unload_timeout) : default_options_(default_options), log_level_(log_level), model_manager_(model_manager), - max_loaded_models_(max_loaded_models), backend_manager_(backend_manager) { + max_loaded_models_(max_loaded_models), backend_manager_(backend_manager), + auto_unload_timeout_sec_(auto_unload_timeout) { if (max_loaded_models_ == -1) { LOG(DEBUG, "Router") << "Max loaded models per type: unlimited" << std::endl; } else { LOG(DEBUG, "Router") << "Max loaded models per type: " << max_loaded_models_ << std::endl; } + + if (auto_unload_timeout_sec_ > 0) { + LOG(INFO, "Router") << "Auto-unload timeout enabled: " << auto_unload_timeout_sec_ << " seconds" << std::endl; + } else { + LOG(DEBUG, "Router") << "Auto-unload timeout disabled" << std::endl; + } } Router::~Router() { @@ -31,6 +39,170 @@ Router::~Router() { unload_model(""); // Unload all } +// ============================================================================ +// Auto-unload timer methods +// ============================================================================ + +void Router::ModelTimerEntry::start_timer(std::chrono::seconds duration, + std::function on_timeout) { + state = std::make_shared(); + state->timeout_duration = duration; + + // Capture shared_ptr by value to keep State alive for thread's lifetime + auto s = state; + timer_thread = std::thread([s, duration, on_timeout]() { + // Use condition_variable for interruptible sleep + { + std::unique_lock lock(s->mtx); + s->cv.wait_for(lock, duration, [s] { return s->cancelled; }); + } + + // Check if cancelled or shutdown before executing callback + if (s->cancelled || s->shutdown || !on_timeout) { + return; + } + + try { + on_timeout(); + } catch (const std::exception& e) { + // Log but don't crash the timer thread + } catch (...) { + // Catch any other exceptions + } + }); +} + +void Router::ModelTimerEntry::cancel() { + if (state) { + std::lock_guard lock(state->mtx); + state->cancelled = true; + state->cv.notify_one(); + } + // Non-blocking: does NOT join the thread. + // The thread will exit on its own when it wakes and sees cancelled=true. + // Thread is detached to avoid "thread not joined" std::terminate. + if (timer_thread.joinable()) { + timer_thread.detach(); + } +} + +void Router::set_auto_unload_timeout(long timeout_seconds) { + std::lock_guard lock(timers_mutex_); + auto_unload_timeout_sec_ = timeout_seconds; + + if (auto_unload_timeout_sec_ > 0) { + LOG(INFO, "Router") << "Auto-unload timeout set to: " << auto_unload_timeout_sec_ << " seconds" << std::endl; + } else { + LOG(INFO, "Router") << "Auto-unload timeout disabled" << std::endl; + // Cancel all existing timers + for (auto& [model_name, timer_entry] : model_timers_) { + if (timer_entry) { + timer_entry->cancel(); + } + } + model_timers_.clear(); + } +} + +void Router::reset_auto_unload_timer(const std::string& model_name) { + if (auto_unload_timeout_sec_ <= 0) { + return; // Auto-unload disabled + } + + // Don't create new timers during unload (prevents race with unload_model) + if (unload_in_progress_.load()) { + return; + } + + std::lock_guard lock(timers_mutex_); + auto it = model_timers_.find(model_name); + + // Cancel existing timer if present (direct flag set + notify, no lock re-entry) + if (it != model_timers_.end() && it->second && it->second->state) { + auto& s = it->second->state; + { + std::lock_guard state_lock(s->mtx); + s->cancelled = true; + } + s->cv.notify_one(); + if (it->second->timer_thread.joinable()) { + it->second->timer_thread.detach(); + } + } + + // Always create a new timer + auto timer_entry = std::make_unique(); + timer_entry->start_timer( + std::chrono::seconds(auto_unload_timeout_sec_), + [this, model_name]() { + LOG(INFO, "Router") << "Auto-unload timeout expired for: " << model_name << std::endl; + try { + unload_model(model_name); + } catch (const std::exception& e) { + LOG(DEBUG, "Router") << "Auto-unload skipped: " << e.what() << std::endl; + } + } + ); + model_timers_[model_name] = std::move(timer_entry); + + LOG(DEBUG, "Router") << "Reset auto-unload timer for: " << model_name << std::endl; +} + +void Router::start_auto_unload_timer(const std::string& model_name, long timeout_seconds) { + if (timeout_seconds <= 0) { + return; // Auto-unload disabled + } + + // Don't create new timers during unload (prevents race with unload_model) + if (unload_in_progress_.load()) { + return; + } + + std::lock_guard lock(timers_mutex_); + + // Cancel any existing timer for this model (direct flag set, no lock re-entry) + auto existing_it = model_timers_.find(model_name); + if (existing_it != model_timers_.end() && existing_it->second && existing_it->second->state) { + auto& s = existing_it->second->state; + { + std::lock_guard state_lock(s->mtx); + s->cancelled = true; + } + s->cv.notify_one(); + if (existing_it->second->timer_thread.joinable()) { + existing_it->second->timer_thread.detach(); + } + } + + // Create new timer entry + auto timer_entry = std::make_unique(); + timer_entry->start_timer( + std::chrono::seconds(timeout_seconds), + [this, model_name]() { + LOG(INFO, "Router") << "Auto-unload timeout expired for: " << model_name << std::endl; + try { + unload_model(model_name); + } catch (const std::exception& e) { + LOG(DEBUG, "Router") << "Auto-unload skipped: " << e.what() << std::endl; + } + } + ); + + model_timers_[model_name] = std::move(timer_entry); + LOG(DEBUG, "Router") << "Started auto-unload timer for: " << model_name + << " (" << timeout_seconds << " seconds)" << std::endl; +} + +void Router::cancel_auto_unload_timer(const std::string& model_name) { + std::lock_guard lock(timers_mutex_); + auto it = model_timers_.find(model_name); + if (it != model_timers_.end() && it->second) { + it->second->cancel(); // Non-blocking: sets flag + detaches thread + model_timers_.erase(it); + LOG(DEBUG, "Router") << "Cancelled auto-unload timer for: " << model_name << std::endl; + } +} + WrappedServer* Router::find_server_by_model_name(const std::string& model_name) const { for (const auto& server : loaded_servers_) { if (server->get_model_name() == model_name) { @@ -128,19 +300,32 @@ void Router::evict_all_npu_servers() { } for (auto* server : npu_servers) { LOG(INFO, "Router") << "Evicting NPU server: " << server->get_model_name() << std::endl; - evict_server(server); + evict_server(server); // Ignore return value for bulk eviction } } // Helper: Evict a specific server -void Router::evict_server(WrappedServer* server) { - if (!server) return; +// Returns true if successfully evicted, false if skipped (still busy) +// NOTE: Called with load_mutex_ held. Timer cancellation must be non-blocking. +bool Router::evict_server(WrappedServer* server) { + if (!server) return true; std::string model_name = server->get_model_name(); LOG(INFO, "Router") << "Evicting model: " << model_name << std::endl; - // Wait for any ongoing inference to complete - server->wait_until_not_busy(); + // Cancel auto-unload timer for this model (non-blocking, safe under load_mutex_) + if (auto_unload_timeout_sec_ > 0) { + cancel_auto_unload_timer(model_name); + } + + // Wait for any ongoing inference to complete (with timeout to avoid deadlock) + // Use 30 second timeout - if model is still busy after this, skip unload + const std::chrono::seconds wait_timeout(30); + if (!server->wait_until_not_busy_with_timeout(wait_timeout)) { + LOG(WARNING, "Router") << "Model still busy after " << wait_timeout.count() + << " seconds, skipping unload: " << model_name << std::endl; + return false; // Don't unload - caller should reschedule the timer + } // Unload the server server->unload(); @@ -155,19 +340,34 @@ void Router::evict_server(WrappedServer* server) { ); LOG(INFO, "Router") << "Evicted model: " << model_name << std::endl; + return true; } void Router::evict_all_servers() { LOG(INFO, "Router") << "Evicting all models (" << loaded_servers_.size() << " total)" << std::endl; - // Wait for all servers to finish + // Cancel all auto-unload timers first (non-blocking, safe under load_mutex_) + if (auto_unload_timeout_sec_ > 0) { + std::lock_guard timer_lock(timers_mutex_); + for (auto& [name, timer_entry] : model_timers_) { + if (timer_entry) { + timer_entry->cancel(); + } + } + model_timers_.clear(); + } + + // Wait for all servers to finish (with timeout to avoid indefinite blocking) + const std::chrono::seconds wait_timeout(30); for (const auto& server : loaded_servers_) { - server->wait_until_not_busy(); + if (!server->wait_until_not_busy_with_timeout(wait_timeout)) { + LOG(WARNING, "Router") << "Model still busy after timeout: " << server->get_model_name() << std::endl; + } } // Unload all for (const auto& server : loaded_servers_) { - LOG(INFO, "Router") << "Unloading: " << server->get_model_name() << std::endl; + LOG(INFO, "Router") << "Unloading: " << server->get_model_name() << std::endl; server->unload(); } @@ -344,6 +544,11 @@ void Router::load_model(const std::string& model_name, LOG(INFO, "Router") << "Model loaded successfully. Total loaded: " << loaded_servers_.size() << std::endl; + + // Start auto-unload timer if enabled + if (auto_unload_timeout_sec_ > 0) { + start_auto_unload_timer(model_name, auto_unload_timeout_sec_); + } } else { // ERROR HANDLING (from spec: Error Handling section) // Check if error is "file not found" (exception to nuclear policy) @@ -386,6 +591,11 @@ void Router::load_model(const std::string& model_name, load_cv_.notify_all(); LOG(DEBUG, "Router") << "Retry successful!" << std::endl; + + // Start auto-unload timer if enabled + if (auto_unload_timeout_sec_ > 0) { + start_auto_unload_timer(model_name, auto_unload_timeout_sec_); + } } catch (const std::exception& retry_error) { lock.lock(); is_loading_ = false; @@ -410,21 +620,48 @@ void Router::load_model(const std::string& model_name, } void Router::unload_model(const std::string& model_name) { - std::lock_guard lock(load_mutex_); + // Set flag to prevent new timer creation during unload + unload_in_progress_ = true; - if (model_name.empty()) { - // Unload all models - LOG(INFO, "Router") << "Unload all models called" << std::endl; - evict_all_servers(); + // Cancel auto-unload timer(s) BEFORE acquiring load_mutex_ + // This prevents deadlock with timer thread (which also needs load_mutex_) + if (!model_name.empty()) { + cancel_auto_unload_timer(model_name); } else { - // Unload specific model - LOG(INFO, "Router") << "Unload model called: " << model_name << std::endl; - WrappedServer* server = find_server_by_model_name(model_name); - if (!server) { - throw std::runtime_error("Model not loaded: " + model_name); + // Cancel all timers when unloading all models + std::lock_guard timer_lock(timers_mutex_); + for (auto& [name, timer_entry] : model_timers_) { + if (timer_entry) { + timer_entry->cancel(); + } } - evict_server(server); + model_timers_.clear(); } + + std::lock_guard lock(load_mutex_); + + try { + if (model_name.empty()) { + // Unload all models + LOG(INFO, "Router") << "Unload all models called" << std::endl; + evict_all_servers(); + } else { + // Unload specific model + LOG(INFO, "Router") << "Unload model called: " << model_name << std::endl; + WrappedServer* server = find_server_by_model_name(model_name); + if (!server) { + throw std::runtime_error("Model not loaded: " + model_name); + } + if (!evict_server(server)) { + throw std::runtime_error("Model is busy and could not be unloaded within timeout: " + model_name); + } + } + } catch (...) { + unload_in_progress_ = false; + throw; + } + + unload_in_progress_ = false; } std::string Router::get_loaded_model() const { @@ -509,12 +746,12 @@ std::string Router::get_backend_address() const { template auto Router::execute_inference(const json& request, Func&& inference_func) -> decltype(inference_func(nullptr)) { WrappedServer* server = nullptr; + std::string requested_model; { std::lock_guard lock(load_mutex_); // Extract model from request - required field, no fallback to avoid silent misrouting - std::string requested_model; if (request.contains("model") && request["model"].is_string()) { requested_model = request["model"].get(); } @@ -528,6 +765,11 @@ auto Router::execute_inference(const json& request, Func&& inference_func) -> de return ErrorResponse::from_exception(ModelNotLoadedException(requested_model)); } + // Cancel auto-unload timer while processing request + if (auto_unload_timeout_sec_ > 0) { + cancel_auto_unload_timer(requested_model); + } + // Mark as busy and update access time server->set_busy(true); server->update_access_time(); @@ -537,9 +779,21 @@ auto Router::execute_inference(const json& request, Func&& inference_func) -> de try { auto response = inference_func(server); server->set_busy(false); + + // Reset auto-unload timer when model becomes idle (after request completes) + if (!requested_model.empty() && auto_unload_timeout_sec_ > 0) { + reset_auto_unload_timer(requested_model); + } + return response; } catch (...) { server->set_busy(false); + + // Reset auto-unload timer when model becomes idle (after error) + if (!requested_model.empty() && auto_unload_timeout_sec_ > 0) { + reset_auto_unload_timer(requested_model); + } + throw; } } @@ -548,12 +802,12 @@ auto Router::execute_inference(const json& request, Func&& inference_func) -> de template void Router::execute_streaming(const std::string& request_body, httplib::DataSink& sink, Func&& streaming_func) { WrappedServer* server = nullptr; + std::string requested_model; { std::lock_guard lock(load_mutex_); // Extract model from request body if present (same logic as execute_inference) - std::string requested_model; try { json request = json::parse(request_body); if (request.contains("model") && request["model"].is_string()) { @@ -579,6 +833,11 @@ void Router::execute_streaming(const std::string& request_body, httplib::DataSin return; } + // Cancel auto-unload timer while processing request + if (auto_unload_timeout_sec_ > 0) { + cancel_auto_unload_timer(requested_model); + } + server->set_busy(true); server->update_access_time(); } @@ -586,8 +845,19 @@ void Router::execute_streaming(const std::string& request_body, httplib::DataSin try { streaming_func(server); server->set_busy(false); + + // Reset auto-unload timer when model becomes idle (after streaming completes) + if (!requested_model.empty() && auto_unload_timeout_sec_ > 0) { + reset_auto_unload_timer(requested_model); + } } catch (...) { server->set_busy(false); + + // Reset auto-unload timer when model becomes idle (after error) + if (!requested_model.empty() && auto_unload_timeout_sec_ > 0) { + reset_auto_unload_timer(requested_model); + } + throw; } } diff --git a/src/cpp/server/server.cpp b/src/cpp/server/server.cpp index 4c8d837d4..2fc6be7ba 100644 --- a/src/cpp/server/server.cpp +++ b/src/cpp/server/server.cpp @@ -67,7 +67,7 @@ static const json MIME_TYPES = { Server::Server(int port, const std::string& host, const std::string& log_level, const json& default_options, int max_loaded_models, const std::string& extra_models_dir, bool no_broadcast, - long global_timeout) + long global_timeout, long auto_unload_timeout) : port_(port), host_(host), log_level_(log_level), default_options_(default_options), no_broadcast_(no_broadcast), running_(false), udp_beacon_() { @@ -112,9 +112,22 @@ Server::Server(int port, const std::string& host, const std::string& log_level, backend_manager_ = std::make_unique(); + // Read global auto-unload timer from environment variable + long global_auto_unload_timer = 0; + if (auto env_val = std::getenv("LEMONADE_GLOBAL_AUTO_UNLOAD_TIMER")) { + try { + global_auto_unload_timer = std::stol(env_val); + if (global_auto_unload_timer < 0) { + global_auto_unload_timer = 0; + } + } catch (...) { + global_auto_unload_timer = 0; + } + } + router_ = std::make_unique(default_options_, log_level_, model_manager_.get(), max_loaded_models, - backend_manager_.get()); + backend_manager_.get(), global_auto_unload_timer); LOG(DEBUG, "Server") << "Debug logging enabled - subprocess output will be visible" << std::endl;