src/cpp/server/backends/llamacpp_server.cpp (4 additions, 0 deletions)

@@ -158,6 +158,7 @@ void LlamaCppServer::load(const std::string& model_name,
     LOG(DEBUG, "LlamaCpp") << "Per-model settings: " << options.to_log_string() << std::endl;

     int ctx_size = options.get_option("ctx_size");
+    std::string backend_device = options.get_option("device");
     std::string llamacpp_backend = options.get_option("llamacpp_backend");
     std::string llamacpp_args = options.get_option("llamacpp_args");

@@ -200,6 +201,9 @@

     push_arg(args, reserved_flags, "-m", gguf_path, std::vector<std::string>{"--model"});
     push_arg(args, reserved_flags, "--ctx-size", std::to_string(ctx_size), std::vector<std::string>{"-c"});
+    if (backend_device != "") {
+        push_arg(args, reserved_flags, "--device", backend_device);
+    }
     push_arg(args, reserved_flags, "--port", std::to_string(port_));
     push_arg(args, reserved_flags, "--jinja", std::vector<std::string>{"--no-jinja"});
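For context, the effect of the lines added above is that a non-empty per-model "device" value is forwarded to the spawned llama-server process as a --device argument. Below is a minimal, self-contained sketch of the resulting argument list; the model path, port, and concrete values are placeholders, and push_arg's handling of user-reserved flags is intentionally omitted:

#include <string>
#include <vector>

int main() {
    // Illustrative values; in the PR these come from options.get_option(...).
    std::string backend_device = "Vulkan0";
    int ctx_size = 4096;

    std::vector<std::string> args = {"llama-server"};
    args.insert(args.end(), {"-m", "/path/to/model.gguf"});             // placeholder path
    args.insert(args.end(), {"--ctx-size", std::to_string(ctx_size)});
    if (backend_device != "") {
        // New in this PR: pass the device list through to llama-server's --device flag.
        args.insert(args.end(), {"--device", backend_device});
    }
    args.insert(args.end(), {"--port", "8080"});                        // placeholder port
    args.push_back("--jinja");
    // args now spells: llama-server -m /path/to/model.gguf --ctx-size 4096
    //                  --device Vulkan0 --port 8080 --jinja
    return 0;
}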
src/cpp/server/recipe_options.cpp (8 additions, 1 deletion)

@@ -9,6 +9,7 @@ using json = nlohmann::json;

 static const json DEFAULTS = {
     {"ctx_size", 4096},
+    {"device", ""},
     {"llamacpp_backend", ""},  // Will be overridden dynamically
     {"llamacpp_args", ""},
     {"sd-cpp_backend", ""},  // sd.cpp backend selection (cpu or rocm)
@@ -32,6 +33,12 @@ static const json CLI_OPTIONS = {
         {"envname", "LEMONADE_CTX_SIZE"},
         {"help", "Context size for the model"}
     }},
+    {"--device", {
+        {"option_name", "device"},
+        {"type_name", "DEVICE"},
+        {"envname", "LEMONADE_DEVICE"},
+        {"help", "Comma-separated list of accelerator devices to use (e.g. Vulkan0)"}
+    }},
     {"--llamacpp", {
         {"option_name", "llamacpp_backend"},
         {"type_name", "BACKEND"},
@@ -100,7 +107,7 @@

 static std::vector<std::string> get_keys_for_recipe(const std::string& recipe) {
     if (recipe == "llamacpp") {
-        return {"ctx_size", "llamacpp_backend", "llamacpp_args"};
+        return {"ctx_size", "device", "llamacpp_backend", "llamacpp_args"};
     } else if (recipe == "whispercpp") {
         return {"whispercpp_backend", "whispercpp_args"};
     } else if (recipe == "flm") {
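The recipe_options.cpp changes register "device" as a recognized llamacpp-recipe key with a default, a --device CLI flag, and a LEMONADE_DEVICE environment variable. The sketch below is hypothetical and not the project's actual resolution code: it only illustrates the conventional precedence one would expect (explicit CLI value over LEMONADE_DEVICE over the DEFAULTS entry), with names chosen for illustration:

#include <cstdlib>
#include <map>
#include <string>

#include <nlohmann/json.hpp>
using json = nlohmann::json;

// Mirrors the two entries relevant to this PR; the real DEFAULTS table has more keys.
static const json DEFAULTS = {{"ctx_size", 4096}, {"device", ""}};

// Hypothetical resolver: --device on the CLI > LEMONADE_DEVICE > default ("").
std::string resolve_device(const std::map<std::string, std::string>& cli_values) {
    auto it = cli_values.find("device");
    if (it != cli_values.end()) return it->second;
    if (const char* env = std::getenv("LEMONADE_DEVICE")) return env;
    return DEFAULTS["device"].get<std::string>();  // "" means "let llama-server pick"
}

int main() {
    std::map<std::string, std::string> cli = {{"device", "Vulkan0"}};
    return resolve_device(cli) == "Vulkan0" ? 0 : 1;
}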