1 change: 1 addition & 0 deletions lib/llm/src/entrypoint/input/batch.rs
@@ -228,6 +228,7 @@ async fn evaluate(
inner,
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};
let mut stream = engine.generate(Context::new(req)).await?;
let mut output = String::new();
1 change: 1 addition & 0 deletions lib/llm/src/entrypoint/input/text.rs
@@ -111,6 +111,7 @@ async fn main_loop(
inner,
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};

// Call the model
8 changes: 8 additions & 0 deletions lib/llm/src/http/service/openai.rs
@@ -1350,6 +1350,7 @@ mod tests {
},
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};
let result = validate_chat_completion_required_fields(&request);
assert!(result.is_err());
@@ -1377,6 +1378,7 @@
},
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};
let result = validate_chat_completion_required_fields(&request);
assert!(result.is_ok());
@@ -1549,6 +1551,7 @@
},
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};

let result = validate_chat_completion_fields_generic(&request);
@@ -1576,6 +1579,7 @@
},
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};
let result = validate_chat_completion_fields_generic(&request);
assert!(result.is_err());
@@ -1602,6 +1606,7 @@
},
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};
let result = validate_chat_completion_fields_generic(&request);
assert!(result.is_err());
@@ -1628,6 +1633,7 @@
},
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};
let result = validate_chat_completion_fields_generic(&request);
assert!(result.is_err());
@@ -1656,6 +1662,7 @@
.build()
.unwrap(),
nvext: None,
+ chat_template_args: None,
};
let result = validate_chat_completion_fields_generic(&request);
assert!(result.is_err());
@@ -1682,6 +1689,7 @@
},
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};
let result = validate_chat_completion_fields_generic(&request);
assert!(result.is_err());
6 changes: 6 additions & 0 deletions lib/llm/src/preprocessor/prompt.rs
@@ -20,6 +20,7 @@

use anyhow::Result;
use minijinja::value::Value;
+ use std::collections::HashMap;
use std::sync::Arc;

mod template;
@@ -57,6 +58,11 @@ pub trait OAIChatLikeRequest {

fn should_add_generation_prompt(&self) -> bool;

+ /// Optional additional args to merge into the chat template context
+ fn chat_template_args(&self) -> Option<&HashMap<String, serde_json::Value>> {
+     None
+ }

/// Returns the type of input for the prompt. Default is Text.
fn prompt_input_type(&self) -> PromptInput {
PromptInput::Text(TextInput::Single(String::new()))
14 changes: 11 additions & 3 deletions lib/llm/src/preprocessor/prompt/template/oai.rs
@@ -114,6 +114,10 @@ impl OAIChatLikeRequest for NvCreateChatCompletionRequest {
fn extract_text(&self) -> Option<TextInput> {
Some(TextInput::Single(String::new()))
}

+ fn chat_template_args(&self) -> Option<&std::collections::HashMap<String, serde_json::Value>> {
+     self.chat_template_args.as_ref()
+ }
}

impl OAIChatLikeRequest for NvCreateCompletionRequest {
@@ -207,9 +211,13 @@ impl OAIPromptFormatter for HfTokenizerConfigJsonFormatter {
..mixins
};

- let ctx = context! { ..ctx, ..context! {
-
- }};
+ // Merge any additional args into the context last so they take precedence
+ let ctx = if let Some(args) = req.chat_template_args() {
+     let extra = Value::from_serialize(args);
+     context! { ..ctx, ..extra }
+ } else {
+     ctx
+ };

let tmpl: minijinja::Template<'_, '_> = if has_tools {
self.env.get_template("tool_use")?
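The new branch leans on minijinja's context! spread syntax, which the deleted code already used (with an empty second map). A minimal standalone sketch of the same merge pattern follows; the template name, keys, and values are illustrative, not taken from this PR:

use minijinja::{context, value::Value, Environment};
use std::collections::HashMap;

fn main() {
    let mut env = Environment::new();
    env.add_template("greet", "{{ greeting }}, {{ name }}!").unwrap();

    // Base render context, analogous to `ctx` built by the formatter.
    let ctx = context! { greeting => "Hello" };

    // Caller-supplied args, analogous to `req.chat_template_args()`.
    let mut args: HashMap<String, serde_json::Value> = HashMap::new();
    args.insert("name".into(), serde_json::Value::from("world"));
    let extra = Value::from_serialize(&args);

    // Spread both into a single context, as the new formatter code does.
    let merged = context! { ..ctx, ..extra };

    let out = env.get_template("greet").unwrap().render(&merged).unwrap();
    assert_eq!(out, "Hello, world!");
}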
4 changes: 4 additions & 0 deletions lib/llm/src/protocols/openai/chat_completions.rs
@@ -41,6 +41,10 @@ pub struct NvCreateChatCompletionRequest {

#[serde(skip_serializing_if = "Option::is_none")]
pub nvext: Option<NvExt>,

+ /// Extra args to pass to the chat template rendering context
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub chat_template_args: Option<std::collections::HashMap<String, serde_json::Value>>,
}

/// A response structure for unary chat completion responses, embedding OpenAI's
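Because the field is #[serde(default)] and skipped when None, existing serialized requests round-trip unchanged; callers opt in by supplying a map. A hedged construction sketch, assuming an `inner` OpenAI request already built as in the tests above (the `enable_thinking` key is a hypothetical template variable, not defined by this PR):

use std::collections::HashMap;

let mut args: HashMap<String, serde_json::Value> = HashMap::new();
args.insert("enable_thinking".to_string(), serde_json::Value::Bool(false));

let request = NvCreateChatCompletionRequest {
    inner,
    common: Default::default(),
    nvext: None,
    chat_template_args: Some(args),
};

// The map should appear as a top-level "chat_template_args" object on the wire.
let json = serde_json::to_value(&request).unwrap();
assert_eq!(json["chat_template_args"]["enable_thinking"], serde_json::Value::Bool(false));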
1 change: 1 addition & 0 deletions lib/llm/src/protocols/openai/responses.rs
@@ -175,6 +175,7 @@ impl TryFrom<NvCreateResponse> for NvCreateChatCompletionRequest {
},
common: Default::default(),
nvext: resp.nvext,
+ chat_template_args: None,
})
}
}
3 changes: 3 additions & 0 deletions lib/llm/tests/http-service.rs
@@ -768,6 +768,7 @@ async fn test_nv_custom_client() {
inner: inner_request,
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};

let result = nv_custom_client.chat_stream(request).await;
@@ -807,6 +808,7 @@
inner: inner_request,
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};

let result = nv_custom_client.chat_stream(request).await;
@@ -847,6 +849,7 @@
inner: inner_request,
common: Default::default(),
nvext: None,
+ chat_template_args: None,
};

let result = nv_custom_client
1 change: 1 addition & 0 deletions lib/llm/tests/preprocessor.rs
@@ -270,6 +270,7 @@ impl Request {
inner,
common: Default::default(),
nvext: None,
+ chat_template_args: None,
}
}
}
3 changes: 3 additions & 0 deletions lib/llm/tests/test_common_ext.rs
@@ -67,6 +67,7 @@ fn test_sampling_parameters_include_stop_str_in_output_extraction() {
.build()
.unwrap(),
nvext: None,
+ chat_template_args: None,
};

let sampling = request.extract_sampling_options().unwrap();
@@ -327,6 +328,7 @@ fn test_serialization_preserves_structure() {
ignore_eos: Some(false),
..Default::default()
}),
+ chat_template_args: None,
};

let json = serde_json::to_value(&request).unwrap();
@@ -376,6 +378,7 @@ fn test_sampling_parameters_extraction() {
.build()
.unwrap(),
nvext: None,
+ chat_template_args: None,
};

let sampling_options = request.extract_sampling_options().unwrap();
1 change: 1 addition & 0 deletions lib/llm/tests/test_streaming_usage.rs
@@ -146,6 +146,7 @@ fn create_chat_request(include_usage: Option<bool>) -> NvCreateChatCompletionRequest {
inner,
common: Default::default(),
nvext: None,
+ chat_template_args: None,
}
}
