
Commit 04b22bb

fix(server): out-of-memory error across multiple chat rounds
1 parent b4e2df5 commit 04b22bb

File tree

9 files changed, +185 −92 lines


apps/server/api/src/config/logic.rs

Lines changed: 6 additions & 0 deletions
@@ -7,6 +7,7 @@ pub struct LogicConfig {
     /// unit: ms
     silence_voice_timeout: Option<i64>,
     system_prompt: Option<String>,
+    max_prompt_len: Option<u64>,
 }
 
 impl LogicConfig {
@@ -17,6 +18,7 @@ impl LogicConfig {
             system_prompt: Some(String::from(
                 "你是一个助手,所有回答必须使用纯文本自然语言,禁止使用任何Markdown符号如#、-、*等。",
             )),
+            max_prompt_len: Some(3000),
         }
     }
 
@@ -31,4 +33,8 @@ impl LogicConfig {
     pub fn system_prompt(&self) -> &str {
         self.system_prompt.as_deref().unwrap_or_default()
     }
+
+    pub fn max_prompt_len(&self) -> u64 {
+        self.max_prompt_len.unwrap()
+    }
 }
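
(For reference, the Chinese default system prompt above instructs the assistant to answer in plain natural-language text and to avoid Markdown markers such as #, -, and *.) The new accessor unwraps the Option, so it stays panic-free only while max_prompt_len is actually populated, either by the constructor above or by whatever loads LogicConfig from the server configuration. A minimal stand-alone sketch of that semantics (the struct below is a cut-down stand-in, not the real LogicConfig):

// Cut-down stand-in for LogicConfig, only to illustrate the accessor added above.
struct Config {
    max_prompt_len: Option<u64>,
}

impl Config {
    fn max_prompt_len(&self) -> u64 {
        // Same shape as the committed accessor: unwrap() panics if the field is None.
        self.max_prompt_len.unwrap()
    }
}

fn main() {
    let cfg = Config { max_prompt_len: Some(3000) }; // mirrors the 3000 default above
    assert_eq!(cfg.max_prompt_len(), 3000);
    // Config { max_prompt_len: None }.max_prompt_len() would panic, so a config
    // source that omits the field must fall back to the default before this is read.
}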

apps/server/api/src/llm/client.rs

Lines changed: 31 additions & 1 deletion
@@ -1,4 +1,4 @@
-use std::{sync::Arc, thread};
+use std::{collections::VecDeque, sync::Arc, thread};
 
 use crate::{
     common::ModelError,
@@ -26,6 +26,7 @@ pub struct Client {
     model: Arc<Box<dyn Model>>,
     temperature: Option<f64>,
     max_tokens: Option<u64>,
+    max_prompt_len: Option<u64>,
     history: Arc<Mutex<History>>,
     mcp_host: Option<Arc<Mutex<dyn McpHost>>>,
 }
@@ -59,6 +60,11 @@ impl Client {
         self
     }
 
+    pub fn with_max_prompt_len(mut self, max_prompt_len: Option<u64>) -> Self {
+        self.max_prompt_len = max_prompt_len;
+        self
+    }
+
     pub fn chat(
         &self,
         request: ChatRequest,
@@ -70,6 +76,7 @@ impl Client {
         let clone_history = self.history.clone();
         let temperature = self.temperature;
         let max_tokens = self.max_tokens;
+        let max_prompt_len = self.max_prompt_len;
         thread::spawn(move || {
             let output = block_on(async move {
                 let tools = {
@@ -84,6 +91,28 @@
                 while has_next_step {
                     let history = clone_history.clone();
                     let mut history = history.lock().await;
+                    if let Some(max_prompt_len) = max_prompt_len {
+                        // cut prompt
+                        let mut current_len: u64 = 0;
+                        if let Some(item) = &history.preamble {
+                            current_len += item.len() as u64;
+                        }
+                        current_len += model.calculate_tools_prompt_len(&tools);
+                        let mut target_message_list = VecDeque::new();
+                        // TODO: remove clone?
+                        let chat_history: Vec<_> =
+                            history.chat_history.clone().into_iter().rev().collect();
+                        for message in chat_history {
+                            let len = model.calculate_message_prompt_len(&message);
+                            current_len += len;
+                            if current_len <= max_prompt_len {
+                                target_message_list.push_front(message);
+                            } else {
+                                break;
+                            }
+                        }
+                        history.chat_history = target_message_list.into();
+                    }
                     let chat_history = {
                         if !history.chat_history.is_empty() {
                             let mut result = OneOrMany::many(history.chat_history.clone()).unwrap();
@@ -293,6 +322,7 @@ impl ClientBuilder {
             model: self.model,
             temperature: None,
            max_tokens: None,
+            max_prompt_len: Some(3000),
            history: Arc::new(Mutex::new(History {
                preamble: None,
                chat_history: vec![],
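
The added block is the core of the fix: before each round, the preamble and the tool definitions are counted as a fixed cost, then the history is walked from newest to oldest and only the turns that still fit under max_prompt_len are kept, so the oldest turns are the ones dropped. Client::with_max_prompt_len (or the builder default of Some(3000) above) supplies the budget, and passing None disables trimming entirely. A stand-alone distillation of that policy, using plain strings and a caller-supplied length function instead of rig messages and the Model::calculate_*_prompt_len hooks (illustrative only, not part of the commit):

use std::collections::VecDeque;

// Stand-alone distillation of the trimming policy in the hunk above. Messages are
// walked newest-to-oldest and kept only while the running total stays within the
// budget, so the oldest turns are the ones discarded.
fn trim_history(
    history: Vec<String>,
    fixed_overhead: u64, // preamble + tool definitions, already counted
    max_prompt_len: u64,
    estimate_len: impl Fn(&str) -> u64,
) -> Vec<String> {
    let mut current_len = fixed_overhead;
    let mut kept = VecDeque::new();
    for message in history.into_iter().rev() {
        current_len += estimate_len(message.as_str());
        if current_len <= max_prompt_len {
            kept.push_front(message); // push_front restores chronological order
        } else {
            break;
        }
    }
    kept.into()
}

fn main() {
    let history = vec![
        "old turn".repeat(50),
        "recent turn".to_string(),
        "latest turn".to_string(),
    ];
    let kept = trim_history(history, 100, 200, |m| m.len() as u64);
    // Only the newest turns survive the 200-unit budget; the padded old turn is dropped.
    assert_eq!(kept, vec!["recent turn".to_string(), "latest turn".to_string()]);
}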

apps/server/api/src/llm/mod.rs

Lines changed: 20 additions & 1 deletion
@@ -8,7 +8,8 @@ use crate::{
 };
 use async_trait::async_trait;
 use rig::{
-    completion::{CompletionError, CompletionRequest},
+    completion::{CompletionError, CompletionRequest, ToolDefinition},
+    message::Message,
     streaming::StreamingCompletionResponse,
 };
 use std::sync::{Arc, OnceLock};
@@ -22,6 +23,12 @@ pub trait Model: Send + Sync {
         StreamingCompletionResponse<rig::providers::openai::streaming::StreamingCompletionResponse>,
         CompletionError,
     >;
+
+    fn calculate_system_prompt_len(&self, system_prompt: &Option<String>) -> u64;
+
+    fn calculate_tools_prompt_len(&self, tools: &[ToolDefinition]) -> u64;
+
+    fn calculate_message_prompt_len(&self, message: &Message) -> u64;
 }
 
 #[derive(Default, Clone)]
@@ -38,6 +45,18 @@ impl Model for DummyModel {
     > {
         todo!()
     }
+
+    fn calculate_system_prompt_len(&self, _system_prompt: &Option<String>) -> u64 {
+        todo!()
+    }
+
+    fn calculate_tools_prompt_len(&self, _tools: &[ToolDefinition]) -> u64 {
+        todo!()
+    }
+
+    fn calculate_message_prompt_len(&self, _message: &Message) -> u64 {
+        todo!()
+    }
 }
 
 static INSTANCE: OnceLock<LlmFactory> = OnceLock::new();
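
Every implementation of the three new methods in this commit is still todo!(), and the unit is only implied: the loop in client.rs counts the preamble with item.len(), i.e. UTF-8 bytes, so the 3000 default reads as a byte budget rather than a token budget. A minimal estimator in that spirit is sketched below (illustrative only; a production version should use the model's own tokenizer and keep all three methods in the same unit):

// Hypothetical byte-count estimator, matching the unit the client.rs loop already
// uses for the preamble (str::len, i.e. UTF-8 bytes). Not part of the commit.
fn estimate_prompt_len(text: &str) -> u64 {
    text.len() as u64
}

// Per-message estimates also need some allowance for role markers and
// chat-template tokens; the constant here is a guess, not a measured value.
const PER_MESSAGE_OVERHEAD: u64 = 16;

fn estimate_message_len(text: &str) -> u64 {
    PER_MESSAGE_OVERHEAD + estimate_prompt_len(text)
}

fn main() {
    assert_eq!(estimate_prompt_len("hello"), 5);
    // CJK characters are 3 bytes each in UTF-8, so byte counts overstate them vs. chars.
    assert_eq!(estimate_prompt_len("你好"), 6);
    assert_eq!(estimate_message_len("hello"), 21);
}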

apps/server/api/src/llm/model/minicpm4/mod.rs

Lines changed: 13 additions & 1 deletion
@@ -10,7 +10,7 @@ use async_trait::async_trait;
 use futures::{SinkExt, StreamExt, executor::block_on};
 use futures_channel::mpsc::unbounded;
 use rig::{
-    completion::{CompletionError, CompletionRequest},
+    completion::{CompletionError, CompletionRequest, ToolDefinition},
     message::{Message, UserContent},
     streaming::{RawStreamingChoice, StreamingCompletionResponse},
 };
@@ -101,6 +101,18 @@ impl<'a> Model for Minicpm4<'a> {
         });
         Ok(StreamingCompletionResponse::stream(Box::pin(rx)))
     }
+
+    fn calculate_system_prompt_len(&self, _system_prompt: &Option<String>) -> u64 {
+        todo!()
+    }
+
+    fn calculate_tools_prompt_len(&self, _tools: &[ToolDefinition]) -> u64 {
+        todo!()
+    }
+
+    fn calculate_message_prompt_len(&self, _message: &Message) -> u64 {
+        todo!()
+    }
 }
 
 fn convert_response(
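
As written in this file, chatting through a Minicpm4-backed client with a Some budget (and the ClientBuilder now defaults to Some(3000)) would reach these todo!()s and panic; the five changed files not shown on this page may already supply real bodies elsewhere. A hedged sketch of how the stubs could be filled in with the same byte-count convention as above, to drop into the impl<'a> Model for Minicpm4<'a> block; the public name/description/parameters fields on rig's ToolDefinition and serde_json serialization of rig's Message are assumptions about rig's API, not something this diff shows:

    // Hedged sketch only, not part of the commit. Assumes ToolDefinition exposes
    // name/description/parameters and that Message implements serde::Serialize;
    // if either does not hold, the bodies would have to walk the rig types explicitly.
    fn calculate_system_prompt_len(&self, system_prompt: &Option<String>) -> u64 {
        system_prompt.as_deref().map_or(0, |p| p.len() as u64)
    }

    fn calculate_tools_prompt_len(&self, tools: &[ToolDefinition]) -> u64 {
        tools
            .iter()
            .map(|t| (t.name.len() + t.description.len() + t.parameters.to_string().len()) as u64)
            .sum()
    }

    fn calculate_message_prompt_len(&self, message: &Message) -> u64 {
        // Measuring the JSON rendering overestimates slightly, which errs on the
        // safe side when the goal is to stay under a memory cap.
        serde_json::to_string(message).map(|s| s.len() as u64).unwrap_or(0)
    }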
