diff --git a/cortex-mem-cli/src/main.rs b/cortex-mem-cli/src/main.rs index be681b1..6e77563 100644 --- a/cortex-mem-cli/src/main.rs +++ b/cortex-mem-cli/src/main.rs @@ -241,6 +241,7 @@ async fn main() -> Result<()> { &config.embedding.model_name, config.qdrant.embedding_dim, None, // user_id parameter + config.cortex.enable_intent_analysis, ) .await?; diff --git a/cortex-mem-config/src/lib.rs b/cortex-mem-config/src/lib.rs index c47b6b7..359e0a8 100644 --- a/cortex-mem-config/src/lib.rs +++ b/cortex-mem-config/src/lib.rs @@ -20,6 +20,26 @@ pub struct CortexConfig { /// If not specified, will use system application data directory #[serde(default)] pub data_dir: Option, + + /// Whether to enable LLM-based intent analysis before vector search. + /// + /// When enabled (default), each search call makes an LLM request to: + /// 1. Rewrite the query for better vector matching + /// 2. Detect intent type (entity_lookup / factual / temporal / search / ...) + /// 3. Dynamically tune L0 threshold and L0/L1/L2 scoring weights + /// + /// Disable this (`enable_intent_analysis = false`) to skip the LLM call + /// and use the raw query directly. Vector search latency drops from ~15-25s + /// to < 500ms; recall quality is slightly lower without query rewriting. + /// + /// Recommended: `false` for latency-sensitive interactive use (e.g. chat plugins), + /// `true` for batch / offline recall where quality matters most. + #[serde(default = "default_enable_intent_analysis")] + pub enable_intent_analysis: bool, +} + +fn default_enable_intent_analysis() -> bool { + true } impl CortexConfig { @@ -135,6 +155,7 @@ impl Default for CortexConfig { fn default() -> Self { CortexConfig { data_dir: None, // Use None to trigger smart default + enable_intent_analysis: true, } } } diff --git a/cortex-mem-core/src/search/vector_engine.rs b/cortex-mem-core/src/search/vector_engine.rs index 6cfbf2e..ce57156 100644 --- a/cortex-mem-core/src/search/vector_engine.rs +++ b/cortex-mem-core/src/search/vector_engine.rs @@ -64,6 +64,10 @@ pub struct VectorSearchEngine { memory_event_tx: Option>, /// Optional index manager for archived-memory filtering index_manager: Option>, + /// Whether to call the LLM for intent analysis before each search. + /// When `false`, the raw query is used directly (skips rewriting/threshold tuning). + /// Default: `true`. + enable_intent_analysis: bool, } impl VectorSearchEngine { @@ -80,6 +84,7 @@ impl VectorSearchEngine { llm_client: None, memory_event_tx: None, index_manager: None, + enable_intent_analysis: true, } } @@ -97,9 +102,19 @@ impl VectorSearchEngine { llm_client: Some(llm_client), memory_event_tx: None, index_manager: None, + enable_intent_analysis: true, } } + /// Control whether LLM intent analysis is performed before each search. + /// + /// Set to `false` to skip the LLM round-trip and use the raw query directly. + /// Reduces search latency from ~15-25s to <500ms at the cost of no query rewriting. + pub fn with_intent_analysis(mut self, enabled: bool) -> Self { + self.enable_intent_analysis = enabled; + self + } + /// Set the memory event sender for access tracking (enables forgetting mechanism) pub fn with_memory_event_tx(mut self, tx: mpsc::UnboundedSender) -> Self { self.memory_event_tx = Some(tx); @@ -589,11 +604,16 @@ impl VectorSearchEngine { /// 统一意图分析(优先使用 LLM 单次调用,LLM 不可用时使用最小 fallback) async fn analyze_intent(&self, query: &str) -> Result { - if let Some(llm) = &self.llm_client { - match self.analyze_intent_with_llm(llm.as_ref(), query).await { - Ok(intent) => return Ok(intent), - Err(e) => warn!("LLM intent analysis failed, using fallback: {}", e), + // Skip LLM call when intent analysis is disabled via config + if self.enable_intent_analysis { + if let Some(llm) = &self.llm_client { + match self.analyze_intent_with_llm(llm.as_ref(), query).await { + Ok(intent) => return Ok(intent), + Err(e) => warn!("LLM intent analysis failed, using fallback: {}", e), + } } + } else { + debug!("Intent analysis disabled, using raw query directly"); } // Fallback:LLM 不可用时的基础处理(不含规则判断,仅做基本分词) diff --git a/cortex-mem-mcp/src/main.rs b/cortex-mem-mcp/src/main.rs index b72879d..289a2a6 100644 --- a/cortex-mem-mcp/src/main.rs +++ b/cortex-mem-mcp/src/main.rs @@ -98,6 +98,7 @@ async fn main() -> Result<()> { &config.embedding.model_name, config.qdrant.embedding_dim, cli.user, // explicit user_id; None → "default" (see MemoryOperations::new) + config.cortex.enable_intent_analysis, ).await?; let operations = Arc::new(operations); diff --git a/cortex-mem-rig/src/lib.rs b/cortex-mem-rig/src/lib.rs index 7d5210d..adc4603 100644 --- a/cortex-mem-rig/src/lib.rs +++ b/cortex-mem-rig/src/lib.rs @@ -82,6 +82,39 @@ pub async fn create_memory_tools_with_tenant_and_vector( embedding_model_name: &str, embedding_dim: Option, user_id: Option, +) -> Result> { + create_memory_tools_with_config( + data_dir, + tenant_id, + llm_client, + qdrant_url, + qdrant_collection, + qdrant_api_key, + embedding_api_base_url, + embedding_api_key, + embedding_model_name, + embedding_dim, + user_id, + true, // enable_intent_analysis default + ).await +} + +/// Create memory tools with full features (LLM + Vector Search) and explicit config +/// +/// Use this when you want to control intent analysis behaviour from config. +pub async fn create_memory_tools_with_config( + data_dir: impl AsRef, + tenant_id: impl Into, + llm_client: Arc, + qdrant_url: &str, + qdrant_collection: &str, + qdrant_api_key: Option<&str>, + embedding_api_base_url: &str, + embedding_api_key: &str, + embedding_model_name: &str, + embedding_dim: Option, + user_id: Option, + enable_intent_analysis: bool, ) -> Result> { let operations = MemoryOperations::new( data_dir.as_ref().to_str().unwrap(), @@ -95,6 +128,7 @@ pub async fn create_memory_tools_with_tenant_and_vector( embedding_model_name, embedding_dim, user_id, + enable_intent_analysis, ) .await?; Ok(MemoryTools::new(Arc::new(operations))) diff --git a/cortex-mem-service/src/state.rs b/cortex-mem-service/src/state.rs index 59e4b1e..6e1383b 100644 --- a/cortex-mem-service/src/state.rs +++ b/cortex-mem-service/src/state.rs @@ -37,6 +37,9 @@ pub struct AppState { /// AutomationManager's tx handle — updated on tenant switch so AutomationManager /// routes VectorSyncNeeded to the correct tenant coordinator. pub automation_tx_handle: Option>>>>, + /// Whether to use LLM intent analysis before each search (from config.toml [cortex] section). + /// When false, raw query is used directly — much faster but no query rewriting. + pub enable_intent_analysis: bool, } impl AppState { @@ -52,6 +55,11 @@ impl AppState { // 获取配置(优先从config.toml,否则从环境变量) let (llm_client, embedding_config, qdrant_config) = Self::load_configs()?; + // 读取 cortex section 配置(enable_intent_analysis 等) + let enable_intent_analysis = cortex_mem_config::Config::load("config.toml") + .map(|c| c.cortex.enable_intent_analysis) + .unwrap_or(true); + // 构建Cortex Memory let mut builder = CortexMemBuilder::new(&cortex_dir); @@ -115,6 +123,7 @@ impl AppState { engine = engine.with_memory_event_tx(tx.clone()); } engine = engine.with_index_manager(index_manager.clone()); + engine = engine.with_intent_analysis(enable_intent_analysis); Some(Arc::new(engine)) } else { None @@ -133,6 +142,7 @@ impl AppState { current_tenant_id: Arc::new(RwLock::new(None)), memory_event_tx: Arc::new(RwLock::new(memory_event_tx)), automation_tx_handle: cortex_automation_tx, + enable_intent_analysis, }) } @@ -385,7 +395,8 @@ impl AppState { ) .with_index_manager(Arc::new(MemoryIndexManager::new( tenant_filesystem.clone(), - ))), + ))) + .with_intent_analysis(self.enable_intent_analysis), ); let mut engine = self.vector_engine.write().await; diff --git a/cortex-mem-tools/src/operations.rs b/cortex-mem-tools/src/operations.rs index c6eaa0d..9188932 100644 --- a/cortex-mem-tools/src/operations.rs +++ b/cortex-mem-tools/src/operations.rs @@ -109,6 +109,7 @@ impl MemoryOperations { embedding_model_name: &str, embedding_dim: Option, user_id: Option, + enable_intent_analysis: bool, ) -> Result { let tenant_id = tenant_id.into(); let filesystem = Arc::new(CortexFilesystem::with_tenant(data_dir, &tenant_id)); @@ -219,7 +220,8 @@ impl MemoryOperations { llm_client.clone(), ) .with_memory_event_tx(memory_event_tx.clone()) - .with_index_manager(index_manager.clone()), + .with_index_manager(index_manager.clone()) + .with_intent_analysis(enable_intent_analysis), ); tracing::info!("Vector search engine created with LLM, event tracking, and archived filter"); diff --git a/examples/cortex-mem-tars/src/agent.rs b/examples/cortex-mem-tars/src/agent.rs index 780d2b9..1f699d9 100644 --- a/examples/cortex-mem-tars/src/agent.rs +++ b/examples/cortex-mem-tars/src/agent.rs @@ -1,6 +1,6 @@ use anyhow::Result; use chrono::{DateTime, Local}; -use cortex_mem_rig::create_memory_tools_with_tenant_and_vector; +use cortex_mem_rig::create_memory_tools_with_config; use cortex_mem_tools::MemoryOperations; use futures::StreamExt; use rig::agent::MultiTurnStreamItem; @@ -80,7 +80,7 @@ pub async fn create_memory_agent( config.embedding.model_name, config.qdrant.embedding_dim ); - let memory_tools = create_memory_tools_with_tenant_and_vector( + let memory_tools = create_memory_tools_with_config( data_dir, agent_id, cortex_llm_client, @@ -92,6 +92,7 @@ pub async fn create_memory_agent( &config.embedding.model_name, config.qdrant.embedding_dim, Some(user_id.to_string()), + config.cortex.enable_intent_analysis, ) .await?; diff --git a/examples/cortex-mem-tars/src/infrastructure.rs b/examples/cortex-mem-tars/src/infrastructure.rs index fc1e1fb..3cf321b 100644 --- a/examples/cortex-mem-tars/src/infrastructure.rs +++ b/examples/cortex-mem-tars/src/infrastructure.rs @@ -46,6 +46,7 @@ impl Infrastructure { &config.embedding.model_name, config.qdrant.embedding_dim, None, // user_id = None,使用tenant_id作为user_id + config.cortex.enable_intent_analysis, ) .await .context("Failed to initialize MemoryOperations")?;