Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cortex-mem-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,7 @@ async fn main() -> Result<()> {
&config.embedding.model_name,
config.qdrant.embedding_dim,
None, // user_id parameter
config.cortex.enable_intent_analysis,
)
.await?;

Expand Down
21 changes: 21 additions & 0 deletions cortex-mem-config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,26 @@ pub struct CortexConfig {
/// If not specified, will use system application data directory
#[serde(default)]
pub data_dir: Option<String>,

/// Whether to enable LLM-based intent analysis before vector search.
///
/// When enabled (default), each search call makes an LLM request to:
/// 1. Rewrite the query for better vector matching
/// 2. Detect intent type (entity_lookup / factual / temporal / search / ...)
/// 3. Dynamically tune L0 threshold and L0/L1/L2 scoring weights
///
/// Disable this (`enable_intent_analysis = false`) to skip the LLM call
/// and use the raw query directly. Vector search latency drops from ~15-25s
/// to < 500ms; recall quality is slightly lower without query rewriting.
///
/// Recommended: `false` for latency-sensitive interactive use (e.g. chat plugins),
/// `true` for batch / offline recall where quality matters most.
#[serde(default = "default_enable_intent_analysis")]
pub enable_intent_analysis: bool,
}

/// Serde default for `CortexConfig::enable_intent_analysis`.
///
/// Intent analysis stays on unless the user explicitly opts out in
/// `config.toml`, preserving the pre-existing (higher-quality, slower)
/// search behaviour for configs that predate this flag.
fn default_enable_intent_analysis() -> bool {
    true
}

impl CortexConfig {
Expand Down Expand Up @@ -135,6 +155,7 @@ impl Default for CortexConfig {
fn default() -> Self {
CortexConfig {
data_dir: None, // Use None to trigger smart default
enable_intent_analysis: true,
}
}
}
28 changes: 24 additions & 4 deletions cortex-mem-core/src/search/vector_engine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ pub struct VectorSearchEngine {
memory_event_tx: Option<mpsc::UnboundedSender<MemoryEvent>>,
/// Optional index manager for archived-memory filtering
index_manager: Option<Arc<MemoryIndexManager>>,
/// Whether to call the LLM for intent analysis before each search.
/// When `false`, the raw query is used directly (skips rewriting/threshold tuning).
/// Default: `true`.
enable_intent_analysis: bool,
}

impl VectorSearchEngine {
Expand All @@ -80,6 +84,7 @@ impl VectorSearchEngine {
llm_client: None,
memory_event_tx: None,
index_manager: None,
enable_intent_analysis: true,
}
}

Expand All @@ -97,9 +102,19 @@ impl VectorSearchEngine {
llm_client: Some(llm_client),
memory_event_tx: None,
index_manager: None,
enable_intent_analysis: true,
}
}

/// Control whether LLM intent analysis is performed before each search.
///
/// Set to `false` to skip the LLM round-trip and use the raw query directly.
/// Reduces search latency from ~15-25s to <500ms at the cost of no query rewriting.
pub fn with_intent_analysis(mut self, enabled: bool) -> Self {
self.enable_intent_analysis = enabled;
self
}

/// Set the memory event sender for access tracking (enables forgetting mechanism)
pub fn with_memory_event_tx(mut self, tx: mpsc::UnboundedSender<MemoryEvent>) -> Self {
self.memory_event_tx = Some(tx);
Expand Down Expand Up @@ -589,11 +604,16 @@ impl VectorSearchEngine {

/// 统一意图分析(优先使用 LLM 单次调用,LLM 不可用时使用最小 fallback)
async fn analyze_intent(&self, query: &str) -> Result<EnhancedQueryIntent> {
if let Some(llm) = &self.llm_client {
match self.analyze_intent_with_llm(llm.as_ref(), query).await {
Ok(intent) => return Ok(intent),
Err(e) => warn!("LLM intent analysis failed, using fallback: {}", e),
// Skip LLM call when intent analysis is disabled via config
if self.enable_intent_analysis {
if let Some(llm) = &self.llm_client {
match self.analyze_intent_with_llm(llm.as_ref(), query).await {
Ok(intent) => return Ok(intent),
Err(e) => warn!("LLM intent analysis failed, using fallback: {}", e),
}
}
} else {
debug!("Intent analysis disabled, using raw query directly");
}

// Fallback:LLM 不可用时的基础处理(不含规则判断,仅做基本分词)
Expand Down
1 change: 1 addition & 0 deletions cortex-mem-mcp/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ async fn main() -> Result<()> {
&config.embedding.model_name,
config.qdrant.embedding_dim,
cli.user, // explicit user_id; None → "default" (see MemoryOperations::new)
config.cortex.enable_intent_analysis,
).await?;

let operations = Arc::new(operations);
Expand Down
34 changes: 34 additions & 0 deletions cortex-mem-rig/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,39 @@ pub async fn create_memory_tools_with_tenant_and_vector(
embedding_model_name: &str,
embedding_dim: Option<usize>,
user_id: Option<String>,
) -> Result<MemoryTools, Box<dyn std::error::Error>> {
create_memory_tools_with_config(
data_dir,
tenant_id,
llm_client,
qdrant_url,
qdrant_collection,
qdrant_api_key,
embedding_api_base_url,
embedding_api_key,
embedding_model_name,
embedding_dim,
user_id,
true, // enable_intent_analysis default
).await
}

/// Create memory tools with full features (LLM + Vector Search) and explicit config
///
/// Use this when you want to control intent analysis behaviour from config.
pub async fn create_memory_tools_with_config(
data_dir: impl AsRef<std::path::Path>,
tenant_id: impl Into<String>,
llm_client: Arc<dyn LLMClient>,
qdrant_url: &str,
qdrant_collection: &str,
qdrant_api_key: Option<&str>,
embedding_api_base_url: &str,
embedding_api_key: &str,
embedding_model_name: &str,
embedding_dim: Option<usize>,
user_id: Option<String>,
enable_intent_analysis: bool,
) -> Result<MemoryTools, Box<dyn std::error::Error>> {
let operations = MemoryOperations::new(
data_dir.as_ref().to_str().unwrap(),
Expand All @@ -95,6 +128,7 @@ pub async fn create_memory_tools_with_tenant_and_vector(
embedding_model_name,
embedding_dim,
user_id,
enable_intent_analysis,
)
.await?;
Ok(MemoryTools::new(Arc::new(operations)))
Expand Down
13 changes: 12 additions & 1 deletion cortex-mem-service/src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ pub struct AppState {
/// AutomationManager's tx handle — updated on tenant switch so AutomationManager
/// routes VectorSyncNeeded to the correct tenant coordinator.
pub automation_tx_handle: Option<Arc<RwLock<Option<tokio::sync::mpsc::UnboundedSender<MemoryEvent>>>>>,
/// Whether to use LLM intent analysis before each search (from config.toml [cortex] section).
/// When false, raw query is used directly — much faster but no query rewriting.
pub enable_intent_analysis: bool,
}

impl AppState {
Expand All @@ -52,6 +55,11 @@ impl AppState {
// 获取配置(优先从config.toml,否则从环境变量)
let (llm_client, embedding_config, qdrant_config) = Self::load_configs()?;

// 读取 cortex section 配置(enable_intent_analysis 等)
let enable_intent_analysis = cortex_mem_config::Config::load("config.toml")
.map(|c| c.cortex.enable_intent_analysis)
.unwrap_or(true);

// 构建Cortex Memory
let mut builder = CortexMemBuilder::new(&cortex_dir);

Expand Down Expand Up @@ -115,6 +123,7 @@ impl AppState {
engine = engine.with_memory_event_tx(tx.clone());
}
engine = engine.with_index_manager(index_manager.clone());
engine = engine.with_intent_analysis(enable_intent_analysis);
Some(Arc::new(engine))
} else {
None
Expand All @@ -133,6 +142,7 @@ impl AppState {
current_tenant_id: Arc::new(RwLock::new(None)),
memory_event_tx: Arc::new(RwLock::new(memory_event_tx)),
automation_tx_handle: cortex_automation_tx,
enable_intent_analysis,
})
}

Expand Down Expand Up @@ -385,7 +395,8 @@ impl AppState {
)
.with_index_manager(Arc::new(MemoryIndexManager::new(
tenant_filesystem.clone(),
))),
)))
.with_intent_analysis(self.enable_intent_analysis),
);

let mut engine = self.vector_engine.write().await;
Expand Down
4 changes: 3 additions & 1 deletion cortex-mem-tools/src/operations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ impl MemoryOperations {
embedding_model_name: &str,
embedding_dim: Option<usize>,
user_id: Option<String>,
enable_intent_analysis: bool,
) -> Result<Self> {
let tenant_id = tenant_id.into();
let filesystem = Arc::new(CortexFilesystem::with_tenant(data_dir, &tenant_id));
Expand Down Expand Up @@ -219,7 +220,8 @@ impl MemoryOperations {
llm_client.clone(),
)
.with_memory_event_tx(memory_event_tx.clone())
.with_index_manager(index_manager.clone()),
.with_index_manager(index_manager.clone())
.with_intent_analysis(enable_intent_analysis),
);
tracing::info!("Vector search engine created with LLM, event tracking, and archived filter");

Expand Down
5 changes: 3 additions & 2 deletions examples/cortex-mem-tars/src/agent.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use anyhow::Result;
use chrono::{DateTime, Local};
use cortex_mem_rig::create_memory_tools_with_tenant_and_vector;
use cortex_mem_rig::create_memory_tools_with_config;
use cortex_mem_tools::MemoryOperations;
use futures::StreamExt;
use rig::agent::MultiTurnStreamItem;
Expand Down Expand Up @@ -80,7 +80,7 @@ pub async fn create_memory_agent(
config.embedding.model_name,
config.qdrant.embedding_dim
);
let memory_tools = create_memory_tools_with_tenant_and_vector(
let memory_tools = create_memory_tools_with_config(
data_dir,
agent_id,
cortex_llm_client,
Expand All @@ -92,6 +92,7 @@ pub async fn create_memory_agent(
&config.embedding.model_name,
config.qdrant.embedding_dim,
Some(user_id.to_string()),
config.cortex.enable_intent_analysis,
)
.await?;

Expand Down
1 change: 1 addition & 0 deletions examples/cortex-mem-tars/src/infrastructure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ impl Infrastructure {
&config.embedding.model_name,
config.qdrant.embedding_dim,
None, // user_id = None,使用tenant_id作为user_id
config.cortex.enable_intent_analysis,
)
.await
.context("Failed to initialize MemoryOperations")?;
Expand Down
Loading