diff --git a/cortex-mem-cli/src/main.rs b/cortex-mem-cli/src/main.rs
index be681b1..6e77563 100644
--- a/cortex-mem-cli/src/main.rs
+++ b/cortex-mem-cli/src/main.rs
@@ -241,6 +241,7 @@ async fn main() -> Result<()> {
         &config.embedding.model_name,
         config.qdrant.embedding_dim,
         None,  // user_id parameter
+        config.cortex.enable_intent_analysis,
     )
     .await?;
 
diff --git a/cortex-mem-config/src/lib.rs b/cortex-mem-config/src/lib.rs
index c47b6b7..359e0a8 100644
--- a/cortex-mem-config/src/lib.rs
+++ b/cortex-mem-config/src/lib.rs
@@ -20,6 +20,26 @@ pub struct CortexConfig {
     /// If not specified, will use system application data directory
     #[serde(default)]
     pub data_dir: Option<String>,
+
+    /// Whether to enable LLM-based intent analysis before vector search.
+    ///
+    /// When enabled (default), each search call makes an LLM request to:
+    ///   1. Rewrite the query for better vector matching
+    ///   2. Detect intent type (entity_lookup / factual / temporal / search / ...)
+    ///   3. Dynamically tune L0 threshold and L0/L1/L2 scoring weights
+    ///
+    /// Disable this (`enable_intent_analysis = false`) to skip the LLM call
+    /// and use the raw query directly.  Vector search latency drops from ~15-25s
+    /// to < 500ms; recall quality is slightly lower without query rewriting.
+    ///
+    /// Recommended: `false` for latency-sensitive interactive use (e.g. chat plugins),
+    ///              `true` for batch / offline recall where quality matters most.
+    #[serde(default = "default_enable_intent_analysis")]
+    pub enable_intent_analysis: bool,
+}
+
+fn default_enable_intent_analysis() -> bool {
+    true // serde default for `enable_intent_analysis`: stays on when the key is absent
 }
 
 impl CortexConfig {
@@ -135,6 +155,7 @@ impl Default for CortexConfig {
     fn default() -> Self {
         CortexConfig {
             data_dir: None,  // Use None to trigger smart default
+            enable_intent_analysis: default_enable_intent_analysis(), // same source as the serde default
         }
     }
 }
diff --git a/cortex-mem-core/src/search/vector_engine.rs b/cortex-mem-core/src/search/vector_engine.rs
index 6cfbf2e..ce57156 100644
--- a/cortex-mem-core/src/search/vector_engine.rs
+++ b/cortex-mem-core/src/search/vector_engine.rs
@@ -64,6 +64,10 @@ pub struct VectorSearchEngine {
     memory_event_tx: Option<mpsc::UnboundedSender<MemoryEvent>>,
     /// Optional index manager for archived-memory filtering
     index_manager: Option<Arc<MemoryIndexManager>>,
+    /// Whether to call the LLM for intent analysis before each search.
+    /// When `false`, the raw query is used directly (skips rewriting/threshold tuning).
+    /// Default: `true`.
+    enable_intent_analysis: bool,
 }
 
 impl VectorSearchEngine {
@@ -80,6 +84,7 @@ impl VectorSearchEngine {
             llm_client: None,
             memory_event_tx: None,
             index_manager: None,
+            enable_intent_analysis: true,
         }
     }
 
@@ -97,9 +102,19 @@ impl VectorSearchEngine {
             llm_client: Some(llm_client),
             memory_event_tx: None,
             index_manager: None,
+            enable_intent_analysis: true,
         }
     }
 
+    /// Builder-style switch for LLM intent analysis; consumes and returns the engine.
+    ///
+    /// With `false`, `analyze_intent` skips the LLM round-trip and falls through to its
+    /// non-LLM fallback (no query rewriting); reported latency: ~15-25s down to <500ms.
+    pub fn with_intent_analysis(mut self, enabled: bool) -> Self {
+        self.enable_intent_analysis = enabled; // checked by `analyze_intent` on each call
+        self
+    }
+
     /// Set the memory event sender for access tracking (enables forgetting mechanism)
     pub fn with_memory_event_tx(mut self, tx: mpsc::UnboundedSender<MemoryEvent>) -> Self {
         self.memory_event_tx = Some(tx);
@@ -589,11 +604,16 @@ impl VectorSearchEngine {
 
     /// 统一意图分析（优先使用 LLM 单次调用，LLM 不可用时使用最小 fallback）
     async fn analyze_intent(&self, query: &str) -> Result<EnhancedQueryIntent> {
-        if let Some(llm) = &self.llm_client {
-            match self.analyze_intent_with_llm(llm.as_ref(), query).await {
-                Ok(intent) => return Ok(intent),
-                Err(e) => warn!("LLM intent analysis failed, using fallback: {}", e),
+        // Skip LLM call when intent analysis is disabled via config
+        if self.enable_intent_analysis {
+            if let Some(llm) = &self.llm_client {
+                match self.analyze_intent_with_llm(llm.as_ref(), query).await {
+                    Ok(intent) => return Ok(intent),
+                    Err(e) => warn!("LLM intent analysis failed, using fallback: {}", e),
+                }
             }
+        } else {
+            debug!("Intent analysis disabled, using raw query directly");
         }
 
         // Fallback：LLM 不可用时的基础处理（不含规则判断，仅做基本分词）
diff --git a/cortex-mem-mcp/src/main.rs b/cortex-mem-mcp/src/main.rs
index b72879d..289a2a6 100644
--- a/cortex-mem-mcp/src/main.rs
+++ b/cortex-mem-mcp/src/main.rs
@@ -98,6 +98,7 @@ async fn main() -> Result<()> {
         &config.embedding.model_name,
         config.qdrant.embedding_dim,
         cli.user,  // explicit user_id; None → "default" (see MemoryOperations::new)
+        config.cortex.enable_intent_analysis,
     ).await?;
     
     let operations = Arc::new(operations);
diff --git a/cortex-mem-rig/src/lib.rs b/cortex-mem-rig/src/lib.rs
index 7d5210d..adc4603 100644
--- a/cortex-mem-rig/src/lib.rs
+++ b/cortex-mem-rig/src/lib.rs
@@ -82,6 +82,39 @@ pub async fn create_memory_tools_with_tenant_and_vector(
     embedding_model_name: &str,
     embedding_dim: Option<usize>,
     user_id: Option<String>,
+) -> Result<MemoryTools, Box<dyn std::error::Error>> {
+    create_memory_tools_with_config(
+        data_dir,
+        tenant_id,
+        llm_client,
+        qdrant_url,
+        qdrant_collection,
+        qdrant_api_key,
+        embedding_api_base_url,
+        embedding_api_key,
+        embedding_model_name,
+        embedding_dim,
+        user_id,
+        true, // enable_intent_analysis default
+    ).await
+}
+
+/// Create memory tools with full features (LLM + Vector Search) and explicit config
+///
+/// Use this when you want to control intent analysis behaviour from config.
+pub async fn create_memory_tools_with_config(
+    data_dir: impl AsRef<std::path::Path>,
+    tenant_id: impl Into<String>,
+    llm_client: Arc<dyn LLMClient>,
+    qdrant_url: &str,
+    qdrant_collection: &str,
+    qdrant_api_key: Option<&str>,
+    embedding_api_base_url: &str,
+    embedding_api_key: &str,
+    embedding_model_name: &str,
+    embedding_dim: Option<usize>,
+    user_id: Option<String>,
+    enable_intent_analysis: bool,
 ) -> Result<MemoryTools, Box<dyn std::error::Error>> {
     let operations = MemoryOperations::new(
         data_dir.as_ref().to_str().unwrap(),
@@ -95,6 +128,7 @@ pub async fn create_memory_tools_with_tenant_and_vector(
         embedding_model_name,
         embedding_dim,
         user_id,
+        enable_intent_analysis,
     )
     .await?;
     Ok(MemoryTools::new(Arc::new(operations)))
diff --git a/cortex-mem-service/src/state.rs b/cortex-mem-service/src/state.rs
index 59e4b1e..6e1383b 100644
--- a/cortex-mem-service/src/state.rs
+++ b/cortex-mem-service/src/state.rs
@@ -37,6 +37,9 @@ pub struct AppState {
     /// AutomationManager's tx handle — updated on tenant switch so AutomationManager
     /// routes VectorSyncNeeded to the correct tenant coordinator.
     pub automation_tx_handle: Option<Arc<RwLock<Option<tokio::sync::mpsc::UnboundedSender<MemoryEvent>>>>>,
+    /// Whether to use LLM intent analysis before each search (from config.toml [cortex] section).
+    /// When false, raw query is used directly — much faster but no query rewriting.
+    pub enable_intent_analysis: bool,
 }
 
 impl AppState {
@@ -52,6 +55,11 @@ impl AppState {
         // 获取配置（优先从config.toml，否则从环境变量）
         let (llm_client, embedding_config, qdrant_config) = Self::load_configs()?;
 
+        // Read the [cortex] section (enable_intent_analysis, ...); defaults to `true` if config.toml cannot be loaded
+        let enable_intent_analysis = cortex_mem_config::Config::load("config.toml")
+            .map(|c| c.cortex.enable_intent_analysis)
+            .unwrap_or(true);
+
         // 构建Cortex Memory
         let mut builder = CortexMemBuilder::new(&cortex_dir);
 
@@ -115,6 +123,7 @@ impl AppState {
                 engine = engine.with_memory_event_tx(tx.clone());
             }
             engine = engine.with_index_manager(index_manager.clone());
+            engine = engine.with_intent_analysis(enable_intent_analysis);
             Some(Arc::new(engine))
         } else {
             None
@@ -133,6 +142,7 @@ impl AppState {
             current_tenant_id: Arc::new(RwLock::new(None)),
             memory_event_tx: Arc::new(RwLock::new(memory_event_tx)),
             automation_tx_handle: cortex_automation_tx,
+            enable_intent_analysis,
         })
     }
 
@@ -385,7 +395,8 @@ impl AppState {
                         )
                         .with_index_manager(Arc::new(MemoryIndexManager::new(
                             tenant_filesystem.clone(),
-                        ))),
+                        )))
+                        .with_intent_analysis(self.enable_intent_analysis),
                     );
 
                     let mut engine = self.vector_engine.write().await;
diff --git a/cortex-mem-tools/src/operations.rs b/cortex-mem-tools/src/operations.rs
index c6eaa0d..9188932 100644
--- a/cortex-mem-tools/src/operations.rs
+++ b/cortex-mem-tools/src/operations.rs
@@ -109,6 +109,7 @@ impl MemoryOperations {
         embedding_model_name: &str,
         embedding_dim: Option<usize>,
         user_id: Option<String>,
+        enable_intent_analysis: bool,
     ) -> Result<Self> {
         let tenant_id = tenant_id.into();
         let filesystem = Arc::new(CortexFilesystem::with_tenant(data_dir, &tenant_id));
@@ -219,7 +220,8 @@ impl MemoryOperations {
                 llm_client.clone(),
             )
             .with_memory_event_tx(memory_event_tx.clone())
-            .with_index_manager(index_manager.clone()),
+            .with_index_manager(index_manager.clone())
+            .with_intent_analysis(enable_intent_analysis),
         );
         tracing::info!("Vector search engine created with LLM, event tracking, and archived filter");
 
diff --git a/examples/cortex-mem-tars/src/agent.rs b/examples/cortex-mem-tars/src/agent.rs
index 780d2b9..1f699d9 100644
--- a/examples/cortex-mem-tars/src/agent.rs
+++ b/examples/cortex-mem-tars/src/agent.rs
@@ -1,6 +1,6 @@
 use anyhow::Result;
 use chrono::{DateTime, Local};
-use cortex_mem_rig::create_memory_tools_with_tenant_and_vector;
+use cortex_mem_rig::create_memory_tools_with_config;
 use cortex_mem_tools::MemoryOperations;
 use futures::StreamExt;
 use rig::agent::MultiTurnStreamItem;
@@ -80,7 +80,7 @@ pub async fn create_memory_agent(
         config.embedding.model_name,
         config.qdrant.embedding_dim
     );
-    let memory_tools = create_memory_tools_with_tenant_and_vector(
+    let memory_tools = create_memory_tools_with_config(
         data_dir,
         agent_id,
         cortex_llm_client,
@@ -92,6 +92,7 @@ pub async fn create_memory_agent(
         &config.embedding.model_name,
         config.qdrant.embedding_dim,
         Some(user_id.to_string()),
+        config.cortex.enable_intent_analysis,
     )
     .await?;
 
diff --git a/examples/cortex-mem-tars/src/infrastructure.rs b/examples/cortex-mem-tars/src/infrastructure.rs
index fc1e1fb..3cf321b 100644
--- a/examples/cortex-mem-tars/src/infrastructure.rs
+++ b/examples/cortex-mem-tars/src/infrastructure.rs
@@ -46,6 +46,7 @@ impl Infrastructure {
             &config.embedding.model_name,
             config.qdrant.embedding_dim,
             None,  // user_id = None，使用tenant_id作为user_id
+            config.cortex.enable_intent_analysis,
         )
         .await
         .context("Failed to initialize MemoryOperations")?;