import-ai · ginkgo-daddy · Dec 15, 2025 · Dec 15, 2025 · Dec 15, 2025 · Dec 15, 2025
diff --git a/omnibox_wizard/resources/prompt_templates/ask.j2 b/omnibox_wizard/resources/prompt_templates/ask.j2
@@ -10,6 +10,8 @@ Your name is OmniBox（中文名：小黑）, built by import.ai, responsible fo
 
 You will receive a user's question and are expected to answer it concisely, accurately, and clearly.
 
+IMPORTANT: When you need to call tools, do NOT mention tool names like "get_resources", "private_search", etc.
+
 {% include "user_input_description.j2" %}
 
 {% endif %}
@@ -37,6 +39,7 @@ if you are not sure about user’s request, use user selected tools to search an
 - Your answers must be correct and accurate, written with an expert's tone, and maintain a professional and unbiased style.
 - Do not provide information unrelated to the question, nor repeat content.
 - Except for code, specific names, and citations, your answer must be in user's preference language.
+- NEVER include internal resource IDs (like "mZXFx7HkfEDx6T2A", etc.) in your responses. These IDs are for internal tool calls only. When referring to documents or folders, use their names/titles instead.
 
 {% include "meta_info.j2" %}
 

diff --git a/omnibox_wizard/resources/prompt_templates/tools.j2 b/omnibox_wizard/resources/prompt_templates/tools.j2
@@ -10,4 +10,51 @@ Available system tools are provided with function signatures within <tools></too
 For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
 <tool_call>
 {"name": <function-name>, "arguments": <args-json-object>}
-</tool_call>
+</tool_call>
+
+## CRITICAL Tool Calling Rules
+
+1. **ALWAYS include <tool_call> tags when you need information**: If you say "I will call...", "I need to get...", "Let me check...", or similar phrases, you MUST include the actual `<tool_call>` tags in the SAME response. NEVER end your response with an intention to call a tool without actually calling it.
+
+2. **Multi-step tasks require multiple tool calls**: Complex tasks (like summarizing a folder) often need multiple rounds:
+   - First call: Get folder structure (get_children)
+   - After receiving results: Read document contents (get_resources)
+   - Continue until you have all needed information
+
+3. **NEVER stop halfway**: If tool results show you need more information to complete the task, immediately call the next tool. Do not wait for user confirmation.
+
+4. **ALL resource types are readable**: The system has pre-processed all resources (doc, file, link, audio, video). Use `get_resources` to read ANY resource - content has been extracted/transcribed. Do NOT skip resources based on their type.
+
+## Example: Correct Multi-Step Tool Usage
+
+**User**: Summarize the contents of folder f1
+
+**Assistant (Turn 1)**: I'll help you summarize folder f1. Let me first check its contents.
+<tool_call>
+{"name": "get_children", "arguments": {"resource_id": "f1"}}
+</tool_call>
+
+**[Tool returns: folder contains r1 (doc), r2 (file), r3 (link)]**
+
+**Assistant (Turn 2)**: The folder contains 3 resources. Let me read ALL their contents (all types are readable).
+<tool_call>
+{"name": "get_resources", "arguments": {"resource_ids": ["r1", "r2", "r3"]}}
+</tool_call>
+
+**[Tool returns: document contents, transcribed audio, extracted link content]**
+
+**Assistant (Turn 3)**: Based on the contents, here is the summary: [actual summary with citations]
+
+## Common Mistakes to AVOID
+
+❌ WRONG - Ending with intention but no action:
+"I found 3 documents. Let me read them to provide a summary." (NO <tool_call> tags)
+
+❌ WRONG - Skipping resources based on type:
+"I can only read doc types, so I'll skip the audio and link files." (ALL types are readable!)
+
+✅ CORRECT - Always include the tool call and read ALL resources:
+"I found 3 resources (1 doc, 1 audio, 1 link). Let me read all their contents.
+<tool_call>
+{"name": "get_resources", "arguments": {"resource_ids": ["r1", "r2", "r3"]}}
+</tool_call>"
diff --git a/omnibox_wizard/wizard/config.py b/omnibox_wizard/wizard/config.py
@@ -6,6 +6,10 @@
 from wizard_common.config import OpenAIConfig
 
 
+class BackendConfig(BaseModel):
+    base_url: str
+
+
 class VectorConfig(BaseModel):
     embedding: OpenAIConfig
     host: str
@@ -62,10 +66,10 @@ class SearXNGConfig(BaseModel):
     base_url: str
     engines: str | None = Field(default=None)
 
-
 class ToolsConfig(BaseModel):
     searxng: SearXNGConfig
     reranker: RerankerConfig = Field(default_factory=RerankerConfig)
+    resource_api: BackendConfig  # required: BackendConfig() cannot be built without base_url, so no default_factory
 
 
 class Config(BaseModel):

diff --git a/omnibox_wizard/wizard/grimoire/agent/agent.py b/omnibox_wizard/wizard/grimoire/agent/agent.py
@@ -38,6 +38,8 @@
     Resource,
     ALL_TOOLS,
     PrivateSearchResourceType,
+    BaseResourceTool,
+    RESOURCE_TOOLS,
 )
 from omnibox_wizard.wizard.grimoire.retriever.base import BaseRetriever
 from omnibox_wizard.wizard.grimoire.retriever.meili_vector_db import (
@@ -49,6 +51,16 @@
     Reranker,
 )
 from omnibox_wizard.wizard.grimoire.retriever.searxng import SearXNG
+from omnibox_wizard.wizard.grimoire.client.resource_api import ResourceAPIClient
+from omnibox_wizard.wizard.grimoire.retriever.resource import (
+    BaseResourceHandler,
+    GetResourcesHandler,
+    GetChildrenHandler,
+    GetParentHandler,
+    FilterByTimeHandler,
+    FilterByTagHandler,
+    FilterByKeywordHandler,
+)
 
 DEFAULT_TOOL_NAME: str = "private_search"
 json_dumps = partial(jsonlib.dumps, ensure_ascii=False, separators=(",", ":"))
@@ -174,6 +186,11 @@ def parse_selected_resources(
     @classmethod
     def parse_selected_tools(cls, attrs: MessageAttrs) -> list[str]:
         tools = [tool.name for tool in attrs.tools or []]
+
+        # If private_search is selected, the resource tools are automatically available.
+        if "private_search" in tools:
+            tools = tools + [t for t in RESOURCE_TOOLS if t not in tools]
+
         return [
             "\n".join(
                 [
@@ -189,17 +206,75 @@ def parse_selected_tools(cls, attrs: MessageAttrs) -> list[str]:
             )
         ]
 
+    @classmethod
+    def parse_visible_resources(
+        cls, options: ChatRequestOptions, original_tools: list | None = None
+    ) -> list[str]:
+        """Parse visible_resources from the private_search tool and format them for LLM context.
+
+        This provides the LLM with a list of available resources and their short IDs,
+        so it knows what folders/documents exist and can use the appropriate tools.
+
+        Args:
+            options: The chat request options (may have serialized tools without visible_resources)
+            original_tools: Original tools list with visible_resources populated (optional)
+        """
+        tools_list = original_tools if original_tools is not None else (options.tools or [])
+        tools = ToolDict(tools_list)
+
+        if tool := tools.get("private_search",None):
+            visible_resources = tool.visible_resources
+        else:
+            return []
+
+        # Separate folders and documents for clarity
+        folders = [r for r in visible_resources if r.type == "folder"]
+        documents = [r for r in visible_resources if r.type == "resource"]
+
+        # Format for LLM with clear guidance
+        lines = [
+            "<available_resources>",
+            "User's available folders and documents:",
+            "",
+        ]
+
+        if folders:
+            lines.append("Folders:")
+            for f in folders:
+                lines.append(f"  - {f.id}: {f.name}")
+
+        if documents:
+            lines.append("")
+            lines.append("Documents:")
+            for d in documents:
+                lines.append(f"  - {d.id}: {d.name}")
+
+        lines.extend([
+            "",
+            "Tool Usage Guide:",
+            "- To see folder contents: get_children(resource_id) e.g., get_children('f1')",
+            "- To read document content: get_resources([resource_ids]) e.g., get_resources(['r1', 'r2'])",
+            "- For time-based queries ('recent', 'this week'): use filter_by_time",
+            "- For tag-based queries: use filter_by_tag",
+            "- private_search is for keyword search across all documents",
+            "</available_resources>",
+        ])
+
+        return ["\n".join(lines)]
+
     @classmethod
     def parse_user_query(
         cls,
         query: str,
         attrs: MessageAttrs,
+        original_tools: list | None = None,
     ) -> str:
         return remove_continuous_break_lines(
             "\n\n".join(
                 [
                     "\n".join(["<query>", query, "</query>"]),
                     *cls.parse_selected_resources(attrs),
+                    *cls.parse_visible_resources(attrs, original_tools=original_tools),
                     *cls.parse_selected_tools(attrs),
                 ]
             )
@@ -217,34 +292,51 @@ def parse_message(cls, message: MessageDto) -> dict:
         return openai_message
 
     @classmethod
-    def parse_context(cls, attrs: MessageAttrs) -> str:
+    def parse_context(
+        cls, attrs: MessageAttrs,
+        original_tools: list | None = None
+    ) -> str:
         return remove_continuous_break_lines(
             "\n\n".join(
                 [
                     *cls.parse_selected_resources(attrs),
+                    *cls.parse_visible_resources(attrs, original_tools=original_tools),
                     *cls.parse_selected_tools(attrs),
                 ]
             )
         )
 
     @classmethod
     def message_dtos_to_openai_messages(
-        cls, dtos: list[MessageDto]
+        cls, dtos: list[MessageDto], original_tools: list | None = None
     ) -> list[dict[str, str]]:
         messages: list[dict[str, str]] = []
 
-        for dto in dtos:
+        # Find the index of the last user message.
+        last_user_idx = -1
+        for i, dto in enumerate(dtos):
+            if dto.message["role"] == "user":
+                last_user_idx = i
+
+        for i, dto in enumerate(dtos):
             messages.append(dto.message)
-            if dto.message["role"] == "user" and dto.attrs:
+            # Only append the context after the last user message.
+            if i == last_user_idx and dto.message["role"] == "user" and dto.attrs:
                 messages.append(
-                    {"role": "system", "content": cls.parse_context(dto.attrs)}
+                    {
+                        "role": "system",
+                        "content": cls.parse_context(
+                            dto.attrs, original_tools=original_tools
+                        ),
+                    }
                 )
 
         return messages
 
 
 class BaseSearchableAgent(BaseStreamable, ABC):
     def __init__(self, config: Config):
+        # Search tools (existing)
         self.knowledge_database_retriever = MeiliVectorRetriever(config=config.vector)
         self.web_search_retriever = SearXNG(
             base_url=config.tools.searxng.base_url, engines=config.tools.searxng.engines
@@ -257,38 +349,60 @@ def __init__(self, config: Config):
             for each in [self.knowledge_database_retriever, self.web_search_retriever]
         }
 
+        # Resource tools (new)
+        self.resource_api_client = ResourceAPIClient(config.tools.resource_api)
+        self.resource_handlers: dict[str, BaseResourceHandler] = {
+            "get_resources": GetResourcesHandler(self.resource_api_client),
+            "get_children": GetChildrenHandler(self.resource_api_client),
+            "get_parent": GetParentHandler(self.resource_api_client),
+            "filter_by_time": FilterByTimeHandler(self.resource_api_client),
+            "filter_by_tag": FilterByTagHandler(self.resource_api_client),
+            "filter_by_keyword": FilterByKeywordHandler(self.resource_api_client),
+        }
+
+        # Combine all tool schemas
         self.all_tools: list[dict] = [
             retriever.get_schema() for retriever in self.retriever_mapping.values()
-        ]
-        assert all(t in self.retriever_mapping for t in ALL_TOOLS), (
-            "All tools must be registered in retriever mapping."
-        )
+        ] + [handler.get_schema() for handler in self.resource_handlers.values()]
 
     def get_tool_executor(
         self,
         options: ChatRequestOptions,
         trace_info: TraceInfo,
         wrap_reranker: bool = True,
     ) -> ToolExecutor:
-        tool_executor_config_list: list[ToolExecutorConfig] = [
-            self.retriever_mapping[tool.name].get_tool_executor_config(
-                tool, trace_info=trace_info.get_child(tool.name)
-            )
-            for tool in options.tools or []
-        ]
+        search_tool_config_list: list[ToolExecutorConfig] = []
+        resource_tool_config_list: list[ToolExecutorConfig] = []
+
+        for tool in options.tools or []:
+            if tool.name in self.retriever_mapping:
+                # Search tools
+                config = self.retriever_mapping[tool.name].get_tool_executor_config(
+                    tool, trace_info=trace_info.get_child(tool.name)
+                )
+                search_tool_config_list.append(config)
+            elif tool.name in self.resource_handlers:
+                # Resource tools
+                config = self.resource_handlers[tool.name].get_tool_executor_config(
+                    tool, trace_info=trace_info.get_child(tool.name)
+                )
+                resource_tool_config_list.append(config)
 
-        if options.merge_search:
-            tool_executor_config_list = [
-                get_tool_executor_config(tool_executor_config_list, self.reranker)
+        # Apply reranker only to search tools
+        if options.merge_search and search_tool_config_list:
+            search_tool_config_list = [
+                get_tool_executor_config(search_tool_config_list, self.reranker)
             ]
         elif wrap_reranker:
-            for tool_executor_config in tool_executor_config_list:
-                tool_executor_config["func"] = self.reranker.wrap(
-                    func=tool_executor_config["func"],
+            for tool_config in search_tool_config_list:
+                tool_config["func"] = self.reranker.wrap(
+                    func=tool_config["func"],
                     trace_info=trace_info.get_child("reranker"),
                 )
 
-        tool_executor_config: dict = {c["name"]: c for c in tool_executor_config_list}
+        # Combine all tool configs
+        all_tool_config_list = search_tool_config_list + resource_tool_config_list
+        tool_executor_config: dict = {c["name"]: c for c in all_tool_config_list}
         tool_executor = ToolExecutor(tool_executor_config)
         return tool_executor
 
@@ -534,6 +648,8 @@ async def astream(
                             exclude_none=True, exclude={"conversation_id"}
                         )
                     ),
+                    "all_tools": f"{self.all_tools}",
+                    "custom_tool_call": f"{self.custom_tool_call}"
                 }
             )
             trace_info.info({"request": agent_request.model_dump(exclude_none=True)})
@@ -549,7 +665,6 @@ async def astream(
                             "search", get_merged_description(all_tools)
                         )
                     ]
-
                 assert all_tools, "all_tools must not be empty"
 
                 if self.custom_tool_call:
@@ -602,7 +717,7 @@ async def astream(
             while messages[-1].message["role"] != "assistant":
                 async for chunk in self.chat(
                     messages=UserQueryPreprocessor.message_dtos_to_openai_messages(
-                        messages
+                        messages, original_tools=agent_request.tools
                     ),
                     enable_thinking=agent_request.enable_thinking,
                     tools=tool_executor.tools,

diff --git a/omnibox_wizard/wizard/grimoire/agent/ask.py b/omnibox_wizard/wizard/grimoire/agent/ask.py
@@ -1,7 +1,4 @@
-from omnibox_wizard.wizard.config import Config
-from omnibox_wizard.wizard.grimoire.agent.agent import Agent
+from omnibox_wizard.wizard.grimoire.agent.ask_langgraph import AskLangGraph
 
-
-class Ask(Agent):
-    def __init__(self, config: Config):
-        super().__init__(config=config, system_prompt_template_name="ask.j2")
+# Use LangGraph implementation
+Ask = AskLangGraph