perf(agent): eliminate coordinator re-delegation waste — 50%+ latency reduction

osvalois · claude · osvalois · commit c199a2f5f5cc · 2026-02-23T11:17:07.000-06:00
Root cause (Minecraft benchmark): after the sub-agent successfully executed file_write
(134s), the coordinator made a redundant R2 API call (131s) that re-called file_write,
followed by a 45s permission timeout. Total waste: 131s + 45s = 176s = 51% of the 343s session.

Fixes applied:

1. mod.rs — Remove delegation-completed tools from coordinator's cached_tools
   After sub-agent results are recorded, tools successfully executed by sub-agents
   (file_write, bash, etc.) are removed from coordinator's cached_tools. The model
   physically cannot call them again — eliminates the hallucination at protocol level.
   Previous approach (synthetic "Task completed" message) was insufficient; deepseek-chat
   ignored it and still called file_write with 6,196 output tokens.

2. mod.rs — Anti-re-delegation warning in sub-agent result injection
   When file_write/bash/shell/patch_apply were executed by sub-agents, inject an explicit
   CRITICAL warning into the coordinator context: these tools "must NOT be called again".
   Belt-and-suspenders with Fix 1.

3. post_batch.rs — Force no-tools when only synthesis steps remain
   After delegation, if all pending plan steps have no tool_name (synthesis-only),
   suppress tools for coordinator's next round via tool_decision.set_force_next().
   Prevents the synthesis round from offering tools the model might hallucinate.

Expected timing improvement (FileManagement/single-file tasks):
  Before: ~343s (sub-agent 138s + coordinator R2 131s + permission 45s + overhead)
  After:  ~150s (sub-agent 138s + synthesis 5s + overhead)
  Reduction: ~56%

3404 tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
diff --git a/crates/halcon-cli/src/repl/agent/mod.rs b/crates/halcon-cli/src/repl/agent/mod.rs
@@ -655,7 +655,7 @@ pub async fn run_agent_loop(ctx: AgentContext<'_>) -> Result<AgentLoopResult> {
     // Conversational intent (greetings, simple Q&A) returns vec![] — the model responds
     // directly in 1 round without any tool call overhead.
     let is_conversational_intent;
-    let cached_tools = {
+    let mut cached_tools = {
         let all_tools = request.tools.clone();
         let tool_selector = super::tool_selector::ToolSelector::new(
             tool_selection_enabled,
@@ -939,13 +939,49 @@ pub async fn run_agent_loop(ctx: AgentContext<'_>) -> Result<AgentLoopResult> {
                     })
                     .collect();
 
+                // Collect tools successfully executed by sub-agents before injection.
+                // Used below to (1) add an anti-re-delegation warning and (2) remove those
+                // tools from the coordinator's cached_tools so the model cannot call them
+                // again even if it hallucinates — root cause of the 131s wasted R2 round.
+                let delegated_ok_tools: Vec<String> = orch_result.sub_results
+                    .iter()
+                    .filter(|r| r.success)
+                    .flat_map(|r| r.agent_result.tools_used.iter().cloned())
+                    .collect();
+
                 if !sub_outputs.is_empty() {
                     render_sink.loop_guard_action(
                         "sub_agent_results",
                         &format!("{} sub-agent outputs collected — injecting into coordinator context", sub_outputs.len()),
                     );
+
+                    // FIX: When destructive tools (file_write, bash) were already executed by
+                    // sub-agents, inject a strong directive so the coordinator does NOT re-execute
+                    // them. Without this, deepseek-chat hallucinates a second file_write call
+                    // containing the full file content (~6K tokens, ~131s) even after the
+                    // sub-agent already wrote the file. This was the #1 source of wasted time
+                    // (176s = 51% of total session duration in the Minecraft benchmark).
+                    let anti_redo_note = if delegated_ok_tools.iter().any(|t| {
+                        matches!(t.as_str(), "file_write" | "bash" | "shell" | "patch_apply")
+                    }) {
+                        format!(
+                            "\n⚠️  CRITICAL: The following tools were already executed by sub-agents \
+                             and must NOT be called again: [{}]. \
+                             Your ONLY task now is to synthesize the results and confirm to the user \
+                             what was created. Do NOT regenerate or re-write any files.\n",
+                            delegated_ok_tools
+                                .iter()
+                                .filter(|t| matches!(t.as_str(), "file_write" | "bash" | "shell" | "patch_apply"))
+                                .cloned()
+                                .collect::<Vec<_>>()
+                                .join(", ")
+                        )
+                    } else {
+                        String::new()
+                    };
+
                     let results_context = format!(
-                        "[Sub-Agent Results — please synthesize these into your final response]\n\n{}\n",
+                        "[Sub-Agent Results — please synthesize these into your final response]{anti_redo_note}\n\n{}\n",
                         sub_outputs.join("\n\n")
                     );
                     messages.push(ChatMessage {
@@ -987,6 +1023,39 @@ pub async fn run_agent_loop(ctx: AgentContext<'_>) -> Result<AgentLoopResult> {
                     elapsed,
                 );
 
+                // FIX: Remove delegation-completed tools from coordinator's cached_tools.
+                // Prevents the coordinator from calling a tool (e.g. file_write) that a
+                // sub-agent already executed. In the Minecraft benchmark, deepseek-chat
+                // ignored the "don't redo" injection and called file_write a second time
+                // (131s wasted API + 45s permission timeout = 176s = 51% of total time).
+                // By removing the tool from the tool list, the model physically cannot
+                // call it — eliminating the hallucination at the protocol level.
+                if !delegated_ok_tools.is_empty() {
+                    let before = cached_tools.len();
+                    cached_tools.retain(|t| !delegated_ok_tools.contains(&t.name));
+                    let removed = before - cached_tools.len();
+                    if removed > 0 {
+                        tracing::info!(
+                            removed_tools = ?delegated_ok_tools,
+                            removed_count = removed,
+                            remaining_tools = cached_tools.len(),
+                            "Post-delegation: removed completed tools from coordinator tool list"
+                        );
+                        if !silent {
+                            render_sink.info(&format!(
+                                "[post-delegation] removed {} completed tool(s) from coordinator list: [{}]",
+                                removed,
+                                delegated_ok_tools
+                                    .iter()
+                                    .filter(|n| !cached_tools.iter().any(|ct| &ct.name == *n))
+                                    .cloned()
+                                    .collect::<Vec<_>>()
+                                    .join(", ")
+                            ));
+                        }
+                    }
+                }
+
                 let delegated_count = matched.len();
                 if delegated_count > 0 {
                     tracing::info!(delegated_count, "Steps delegated to sub-agents");
diff --git a/crates/halcon-cli/src/repl/agent/post_batch.rs b/crates/halcon-cli/src/repl/agent/post_batch.rs
@@ -381,6 +381,33 @@ pub(super) async fn run(
                 state.loop_guard.force_synthesis();
             }
 
+            // FIX: When all remaining non-terminal steps have no tool_name (i.e. only
+            // synthesis/confirmation steps remain), force no-tools for the next round.
+            // Without this, the coordinator makes an API call with all tools available
+            // and the model may hallucinate a tool call (e.g. re-calling file_write)
+            // instead of just synthesizing. This saves one full API round (~131s for
+            // large file generation tasks like the Minecraft benchmark).
+            {
+                let plan = tracker.plan();
+                let pending_are_all_synthesis = plan.steps.iter()
+                    .enumerate()
+                    .filter(|(_, s)| {
+                        // A step is still "active" if its outcome is None (not yet completed).
+                        s.outcome.is_none()
+                    })
+                    .all(|(_, s)| s.tool_name.is_none());
+
+                if pending_are_all_synthesis && !plan.steps.is_empty()
+                    && plan.steps.iter().any(|s| s.outcome.is_none())
+                {
+                    tracing::info!(
+                        "All remaining plan steps are synthesis-only (no tool_name) — \
+                         suppressing tools for coordinator synthesis round"
+                    );
+                    state.tool_decision.set_force_next();
+                }
+            }
+
             // Planning V3: Early convergence check after each tool round.
             // Computes progress_delta vs previous round to detect diminishing returns.
             let (completed, total, _) = tracker.progress();