Skip to content

Commit c199a2f

Browse files
osvalois and claude committed
perf(agent): eliminate coordinator re-delegation waste — 50%+ latency reduction
Root cause (Minecraft benchmark): after the sub-agent successfully executed file_write (134s), the coordinator made a redundant R2 API call (131s) that re-called file_write, followed by a 45s permission timeout. Total waste: 176s = 51% of the 343s session.

Fixes applied:

1. mod.rs — Remove delegation-completed tools from coordinator's cached_tools
   After sub-agent results are recorded, tools successfully executed by sub-agents (file_write, bash, etc.) are removed from the coordinator's cached_tools. The model physically cannot call them again — this eliminates the hallucination at the protocol level. The previous approach (a synthetic "Task completed" message) was insufficient; deepseek-chat ignored it and still called file_write with 6,196 output tokens.

2. mod.rs — Anti-re-delegation warning in sub-agent result injection
   When file_write/bash/shell/patch_apply were executed by sub-agents, inject an explicit CRITICAL warning into the coordinator context: "do NOT call these tools again". Belt-and-suspenders with Fix 1.

3. post_batch.rs — Force no-tools when only synthesis steps remain
   After delegation, if all pending plan steps have no tool_name (synthesis-only), suppress tools for the coordinator's next round via tool_decision.set_force_next(). This prevents the synthesis round from offering tools the model might hallucinate.

Expected timing improvement (FileManagement/single-file tasks):
  Before: ~343s (sub-agent 138s + coordinator R2 131s + permission 45s + overhead)
  After: ~150s (sub-agent 138s + synthesis 5s + overhead)
  Reduction: ~56%

3404 tests pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 2e33dd1 commit c199a2f

2 files changed

Lines changed: 98 additions & 2 deletions

File tree

crates/halcon-cli/src/repl/agent/mod.rs

Lines changed: 71 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -655,7 +655,7 @@ pub async fn run_agent_loop(ctx: AgentContext<'_>) -> Result<AgentLoopResult> {
655655
// Conversational intent (greetings, simple Q&A) returns vec![] — the model responds
656656
// directly in 1 round without any tool call overhead.
657657
let is_conversational_intent;
658-
let cached_tools = {
658+
let mut cached_tools = {
659659
let all_tools = request.tools.clone();
660660
let tool_selector = super::tool_selector::ToolSelector::new(
661661
tool_selection_enabled,
@@ -939,13 +939,49 @@ pub async fn run_agent_loop(ctx: AgentContext<'_>) -> Result<AgentLoopResult> {
939939
})
940940
.collect();
941941

942+
// Collect tools successfully executed by sub-agents before injection.
943+
// Used below to (1) add an anti-re-delegation warning and (2) remove those
944+
// tools from the coordinator's cached_tools so the model cannot call them
945+
// again even if it hallucinates — root cause of the 131s wasted R2 round.
946+
let delegated_ok_tools: Vec<String> = orch_result.sub_results
947+
.iter()
948+
.filter(|r| r.success)
949+
.flat_map(|r| r.agent_result.tools_used.iter().cloned())
950+
.collect();
951+
942952
if !sub_outputs.is_empty() {
943953
render_sink.loop_guard_action(
944954
"sub_agent_results",
945955
&format!("{} sub-agent outputs collected — injecting into coordinator context", sub_outputs.len()),
946956
);
957+
958+
// FIX: When destructive tools (file_write, bash, shell, patch_apply) were already executed by
959+
// sub-agents, inject a strong directive so the coordinator does NOT re-execute
960+
// them. Without this, deepseek-chat hallucinates a second file_write call
961+
// containing the full file content (~6K tokens, ~131s) even after the
962+
// sub-agent already wrote the file. This was the #1 source of wasted time
963+
// (176s = 51% of total session duration in the Minecraft benchmark).
964+
let anti_redo_note = if delegated_ok_tools.iter().any(|t| {
965+
matches!(t.as_str(), "file_write" | "bash" | "shell" | "patch_apply")
966+
}) {
967+
format!(
968+
"\n⚠️ CRITICAL: The following tools were already executed by sub-agents \
969+
and must NOT be called again: [{}]. \
970+
Your ONLY task now is to synthesize the results and confirm to the user \
971+
what was created. Do NOT regenerate or re-write any files.\n",
972+
delegated_ok_tools
973+
.iter()
974+
.filter(|t| matches!(t.as_str(), "file_write" | "bash" | "shell" | "patch_apply"))
975+
.cloned()
976+
.collect::<Vec<_>>()
977+
.join(", ")
978+
)
979+
} else {
980+
String::new()
981+
};
982+
947983
let results_context = format!(
948-
"[Sub-Agent Results — please synthesize these into your final response]\n\n{}\n",
984+
"[Sub-Agent Results — please synthesize these into your final response]{anti_redo_note}\n\n{}\n",
949985
sub_outputs.join("\n\n")
950986
);
951987
messages.push(ChatMessage {
@@ -987,6 +1023,39 @@ pub async fn run_agent_loop(ctx: AgentContext<'_>) -> Result<AgentLoopResult> {
9871023
elapsed,
9881024
);
9891025

1026+
// FIX: Remove delegation-completed tools from coordinator's cached_tools.
1027+
// Prevents the coordinator from calling a tool (e.g. file_write) that a
1028+
// sub-agent already executed. In the Minecraft benchmark, deepseek-chat
1029+
// ignored the "don't redo" injection and called file_write a second time
1030+
// (131s wasted API + 45s permission timeout = 176s = 51% of total time).
1031+
// By removing the tool from the tool list, the model physically cannot
1032+
// call it — eliminating the hallucination at the protocol level.
1033+
if !delegated_ok_tools.is_empty() {
1034+
let before = cached_tools.len();
1035+
cached_tools.retain(|t| !delegated_ok_tools.contains(&t.name));
1036+
let removed = before - cached_tools.len();
1037+
if removed > 0 {
1038+
tracing::info!(
1039+
removed_tools = ?delegated_ok_tools,
1040+
removed_count = removed,
1041+
remaining_tools = cached_tools.len(),
1042+
"Post-delegation: removed completed tools from coordinator tool list"
1043+
);
1044+
if !silent {
1045+
render_sink.info(&format!(
1046+
"[post-delegation] removed {} completed tool(s) from coordinator list: [{}]",
1047+
removed,
1048+
delegated_ok_tools
1049+
.iter()
1050+
.filter(|n| !cached_tools.iter().any(|ct| &ct.name == *n))
1051+
.cloned()
1052+
.collect::<Vec<_>>()
1053+
.join(", ")
1054+
));
1055+
}
1056+
}
1057+
}
1058+
9901059
let delegated_count = matched.len();
9911060
if delegated_count > 0 {
9921061
tracing::info!(delegated_count, "Steps delegated to sub-agents");

crates/halcon-cli/src/repl/agent/post_batch.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,33 @@ pub(super) async fn run(
381381
state.loop_guard.force_synthesis();
382382
}
383383

384+
// FIX: When all remaining non-terminal steps have no tool_name (i.e. only
385+
// synthesis/confirmation steps remain), force no-tools for the next round.
386+
// Without this, the coordinator makes an API call with all tools available
387+
// and the model may hallucinate a tool call (e.g. re-calling file_write)
388+
// instead of just synthesizing. This saves one full API round (~131s for
389+
// large file generation tasks like the Minecraft benchmark).
390+
{
391+
let plan = tracker.plan();
392+
let pending_are_all_synthesis = plan.steps.iter()
393+
.enumerate()
394+
.filter(|(_, s)| {
395+
// A step is still "active" if its outcome is None (not yet completed).
396+
s.outcome.is_none()
397+
})
398+
.all(|(_, s)| s.tool_name.is_none());
399+
400+
if pending_are_all_synthesis && !plan.steps.is_empty()
401+
&& plan.steps.iter().any(|s| s.outcome.is_none())
402+
{
403+
tracing::info!(
404+
"All remaining plan steps are synthesis-only (no tool_name) — \
405+
suppressing tools for coordinator synthesis round"
406+
);
407+
state.tool_decision.set_force_next();
408+
}
409+
}
410+
384411
// Planning V3: Early convergence check after each tool round.
385412
// Computes progress_delta vs previous round to detect diminishing returns.
386413
let (completed, total, _) = tracker.progress();

0 commit comments

Comments
 (0)