Skip to content

Commit f5892ce

Browse files
osvaloisclaude
andcommitted
fix: eliminate silent failures — add observability to critical data paths
FSM transitions (loop_driver.rs): - Replace 9x `let _ = fsm.transition()` with `if let Err(e) = ... { debug!() }` - State machine violations are now visible in traces instead of silently ignored - Critical for debugging agent loops that stall or produce incorrect results Desktop UI (chat.rs): - Permission approval/denial: try_send failure now logs error!() (security-critical: dropped permission decisions could hang executor) - Chat message send: try_send failure logs error!() and resets streaming state (prevents ghost "sending" state when message is actually dropped) - Cancellation: try_send failure now logs warn!() API permission handler (chat.rs): - Permission decision forwarding now logs error!() when channel is closed (was silently dropping approve/deny decisions) Event bus (bus.rs): - Broadcast with zero subscribers now logs at trace level (was completely silent) Strategy/Memory persistence (loop_driver.rs): - Serialization failures now log warn!() before converting to None (was silently discarding strategy learning and memory data) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 8697d73 commit f5892ce

4 files changed

Lines changed: 39 additions & 20 deletions

File tree

crates/halcon-agent-core/src/loop_driver.rs

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ pub async fn run_gdem_loop(user_message: &str, mut ctx: GdemContext) -> Result<G
276276

277277
// ── Transition to Executing ────────────────────────────────────────
278278
if !matches!(fsm.state(), AgentState::Executing) {
279-
let _ = fsm.transition(AgentState::Executing);
279+
if let Err(e) = fsm.transition(AgentState::Executing) { tracing::debug!(error = %e, "FSM transition to Executing failed"); }
280280
}
281281

282282
// ── Get current plan step ─────────────────────────────────────────
@@ -369,7 +369,7 @@ pub async fn run_gdem_loop(user_message: &str, mut ctx: GdemContext) -> Result<G
369369
}
370370

371371
// ── L4: Verify ────────────────────────────────────────────────────
372-
let _ = fsm.transition(AgentState::Verifying);
372+
if let Err(e) = fsm.transition(AgentState::Verifying) { tracing::debug!(error = %e, "FSM transition to Verifying failed"); }
373373
let verifier_decision = verifier.verify(&evidence);
374374
let post_confidence = verifier_decision.confidence().unwrap_or(pre_confidence);
375375

@@ -387,7 +387,7 @@ pub async fn run_gdem_loop(user_message: &str, mut ctx: GdemContext) -> Result<G
387387
confidence = post_confidence,
388388
"Goal ACHIEVED — exiting loop"
389389
);
390-
let _ = fsm.transition(AgentState::Converged);
390+
if let Err(e) = fsm.transition(AgentState::Converged) { tracing::debug!(error = %e, "FSM transition to Converged failed"); }
391391
break 'agent;
392392
}
393393

@@ -406,23 +406,23 @@ pub async fn run_gdem_loop(user_message: &str, mut ctx: GdemContext) -> Result<G
406406

407407
match critic_signal {
408408
CriticSignal::Continue => {
409-
let _ = fsm.transition(AgentState::Executing);
409+
if let Err(e) = fsm.transition(AgentState::Executing) { tracing::debug!(error = %e, "FSM transition to Executing failed"); }
410410
}
411411
CriticSignal::InjectHint { hint, .. } => {
412412
hint_injection = Some(hint);
413-
let _ = fsm.transition(AgentState::Executing);
413+
if let Err(e) = fsm.transition(AgentState::Executing) { tracing::debug!(error = %e, "FSM transition to Executing failed"); }
414414
}
415415
CriticSignal::Replan { reason, .. } => {
416416
warn!(round = round, reason = %reason, "Critic triggering replan");
417-
let _ = fsm.transition(AgentState::Replanning);
417+
if let Err(e) = fsm.transition(AgentState::Replanning) { tracing::debug!(error = %e, "FSM transition to Replanning failed"); }
418418
plan_tree = planner.replan(&goal, &plan_tree);
419419
critic.reset_stall();
420-
let _ = fsm.transition(AgentState::Planning);
421-
let _ = fsm.transition(AgentState::Executing);
420+
if let Err(e) = fsm.transition(AgentState::Planning) { tracing::debug!(error = %e, "FSM transition to Planning failed"); }
421+
if let Err(e) = fsm.transition(AgentState::Executing) { tracing::debug!(error = %e, "FSM transition to Executing failed"); }
422422
}
423423
CriticSignal::Terminate { reason } => {
424424
warn!(round = round, reason = %reason, "Critic terminating loop");
425-
let _ = fsm.transition(AgentState::Terminating);
425+
if let Err(e) = fsm.transition(AgentState::Terminating) { tracing::debug!(error = %e, "FSM transition to Terminating failed"); }
426426
break 'agent;
427427
}
428428
}
@@ -483,13 +483,19 @@ pub async fn run_gdem_loop(user_message: &str, mut ctx: GdemContext) -> Result<G
483483

484484
// ── Prepare persistence data ───────────────────────────────────────────
485485
let updated_strategy_learner_json = if ctx.config.enable_strategy_learning {
486-
strategy_learner.to_json().ok()
486+
strategy_learner.to_json().map_err(|e| {
487+
tracing::warn!("Strategy learner serialization failed: {e}");
488+
e
489+
}).ok()
487490
} else {
488491
None
489492
};
490493

491494
let updated_memory_bytes = if ctx.config.enable_memory {
492-
memory.to_bytes().ok()
495+
memory.to_bytes().map_err(|e| {
496+
tracing::warn!("Memory serialization failed: {e}");
497+
e
498+
}).ok()
493499
} else {
494500
None
495501
};

crates/halcon-api/src/server/handlers/chat.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,9 @@ pub async fn resolve_permission(
393393

394394
// Forward decision to executor via perm_senders.
395395
if let Some(tx) = state.perm_senders.get(&session_id) {
396-
let _ = tx.send((request_id, approved));
396+
if let Err(e) = tx.send((request_id, approved)) {
397+
tracing::error!(session_id = %session_id, request_id = %request_id, "Permission decision channel closed: {e}");
398+
}
397399
}
398400

399401
state.broadcast(WsServerEvent::PermissionResolved {

crates/halcon-desktop/src/views/chat.rs

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,19 +28,23 @@ pub fn render(ui: &mut egui::Ui, state: &mut AppState, cmd_tx: &mpsc::Sender<UiC
2828
match outcome {
2929
PermissionOutcome::Approved => {
3030
state.chat.permission_modal = None;
31-
let _ = cmd_tx.try_send(UiCommand::ResolvePermission {
31+
if let Err(e) = cmd_tx.try_send(UiCommand::ResolvePermission {
3232
session_id,
3333
request_id: modal.request_id,
3434
approve: true,
35-
});
35+
}) {
36+
tracing::error!("Failed to send permission approval: {e}");
37+
}
3638
}
3739
PermissionOutcome::Denied => {
3840
state.chat.permission_modal = None;
39-
let _ = cmd_tx.try_send(UiCommand::ResolvePermission {
41+
if let Err(e) = cmd_tx.try_send(UiCommand::ResolvePermission {
4042
session_id,
4143
request_id: modal.request_id,
4244
approve: false,
43-
});
45+
}) {
46+
tracing::error!("Failed to send permission denial: {e}");
47+
}
4448
}
4549
PermissionOutcome::Pending => {}
4650
}
@@ -600,16 +604,21 @@ fn render_input_area(
600604
// Cache for potential retry if the turn fails with recoverable=true.
601605
state.chat.last_message = Some((content.clone(), orchestrate));
602606
state.chat.retry_count = 0;
603-
let _ = cmd_tx.try_send(UiCommand::SendChatMessage {
607+
if let Err(e) = cmd_tx.try_send(UiCommand::SendChatMessage {
604608
session_id,
605609
content,
606610
orchestrate,
607611
attachments,
608-
});
612+
}) {
613+
tracing::error!("Failed to send chat message: {e}");
614+
state.chat.is_streaming = false;
615+
}
609616
}
610617

611618
if is_busy && ui.button("Cancel").clicked() {
612-
let _ = cmd_tx.try_send(UiCommand::CancelChatExecution { session_id });
619+
if let Err(e) = cmd_tx.try_send(UiCommand::CancelChatExecution { session_id }) {
620+
tracing::warn!("Failed to send cancellation: {e}");
621+
}
613622
}
614623
});
615624
}

crates/halcon-runtime-events/src/bus.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,9 @@ impl EventBus {
9797
let event = RuntimeEvent::new(session_id, kind);
9898
// `send` returns Err if there are no receivers — that is fine; the bus
9999
// may have zero subscribers during early startup or in test runs.
100-
let _ = self.sender.send(event);
100+
if self.sender.send(event).is_err() {
101+
tracing::trace!("Event bus: no subscribers (dropped event)");
102+
}
101103
}
102104

103105
/// Subscribe to the event stream, receiving all future events.

0 commit comments

Comments
 (0)