diff --git a/docs/designs/2025-11-21-subagent-system-implementation.md b/docs/designs/2025-11-21-subagent-system-implementation.md new file mode 100644 index 00000000..f521feef --- /dev/null +++ b/docs/designs/2025-11-21-subagent-system-implementation.md @@ -0,0 +1,368 @@ +# Subagent 系统完整实现 + +**Date:** 2025-11-21 + +## Context + +本设计旨在为 neovate-code 项目实现完整的 subagent(子代理)技术方案,参考 Claude Code 的 Explore Subagent 和 Task Tool 实现。目标是让主 Agent 能够将复杂的多步骤任务委托给专门的子 Agent 处理,每个子 Agent 具有特定的能力和工具访问权限。 + +核心需求: +- 完整复刻 Claude Code 的 subagent 能力 +- 优先实现 Explore 和 Plan 两个核心子代理 +- 建立完整的 Agent 系统架构(而非简单集成) + +## Discussion + +### Phase 1: 理解需求 + +**关键决策:实现范围** +- 选择方案 A(完整复刻)而非轻量实现或定制化方案 +- 优先实现 Explore + Plan 子代理,跳过 General-Purpose 和 Statusline-Setup + +**技术基础评估** +- 现有工具系统部分完成:已有统一的 Tool 接口和基础工具实现 +- 缺少:工具权限过滤机制、子代理调用机制、上下文 Fork 机制 + +### Phase 2: 方案探索 + +评估了三个方案后选择 **方案 B(标准 Agent 系统方案)**: + +**方案对比**: +- 方案 A(轻量集成):改动小但扩展性弱 +- **方案 B(标准 Agent 系统)**:架构清晰,完全对标 Claude Code ✓ +- 方案 C(渐进式混合):风险可控但总周期长 + +**选择理由**: +1. 一次到位,避免反复重构 +2. 架构优雅,符合"完整复刻"目标 +3. 可维护性好,便于后续扩展 +4. 长期收益最高 + +### Phase 3: 设计细化 + +**关键设计点**: + +1. **工具名称规范** + - Task 工具名称:`task`(小写) + - 编辑工具:`edit`, `write`(小写) + +2. **模型配置策略** + - 采用工厂函数模式传入 context + - Explore Agent: `context.config.smallModel || context.config.model` + - Plan Agent: `context.config.planModel` + +3. 
**上下文 Fork 优化** + - 参考 `normalizeMessagesForCompact` 处理工具消息 + - 智能过滤不支持的工具调用,转换为摘要 + - 避免子 Agent 看到不可用工具的消息 + +## Approach + +### 核心架构 + +采用标准的 Agent 系统架构,建立清晰的分层设计: + +``` +src/ + agent/ + index.ts # AgentManager - 注册和调用入口 + types.ts # 类型定义 + executor.ts # AgentExecutor - 执行引擎 + toolFilter.ts # ToolFilter - 工具权限过滤 + contextFork.ts # ContextFork - 上下文继承 + builtin/ + index.ts # 内置 Agent 导出 + explore.ts # Explore Agent 工厂函数 + plan.ts # Plan Agent 工厂函数 + common.ts # 公共配置 + tools/ + task.ts # Task 工具实现 +``` + +### 数据流 + +``` +用户输入 + ↓ +runLoop (主 Agent) + ↓ +调用 Task 工具 + ↓ +AgentManager.executeTask() + ↓ +├─ 查找 Agent 定义 +├─ 过滤工具权限 (ToolFilter) +├─ Fork 上下文 (ContextFork, 可选) +└─ 执行子 Agent (AgentExecutor) + ↓ + runLoop (isSubAgent=true) + ↓ + 返回结果 +``` + +### 关键技术点 + +1. **工具权限过滤** + - 支持通配符 `["*"]` 表示所有工具 + - `disallowedTools` 优先级高于 `tools` + - 静态方法设计,无状态,易测试 + +2. **上下文 Fork 机制** + - 过滤孤立的 tool_use(没有对应 tool_result) + - 将不支持的工具调用转换为摘要 + - 添加明确的上下文分隔标记 + +3. **工厂函数模式** + - Agent 定义使用工厂函数:`createExploreAgent(opts)` + - 通过 `opts: { context: Context }` 传入上下文 + - 在函数内部访问 `context.config.smallModel` 等配置 + +## Architecture + +### 1. 类型系统 (`src/agent/types.ts`) + +```typescript +export interface AgentDefinition { + agentType: string; + whenToUse: string; + systemPrompt: string; + model: string; + source: 'built-in' | 'plugin' | 'user'; + tools?: string[]; + disallowedTools?: string[]; + forkContext?: boolean; + color?: string; +} + +export interface TaskToolInput { + description: string; + prompt: string; + subagent_type: string; + model?: string; + resume?: string; +} + +export interface AgentExecutionResult { + status: 'completed' | 'failed'; + agentId: string; + content: string; + totalToolCalls: number; + totalDuration: number; + usage: { + inputTokens: number; + outputTokens: number; + }; +} +``` + +### 2. 
工具过滤系统 (`src/agent/toolFilter.ts`) + +**核心逻辑**: +- 构建禁用工具集合 +- 处理通配符(未定义 tools 或 `["*"]`) +- 显式工具列表处理 +- disallowedTools 优先过滤 + +**关键方法**: +```typescript +static filterTools(allTools: Tool[], agentDef: AgentDefinition): Tool[] +``` + +### 3. 上下文 Fork (`src/agent/contextFork.ts`) + +**核心功能**: +1. `prepareForkMessages()`: 准备 fork 的上下文消息 +2. `normalizeMessagesForSubAgent()`: 规范化父级消息 + - 过滤 assistant 消息中不支持的 tool_use + - 转换 tool 消息中不支持的工具结果为摘要 + - 混合处理:支持的保留,不支持的摘要 +3. `buildContextSeparatorMessage()`: 构建上下文分隔标记 + +**消息处理策略**: +- Assistant 消息:保留 text/reasoning,过滤不支持的 tool_use +- Tool 消息:支持的保留原样,不支持的转为 user 消息摘要 +- 添加明确的工具列表说明 + +### 4. Agent 执行引擎 (`src/agent/executor.ts`) + +**执行流程**: +1. 验证 Agent 定义 +2. 过滤工具(ToolFilter) +3. 准备消息(ContextFork 或简单消息) +4. 解析模型(优先级:调用时指定 > Agent 定义) +5. 执行 runLoop(标记 isSubAgent=true) +6. 处理结果和错误 + +**错误处理**: +- 工具过滤后为空 → 抛出友好错误 +- 模型未指定 → 验证并抛出错误 +- 所有异常 → 捕获并返回失败结果(不抛出) + +### 5. Agent 管理器 (`src/agent/index.ts`) + +**职责**: +- 注册和管理所有 Agent +- 查找 Agent 定义 +- 执行 Task 工具调用 +- 提供 Agent 描述(用于系统提示) + +**关键方法**: +```typescript +constructor(opts: { context: Context }) +registerAgent(definition: AgentDefinition) +getAgent(agentType: string): AgentDefinition | undefined +executeTask(input: TaskToolInput, context: {...}): Promise +``` + +### 6. 内置 Agent 定义 + +**Explore Agent** (`src/agent/builtin/explore.ts`): +```typescript +export function createExploreAgent(opts: { context: Context }): AgentDefinition { + return { + agentType: 'Explore', + model: context.config.smallModel || context.config.model, + disallowedTools: ['task', 'edit', 'write'], + forkContext: false, + color: 'blue', + // ... 
+ }; +} +``` + +**特点**: +- 快速模型(smallModel) +- 只读工具(禁用 edit/write) +- 禁用 task 工具(防止递归) +- 支持彻底性级别(quick/medium/very thorough) + +**Plan Agent** (`src/agent/builtin/plan.ts`): +```typescript +export function createPlanAgent(opts: { context: Context }): AgentDefinition { + return { + agentType: 'Plan', + model: context.config.planModel, + disallowedTools: ['task', 'edit', 'write'], + forkContext: false, + color: 'purple', + // ... + }; +} +``` + +**特点**: +- 强大模型(planModel) +- 深度分析和规划能力 +- 相同的工具限制(只读 + 禁止递归) + +### 7. Task 工具 (`src/tools/task.ts`) + +**Schema 定义**(严格对标 Claude Code): +```typescript +{ + name: 'task', + description: 'Launch a new agent to handle complex, multi-step tasks autonomously', + parameters: { + description: 'A short (3-5 word) description of the task', + prompt: 'The task for the agent to perform', + subagent_type: 'The type of specialized agent to use for this task', + model: 'Optional model to use...', + resume: 'Optional agent ID to resume from...', + } +} +``` + +**执行逻辑**: +1. 获取当前对话历史(forkContext) +2. 调用 `agentManager.executeTask()` +3. 格式化返回结果(包含 Agent ID、工具调用次数、耗时等) + +**批准机制**: +- category: 'command' +- yolo 模式:无需批准 +- 其他模式:需要用户批准 + +### 8. 系统集成 + +**扩展 `src/tool.ts`**: +- 添加 `agentManager` 和 `getCurrentMessages` 可选参数 +- 条件性创建 Task 工具 + +**扩展 `src/context.ts`**: +- 添加 `agentManager: AgentManager` 属性 +- 在 `create()` 方法中初始化 AgentManager + +**扩展 `src/loop.ts`**: +- 添加 `isSubAgent?: boolean` 参数 +- 用于日志区分主/子 Agent(可选) + +### 9. 测试策略 + +**核心测试用例**: + +1. **ToolFilter 测试** + - 验证 disallowedTools 过滤功能 + +2. **ContextFork 测试** + - 验证上下文分隔符和任务消息添加 + +3. 
**集成测试** + - 验证内置 Agent 正确注册 + - 验证 Agent 类型和配置 + +**测试目标**:确保主流程正常工作,无需覆盖所有边界情况。 + +## Implementation Notes + +### 配置文件支持 + +用户可以在配置文件中设置 subagent 使用的模型: + +```json +{ + "model": "claude-3-5-sonnet-20241022", + "planModel": "claude-3-5-sonnet-20241022", + "smallModel": "claude-3-5-haiku-20241022" +} +``` + +### 使用场景 + +**✅ 应该使用 Task 工具**: +- 需要多轮搜索和探索的开放式任务 +- 不确定第一次尝试就能找到正确结果 +- 需要专门的快速搜索能力(Explore) +- 需要深度架构分析(Plan) + +**❌ 不应该使用 Task 工具**: +- 读取已知的特定文件 → 直接使用 read 工具 +- 搜索特定的类/函数名 → 直接使用 grep 工具 +- 在 2-3 个已知文件中搜索 → 直接使用 grep 工具 + +### 扩展性 + +未来可以支持: +- 自定义 Agent 注册(通过 `agentManager.registerAgent()`) +- Agent 优先级系统(类似 Claude Code 的 source 优先级) +- 插件系统集成 +- 异步执行(background execution) + +## References + +- `explore-subagent-analysis.md`: Claude Code 的 Explore Subagent 技术实现详解 +- `Task-Tool-Analysis.md`: Claude Code Task Tool 技术实现分析 +- `src/utils/messageNormalization.ts`: 消息规范化参考实现 +- `src/slash-commands/builtin/init.ts`: 工厂函数模式参考 + +## Next Steps + +1. 实现核心类型定义 (`src/agent/types.ts`) +2. 实现工具过滤系统 (`src/agent/toolFilter.ts`) +3. 实现上下文 Fork 机制 (`src/agent/contextFork.ts`) +4. 实现 Agent 执行引擎 (`src/agent/executor.ts`) +5. 实现 Agent 管理器 (`src/agent/index.ts`) +6. 实现内置 Agent (`src/agent/builtin/`) +7. 实现 Task 工具 (`src/tools/task.ts`) +8. 集成到现有系统 (`tool.ts`, `context.ts`, `loop.ts`) +9. 编写核心测试用例 +10. 更新文档 (`AGENTS.md`, 用户指南) diff --git a/docs/designs/2025-12-10-sub-agent-progress-display.md b/docs/designs/2025-12-10-sub-agent-progress-display.md new file mode 100644 index 00000000..33e0ef04 --- /dev/null +++ b/docs/designs/2025-12-10-sub-agent-progress-display.md @@ -0,0 +1,1926 @@ +# Sub-Agent 执行进度实时展示 + +**日期:** 2025-12-10 + +## 背景 + +当前 `src/tools/task.ts` 工具只在所有 sub-agent 执行完毕后才展示最终结果,用户体验不佳。用户希望参考 Claude Code 的实现方式,在 sub-agent 执行过程中实时展示: + +1. **Task** 的描述(例如:"Search for nodeBridge logic") +2. **每个工具调用**的详细信息(例如:Search 工具的参数和结果摘要) +3. 
**可展开的详细信息**(类似 "ctrl+o to expand" 和 "+13 more tool uses") + +核心诉求是给用户提供当前的执行过程反馈,而不是等待黑盒执行完成。 + +## 讨论过程 + +### Phase 1: 理解需求与架构调研 + +**关键问题:** +- 如何实时推送 sub-agent 的执行进度? +- 是否复用现有的消息机制还是创建新的事件通道? + +**探索的方案:** +- **方案 A(定期轮询)**: 定期查询 sub-agent 状态 → 被否决(复杂度高) +- **方案 B(流式回调)**: 利用现有 `onMessage`/`onToolUse` 回调 → 初步选定 +- **方案 C(独立事件通道)**: 新增 `onUIProgress` 回调 → 考虑中 + +### Phase 2: 上下文隔离的挑战 + +**关键发现:** +最初设计试图通过 `parentUuid` 分支机制来隔离 sub-agent 消息,但这会导致: +- Sub-agent 的消息混入主 agent 的 history +- LLM 上下文被污染,消耗大量 tokens +- 逻辑混乱,难以区分主线和分支 + +**正确的隔离机制(参考现有实现):** +- 每个 sub-agent 拥有独立的会话文件:`agent-{agentId}.jsonl` +- AgentID 通过 8 位十六进制随机生成(类似 `a1b2c3d4`) +- 完全独立的上下文,不与主 agent 混合 +- 通过 metadata 建立关联关系 + +### Phase 3: 数据查询逻辑的位置 + +**问题:** UI 层直接查询 sub-agent 消息是否合理? + +**探讨:** +- ❌ UI 层查询:违反分层原则 +- ⚠️ session.ts 工具函数:可行但可能过度设计 +- ✅ **NodeBridge RPC 接口**:统一数据访问,支持远程场景 + +**最终决定:** +- 数据查询逻辑放在 `nodeBridge.ts` +- 通过 RPC 接口 `agent.getMessages` 按需加载 +- UI 层只负责渲染,保持简洁 + +## 最终方案 + +### 关键决策记录(ADR) + +在设计过程中,我们对多个技术方案进行了讨论和选择。以下是关键决策及其理由: + +#### 决策 1: 上下文隔离方式 + +**问题**: Sub-agent 的消息如何与主 agent 隔离? + +**候选方案**: +- ❌ **方案 A**: 通过 `parentUuid` 分支机制 - 消息混入主 session,污染 LLM 上下文 +- ✅ **方案 B**: 独立 session 文件(`agent-{agentId}.jsonl`) - 完全隔离,上下文清晰 + +**最终选择**: 方案 B + +**理由**: +- 避免 LLM 上下文污染,节省 tokens +- 每个 agent 有独立的日志文件,便于调试 +- 符合"一个任务一个会话"的设计理念 +- 可扩展到递归 sub-agent + +--- + +#### 决策 2: 实时进度通信方式 + +**问题**: 如何将 SubAgent 的实时进度传递到 UI 层? + +**候选方案**: +- ❌ **方案 A**: 定期轮询 agent session 文件 - 延迟高,I/O 开销大 +- ❌ **方案 B**: UI 直接读取 agent log - 违反分层原则,难以支持远程 +- ✅ **方案 C**: onProgress 回调 + MessageBus 事件 - 事件驱动,支持跨端 + +**最终选择**: 方案 C + +**理由**: +- 延迟极低(本地模式 < 10ms) +- 复用现有的 MessageBus 架构,无需新增传输层 +- 支持本地和 Server 模式 +- 符合事件驱动的设计模式 + +--- + +#### 决策 3: onProgress 回调注入位置 + +**问题**: 在哪里为 SubAgent 注入 onProgress 回调? 
+ +**候选方案**: +- ❌ **方案 A**: 在 loop.ts 的 toolRunner 处,为所有工具注入 - 影响面大,过度设计 +- ✅ **方案 B**: 在 agentManager.executeTask 处,只为 Task tool 提供 - 影响面小,易扩展 +- ❌ **方案 C**: 在 task.ts 的 execute 函数内部 - 耦合度高,难以测试 + +**最终选择**: 方案 B + +**理由**: +- 影响范围小,只修改 Task tool 相关代码 +- 符合单一职责原则 +- 未来可扩展到其他工具(如 bash tool 的实时输出) +- 易于测试和维护 + +--- + +#### 决策 4: 实时进度数据存储位置 + +**问题**: SubAgent 的实时进度数据存储在哪里? + +**候选方案**: +- ✅ **方案 A**: 存储在 appStore(内存) - 实时性好,不持久化 +- ❌ **方案 B**: 持久化到 session.jsonl - 可回溯,但增加文件大小和 LLM 成本 +- ❌ **方案 C**: 混合方式(内存 + 最终结果持久化) - 复杂度高 + +**最终选择**: 方案 A + +**理由**: +- 实时数据不需要持久化(重启后可从 log 恢复) +- 避免频繁 I/O,性能更好 +- appStore 按 toolUseID 索引,查询高效 +- 简化实现,降低复杂度 + +--- + +#### 决策 5: 跨端通信方案 + +**问题**: 如何支持本地模式和 Server 模式? + +**候选方案**: +- ❌ **方案 A**: 分别实现两套逻辑 - 维护成本高 +- ❌ **方案 B**: 只通过 RPC 请求加载数据 - 延迟高,无法实时更新 +- ✅ **方案 C**: 复用 MessageBus 的 transport 抽象 - 透明支持,统一 API + +**最终选择**: 方案 C + +**理由**: +- MessageBus 已经支持 DirectTransport 和 WebSocketTransport +- 无需修改核心逻辑,只需添加事件类型 +- 统一的 `emitEvent` / `onEvent` API +- 自动处理序列化和传输细节 + +--- + +#### 决策 6: UI 渲染触发方式 + +**问题**: UI 如何感知进度更新并重新渲染? + +**候选方案**: +- ❌ **方案 A**: 手动触发 React 的 forceUpdate - 反模式,难以维护 +- ✅ **方案 B**: 通过 Zustand 的 setState 自动触发 - 符合 React 最佳实践 +- ❌ **方案 C**: 使用 EventEmitter 监听 - 引入额外复杂度 + +**最终选择**: 方案 B + +**理由**: +- Zustand 的 setState 自动触发 React 重新渲染 +- 符合现有代码的状态管理模式 +- 简洁明了,易于理解 +- 支持细粒度订阅(只订阅 agentProgressMap 变化) + +--- + +### 核心设计原则 + +1. **独立会话隔离**: 每个 sub-agent 创建独立的 `agent-{agentId}.jsonl` 文件 +2. **上下文完全隔离**: Sub-agent 消息不进入主 agent 的 LLM 上下文 +3. **关联通过 metadata**: 主 agent 的 tool result 记录 `agentId` 建立关联 +4. 
**按需加载**: UI 展开时才通过 RPC 加载 agent messages + +### 数据流 + +``` +用户调用 Task Tool + ↓ +[1] task.ts execute() + - 调用 agentManager.executeTask() + ↓ +[2] executeAgent() + - 生成 agentId (8位十六进制) + - 创建独立的 JsonlLogger → agent-{agentId}.jsonl + - 执行 runLoop() + - Sub-agent 的所有消息写入独立文件 + ↓ +[3] 返回结果 + - AgentExecutionResult 包含 agentId + - ToolResult.metadata = { agentId, agentType } + ↓ +[4] 主 agent 保存 tool_result + - ToolResultPart 提升 agentId 字段 + - 写入主 session log + ↓ +[5] UI 渲染 + - 识别 Task tool + - 点击展开时调用 RPC: agent.getMessages(agentId) + - 加载并缓存 sub-agent messages + - 嵌套渲染工具调用 +``` + +### 消息存储结构 + +**主 Agent Session (`session-abc123.jsonl`)** +```json +{ "role": "user", "content": "帮我审查代码", "uuid": "msg-1", "parentUuid": null } +{ "role": "assistant", "content": [{ "type": "tool_use", "id": "task-1", "name": "Task" }], "uuid": "msg-2", "parentUuid": "msg-1" } +{ "role": "tool", "content": [{ + "type": "tool-result", + "toolCallId": "task-1", + "result": { "llmContent": "审查完成...", "metadata": { "agentId": "a1b2c3d4" } }, + "agentId": "a1b2c3d4", + "agentType": "code-reviewer" + }], "uuid": "msg-3", "parentUuid": "msg-2" } +{ "role": "assistant", "content": "已完成代码审查", "uuid": "msg-4", "parentUuid": "msg-3" } +``` + +**Sub-Agent Session (`agent-a1b2c3d4.jsonl`)** - 独立文件 +```json +{ "role": "user", "content": "审查以下代码...", "uuid": "agent-msg-1", "parentUuid": null, "metadata": { "agentId": "a1b2c3d4" } } +{ "role": "assistant", "content": [{ "type": "tool_use", "id": "read-1", "name": "read" }], "uuid": "agent-msg-2", "parentUuid": "agent-msg-1", "metadata": { "agentId": "a1b2c3d4" } } +{ "role": "tool", "content": [{ "type": "tool-result", "toolCallId": "read-1", "result": {...} }], "uuid": "agent-msg-3", "parentUuid": "agent-msg-2", "metadata": { "agentId": "a1b2c3d4" } } +{ "role": "assistant", "content": [{ "type": "tool_use", "id": "grep-1", "name": "grep" }], "uuid": "agent-msg-4", "parentUuid": "agent-msg-3", "metadata": { "agentId": "a1b2c3d4" } } +{ "role": "tool", 
"content": [{ "type": "tool-result", "toolCallId": "grep-1", "result": {...} }], "uuid": "agent-msg-5", "parentUuid": "agent-msg-4", "metadata": { "agentId": "a1b2c3d4" } } +{ "role": "assistant", "content": "发现以下问题...", "uuid": "agent-msg-6", "parentUuid": "agent-msg-5", "metadata": { "agentId": "a1b2c3d4" } } +``` + +### UI 交互流程 + +#### 执行中状态 + +**折叠状态(默认):** +``` +╭─ Agent: code-reviewer (审查代码质量) ─────────────╮ +│ │ +│ +5 more tool uses │ +│ Assistant: ↳ write REVIEW.md │ +│ Tool: ✓ Tool results │ +│ │ +│ In progress... · 8 tool uses · 1.2k tokens │ +╰─────────────────────────────── (ctrl+o to expand)╯ +``` + +**展开状态(ctrl+o):** +``` +╭─ Agent: code-reviewer (审查代码质量) ─────────────╮ +│ │ +│ User: 审查以下代码... │ +│ Assistant: ↳ read src/api.ts │ +│ Tool: ✓ Tool results │ +│ Assistant: ↳ grep "function" │ +│ Tool: ✓ Tool results │ +│ Assistant: ↳ analyze_complexity │ +│ Tool: ✓ Tool results │ +│ Assistant: ↳ write REVIEW.md │ +│ Tool: ✓ Tool results │ +│ │ +│ In progress... · 8 tool uses · 1.2k tokens │ +╰────────────────────────────── (ctrl+o to collapse)╯ +``` + +#### 完成状态 + +**折叠状态(默认):** +``` +✓ code-reviewer (审查代码质量) (8 tool uses · 1.2k tokens · 3.5s) ▶ Show details +``` + +**展开状态:** +``` +╭─ Done: code-reviewer (审查代码质量) ────────────────╮ +│ (8 tool uses · 1.2k tokens · 3.5s) │ +│ │ +│ User: 审查以下代码... │ +│ Assistant: ↳ read src/api.ts │ +│ Tool: ✓ Tool results │ +│ Assistant: ↳ grep "function" │ +│ Tool: ✓ Tool results │ +│ ... │ +│ │ +│ Response: │ +│ 发现以下问题: │ +│ 1. 缺少错误处理 │ +│ 2. 函数复杂度过高 │ +│ 建议重构... │ +│ │ +╰───────────────────────────────────── ▼ Hide details╯ +``` + +#### 并行执行状态 + +``` +Parallel Agents: +├─ Done code-reviewer (审查代码) +├─ Running test-runner (执行测试) +└─ Done documenter (生成文档) +``` + +**触发流程:** +1. Sub-agent 开始执行 → 创建 `agent-{agentId}.jsonl` +2. 每次工具调用 → 写入独立文件 + 通过 `onSubAgentMessage` 实时通知 +3. UI 收到通知 → 更新 `agentMessagesCache` → 重新渲染 +4. 用户切换展开/折叠 → 按需加载完整消息历史(如果未缓存) +5. Sub-agent 完成 → 切换到完成状态渲染 + +## 架构设计 + +### 1. 
核心类型扩展 + +**src/tool.ts** +```typescript +export type ToolResult = { + llmContent: string; + isError: boolean; + uiContent?: string; + metadata?: { + agentId?: string; + agentType?: string; + [key: string]: any; + }; +}; +``` + +**src/message.ts** +```typescript +export type ToolResultPart = { + type: 'tool_result'; + id: string; + name: string; + input: Record; + result: ToolResult; + agentId?: string; // 提升自 result.metadata + agentType?: string; +}; + +export type ToolResultPart2 = { + type: 'tool-result'; + toolCallId: string; + toolName: string; + input: Record; + result: ToolResult; + agentId?: string; + agentType?: string; +}; +``` + +### 2. Paths 扩展 + +**src/paths.ts** +```typescript +export class Paths { + /** + * 获取 sub-agent 的独立 log 路径 + * 格式: ~/.neovate/sessions/agent-{agentId}.jsonl + */ + getAgentLogPath(agentId: string): string { + return path.join(this.sessionDir, `agent-${agentId}.jsonl`); + } + + /** + * 获取所有 agent sessions + */ + getAllAgentSessions(): Array<{ agentId: string; path: string; mtime: number }> { + // 实现逻辑... + } +} +``` + +### 3. Agent Executor 改造 + +**src/agent/executor.ts** +```typescript +export async function executeAgent( + options: AgentExecuteOptions, +): Promise { + // 1. 生成 agentId (8位十六进制) + const agentId = randomUUID().slice(0, 8); + + // 2. 创建独立的 session log + const agentLogPath = context.paths.getAgentLogPath(agentId); + const agentLogger = new JsonlLogger({ filePath: agentLogPath }); + + // 3. 执行 runLoop,消息写入独立文件 + const loopResult = await runLoop({ + // ... + onMessage: async (message) => { + const normalizedMessage = { + ...message, + metadata: { + ...message.metadata, + agentId, + agentType: definition.agentType, + }, + }; + + // 写入独立的 agent log + agentLogger.addMessage({ message: normalizedMessage }); + + // 可选:实时通知父 agent (用于 UI 实时展示) + await options.onSubAgentMessage?.(normalizedMessage); + }, + }); + + // 4. 
返回 agentId + return { + status: 'completed', + agentId, + content: extractFinalContent(loopResult.data), + // ... + }; +} +``` + +### 4. Task Tool 改造 + +**src/tools/task.ts** +```typescript +execute: async (params) => { + const result = await agentManager.executeTask(params, { /* ... */ }); + + if (result.status === 'completed') { + return { + llmContent: `Sub-agent completed...`, + isError: false, + metadata: { + agentId: result.agentId, // 关键:记录 agentId + agentType: params.subagent_type, + }, + }; + } +} +``` + +### 5. Loop 消息保存改造 + +**src/loop.ts** +```typescript +// 保存 tool result 时,提升 metadata.agentId +await history.addMessage({ + role: 'tool', + content: toolResults.map((tr) => { + const resultPart: any = { + type: 'tool-result', + toolCallId: tr.toolCallId, + toolName: tr.toolName, + input: tr.input, + result: tr.result, + }; + + // 提升 agentId 到 tool-result 层级 + if (tr.result.metadata?.agentId) { + resultPart.agentId = tr.result.metadata.agentId; + resultPart.agentType = tr.result.metadata.agentType; + } + + return resultPart; + }), +}); +``` + +### 6. NodeBridge RPC 接口 + +**src/nodeBridge.ts** +```typescript +// 获取 agent session 的消息历史 +this.messageBus.registerHandler( + 'agent.getMessages', + async (data: { cwd: string; agentId: string }) => { + const { cwd, agentId } = data; + const context = await this.getContext(cwd); + const agentLogPath = context.paths.getAgentLogPath(agentId); + + if (!fs.existsSync(agentLogPath)) { + return { success: false, error: 'Agent session not found' }; + } + + const messages = loadSessionMessages({ logPath: agentLogPath }); + return { success: true, data: { messages, agentId } }; + } +); +``` + +### 7. 
UI Store 扩展 + +**src/ui/store.ts** +```typescript +interface AppState { + // 缓存已加载的 agent messages(按 agentId 索引) + agentMessagesCache: Record<string, NormalizedMessage[]>; +} + +interface AppActions { + loadAgentMessages: (agentId: string) => Promise<void>; +} + +// 实现 +loadAgentMessages: async (agentId: string) => { + const { bridge, cwd, agentMessagesCache } = get(); + + if (agentMessagesCache[agentId]) return; // 已缓存 + + const response = await bridge.request('agent.getMessages', { cwd, agentId }); + + if (response.success) { + set({ + agentMessagesCache: { + ...agentMessagesCache, + [agentId]: response.data.messages, + }, + }); + } +} +``` + +### 8. 实时进度机制 + +#### 8.1 核心问题 + +当前设计已经实现了 SubAgent 消息的**独立存储**(`agent-{agentId}.jsonl`),但缺少**实时推送**机制。用户希望在 SubAgent 执行过程中看到实时进度,而不是等执行完毕后再加载。 + +**关键挑战:** +1. **消息隔离**: SubAgent 的消息写入独立文件,父 Agent 无法直接访问 +2. **跨端通信**: 需要支持本地模式和 Server 模式 +3. **UI 实时更新**: 如何将 SubAgent 的消息实时传递到 UI 层 + +#### 8.2 设计方案:onProgress 回调 + MessageBus 事件 + +我们采用**事件驱动模型**,通过以下机制实现实时进度展示: + +``` +SubAgent 产生消息 + ↓ +executeAgent 捕获消息 + ↓ +调用 onProgress 回调 + ↓ +MessageBus 发送 agent_progress 事件 + ↓ +UI 层接收事件并更新 appStore + ↓ +React 组件重新渲染进度条 +``` + +#### 8.3 详细实现流程 + +##### Step 1: Task Tool 传递 onProgress 回调 + +**src/tools/task.ts** +```typescript +export function createTaskTool(opts: { + context: Context; + tools: Tool[]; + signal?: AbortSignal; + onProgress?: (data: AgentProgressData) => void; // 新增参数 +}) { + const { signal, onProgress } = opts; + const { cwd, agentManager } = opts.context; + + return createTool({ + name: TOOL_NAMES.TASK, + // ... 
+ execute: async (params, executionContext) => { + const startTime = Date.now(); + + if (!agentManager) { + return { llmContent: 'Agent manager not found', isError: true }; + } + + try { + const result = await agentManager.executeTask(params, { + cwd, + signal, + tools: opts.tools, + // 关键:传递 onProgress 回调 + onProgress: (message: NormalizedMessage, agentId: string) => { + // 构造进度数据包 + const progressData: AgentProgressData = { + toolUseID: executionContext.toolUseID, // 关联到哪个 tool use + agentId, + message, + timestamp: Date.now(), + }; + + // 调用回调(由 loop.ts 注入) + onProgress?.(progressData); + }, + }); + + // ... + } catch (error) { + // ... + } + }, + }); +} +``` + +**关键点:** +- `executionContext.toolUseID` 是当前工具调用的唯一标识(如 `"task-1"`) +- `onProgress` 回调由**上层调用方**(loop.ts)注入 +- 进度数据包含 `toolUseID` 用于 UI 层索引 + +##### Step 2: AgentManager 接受并传递回调 + +**src/agent/index.ts** +```typescript +export class AgentManager { + async executeTask( + input: TaskToolInput, + context: { + tools: Tool[]; + cwd: string; + signal?: AbortSignal; + forkContextMessages?: NormalizedMessage[]; + onProgress?: (message: NormalizedMessage, agentId: string) => void; // 新增 + }, + ): Promise<AgentExecutionResult> { + const definition = this.agents.get(input.subagent_type); + if (!definition) { + throw new Error(`Agent type '${input.subagent_type}' not found`); + } + + const executeOptions: AgentExecuteOptions = { + definition, + prompt: input.prompt, + tools: context.tools, + context: this.context, + model: input.model, + forkContextMessages: definition.forkContext + ? context.forkContextMessages + : undefined, + cwd: context.cwd, + signal: context.signal, + onProgress: context.onProgress, // 传递给 executeAgent + }; + + return executeAgent(executeOptions); + } +} +``` + +##### Step 3: ExecuteAgent 捕获消息并调用回调 + +**src/agent/executor.ts** +```typescript +export async function executeAgent( + options: AgentExecuteOptions, +): Promise<AgentExecutionResult> { + const { definition, prompt, context, onProgress } = options; + + // 1. 
生成 agentId (8位十六进制) + const agentId = randomUUID().slice(0, 8); + + // 2. 创建独立的 session log + const agentLogPath = context.paths.getAgentLogPath(agentId); + const agentLogger = new JsonlLogger({ filePath: agentLogPath }); + + // 3. 执行 runLoop,捕获每条消息 + const loopResult = await runLoop({ + // ... + onMessage: async (message) => { + const normalizedMessage = { + ...message, + metadata: { + ...message.metadata, + agentId, + agentType: definition.agentType, + }, + }; + + // 写入独立的 agent log + agentLogger.addMessage({ message: normalizedMessage }); + + // 关键:实时通知父级 + if (onProgress) { + await onProgress(normalizedMessage, agentId); + } + }, + }); + + // 4. 返回 agentId + return { + status: 'completed', + agentId, + content: extractFinalContent(loopResult.data), + // ... + }; +} +``` + +**关键点:** +- `onMessage` 在 SubAgent 每产生一条消息时触发 +- 先写入独立 log 文件(持久化) +- 再调用 `onProgress`(实时通知) +- `onProgress` 是**异步**的,支持跨端通信 + +##### Step 4: Loop.ts 注入 onProgress 并通过 MessageBus 发送事件 + +**src/loop.ts (伪代码,核心逻辑)** +```typescript +export async function runLoop(options: RunLoopOptions) { + // ... + + // 工具执行逻辑 + for (const toolUse of toolUses) { + const tool = tools.get(toolUse.name); + + // 为 Task tool 注入 onProgress 回调 + const onProgress = toolUse.name === TOOL_NAMES.TASK + ? async (progressData: AgentProgressData) => { + // 通过 MessageBus 发送进度事件 + await context.messageBus.emitEvent('agent_progress', { + sessionId: options.sessionId, + cwd: options.cwd, + progressData, + }); + } + : undefined; + + // 执行工具(传入 onProgress) + const result = await tool.execute(toolUse.input, { + context, + signal, + toolUseID: toolUse.id, + onProgress, // 注入回调 + }); + + // ... 
+ } +} +``` + +**关键点:** +- `onProgress` 只在执行 **Task tool** 时注入 +- 通过 `MessageBus.emitEvent` 发送跨端事件 +- 事件类型为 `agent_progress` + +##### Step 5: NodeBridge 监听并转发事件(跨端支持) + +**src/nodeBridge.ts (已有机制,新增事件类型)** +```typescript +// NodeBridge 已经有完整的事件转发机制 +// 只需确保 'agent_progress' 事件能够被正确转发 + +// 在 session.send handler 中,loop 触发的事件会自动通过 messageBus.emitEvent 发送到 UI 层 +// 无需修改 nodeBridge.ts,因为 MessageBus 会自动处理所有 emitEvent 调用 +``` + +**说明:** +- NodeBridge 已经实现了完整的 MessageBus 事件转发机制 +- 所有通过 `messageBus.emitEvent` 发送的事件都会被转发到 UI 层 +- UI 层通过 `bridge.onEvent` 监听事件 + +##### Step 6: UI 层接收事件并更新 Store + +**src/ui/store.ts** +```typescript +interface AppState { + // 现有字段... + + // 新增:按 toolUseID 索引的进度数据 + agentProgressMap: Record<string, AgentProgressState>; +} + +interface AppActions { + // 现有方法... + + // 新增:更新 agent 进度 + updateAgentProgress: (data: { + toolUseID: string; + agentId: string; + message: NormalizedMessage; + }) => void; + + // 新增:清理已完成的 agent 进度(可选) + clearAgentProgress: (toolUseID: string) => void; +} + +// 在 initialize 方法中添加事件监听 +initialize: async (opts) => { + // ...现有逻辑... + + // 监听 SubAgent 进度事件 + bridge.onEvent('agent_progress', (data) => { + const { progressData } = data; + get().updateAgentProgress({ + toolUseID: progressData.toolUseID, + agentId: progressData.agentId, + message: progressData.message, + }); + }); +}, + +// 实现更新逻辑 +updateAgentProgress: (data) => { + const { toolUseID, agentId, message } = data; + const { agentProgressMap } = get(); + + const existing = agentProgressMap[toolUseID]; + + set({ + agentProgressMap: { + ...agentProgressMap, + [toolUseID]: { + agentId, + messages: existing + ? 
[...existing.messages, message] + : [message], + lastUpdate: Date.now(), + }, + }, + }); +}, + +clearAgentProgress: (toolUseID) => { + const { agentProgressMap } = get(); + const newMap = { ...agentProgressMap }; + delete newMap[toolUseID]; + set({ agentProgressMap: newMap }); +}, +``` + +**关键点:** +- 进度数据按 `toolUseID` 索引(如 `"task-1"`) +- 每次收到新消息时追加到数组 +- `lastUpdate` 用于触发 React 重新渲染 + +##### Step 7: UI 组件消费进度数据 + +**src/ui/Messages.tsx** +```typescript +function SubAgentProgress({ + toolUse, + isVerbose +}: { + toolUse: ToolUsePart; + isVerbose: boolean; +}) { + const { agentProgressMap } = useAppStore(); + + // 从 store 中获取实时进度 + const progressData = agentProgressMap[toolUse.id]; + + if (!progressData) { + return Starting agent...; + } + + const { messages } = progressData; + const [expanded, setExpanded] = React.useState(isVerbose); + + // 统计信息 + const stats = React.useMemo(() => { + const toolCalls = messages.filter( + msg => msg.role === 'assistant' && + Array.isArray(msg.content) && + msg.content.some(p => p.type === 'tool_use') + ).length; + + const tokens = messages.reduce((sum, msg) => { + if (msg.role === 'assistant' && 'usage' in msg) { + const usage = (msg as AssistantMessage).usage; + return sum + usage.input_tokens + usage.output_tokens; + } + return sum; + }, 0); + + return { toolCalls, tokens }; + }, [messages]); + + // 智能截断:只显示最后 N 条 + const VISIBLE_LIMIT = 3; + const visibleMessages = expanded + ? messages + : messages.slice(-VISIBLE_LIMIT); + const hiddenCount = messages.length - visibleMessages.length; + + return ( + + {/* 标题栏 */} + + ╭─ Agent: {toolUse.description} + + + {/* 消息列表 */} + + {!expanded && hiddenCount > 0 && ( + + +{hiddenCount} more tool uses + + )} + + {visibleMessages.map((msg, idx) => ( + + ))} + + + {/* 状态栏 */} + + + │ In progress... · {stats.toolCalls} tool uses · {stats.tokens} tokens + + + + + ╰───────────────────────────── + (ctrl+o to {expanded ? 
'collapse' : 'expand'}) + + + ); +} +``` + +**关键点:** +- 通过 `toolUse.id` 获取对应的进度数据 +- `useMemo` 缓存统计计算,优化性能 +- 每当 `messages` 变化时,React 自动重新渲染 + +#### 8.4 数据流总结 + +##### 完整序列图 + +```mermaid +sequenceDiagram + participant User + participant UI as UI Layer (Ink) + participant Store as AppStore (Zustand) + participant Bridge as UIBridge + participant Bus as MessageBus + participant Node as NodeBridge + participant Loop as Loop.ts + participant Task as Task Tool + participant Agent as AgentManager + participant Exec as ExecuteAgent + participant SubLoop as SubAgent runLoop + participant Log as JsonlLogger + + User->>UI: 发送消息 "Review code" + UI->>Bridge: bridge.request('session.send') + Bridge->>Bus: request via MessageBus + Bus->>Node: forward to NodeBridge + Node->>Loop: runLoop(options) + + Note over Loop: 识别 Task tool_use + Loop->>Task: tool.execute(params, { toolUseID, onProgress }) + Task->>Agent: agentManager.executeTask({ onProgress }) + Agent->>Exec: executeAgent({ onProgress }) + + Note over Exec: 生成 agentId = "a1b2c3d4" + Note over Exec: 创建 agent-a1b2c3d4.jsonl + + Exec->>SubLoop: runLoop({ onMessage }) + + loop SubAgent 每产生一条消息 + SubLoop->>Exec: onMessage(message) + Exec->>Log: agentLogger.addMessage(message) + Note over Log: 写入 agent-a1b2c3d4.jsonl + + Exec->>Agent: onProgress(message, agentId) + Agent->>Task: onProgress(message, agentId) + Task->>Loop: onProgress({ toolUseID, agentId, message }) + + Loop->>Bus: emitEvent('agent_progress', data) + Bus->>Bridge: forward event + Bridge->>Store: onEvent('agent_progress') + Store->>Store: updateAgentProgress({ toolUseID, message }) + + Store-->>UI: state changed (agentProgressMap) + UI-->>User: 实时更新进度条 🔄 + end + + SubLoop-->>Exec: 返回执行结果 + Exec-->>Agent: AgentExecutionResult { agentId } + Agent-->>Task: 返回结果 + Task-->>Loop: ToolResult { metadata: { agentId } } + Loop->>Log: 保存 tool_result (含 agentId) + Loop-->>Node: 完成 + + Node->>Bus: emitEvent('message', toolResult) + Bus->>Bridge: forward + Bridge->>Store: 
onEvent('message') + Store->>Store: addMessage(toolResult) + Store-->>UI: state changed (messages) + UI-->>User: 显示 "✓ code-reviewer completed" ✅ +``` + +##### 文字描述 + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ 数据流图 │ +└─────────────────────────────────────────────────────────────────┘ + +[1] SubAgent 产生消息 + executeAgent → onMessage 回调 + ↓ (写入独立 log) + agentLogger.addMessage(message) + ↓ (实时通知) + onProgress(message, agentId) + +[2] 回调链传递 + executeAgent.onProgress + → agentManager.onProgress + → task.ts.onProgress + → loop.ts 注入的回调 + +[3] MessageBus 事件发送 + loop.ts: + messageBus.emitEvent('agent_progress', { + sessionId, + cwd, + progressData: { + toolUseID: "task-1", + agentId: "a1b2c3d4", + message: { role: 'assistant', ... }, + timestamp: 1234567890 + } + }) + +[4] 跨端传输(自动) + MessageBus → Transport → WebSocket/Direct → UI + +[5] UI 层接收 + bridge.onEvent('agent_progress', (data) => { + updateAgentProgress(data.progressData) + }) + +[6] Store 更新 + agentProgressMap["task-1"] = { + agentId: "a1b2c3d4", + messages: [msg1, msg2, msg3, ...], + lastUpdate: 1234567890 + } + +[7] React 重新渲染 + useAppStore() → agentProgressMap 变化 → 组件更新 +``` + +#### 8.5 跨端支持 + +**本地模式(DirectTransport):** +- MessageBus 通过 `DirectTransport` 直接传递事件 +- 延迟极低(setImmediate) +- 适合开发和调试 + +**Server 模式(WebSocketTransport):** +- MessageBus 通过 WebSocket 传递事件 +- 支持远程场景 +- 事件自动序列化/反序列化 + +**关键优势:** +- 无需修改 MessageBus 核心逻辑 +- 透明支持本地和远程模式 +- 统一的 API (`emitEvent` / `onEvent`) + +#### 8.6 类型定义 + +**src/agent/types.ts** +```typescript +export type AgentProgressData = { + toolUseID: string; // 关联的 tool use ID(如 "task-1") + agentId: string; // SubAgent 的 ID(如 "a1b2c3d4") + message: NormalizedMessage; // SubAgent 产生的消息 + timestamp: number; // 时间戳 +}; + +export type AgentExecuteOptions = { + // ...现有字段... 
+ onProgress?: (message: NormalizedMessage, agentId: string) => void | Promise; +}; +``` + +**src/ui/store.ts** +```typescript +type AgentProgressState = { + agentId: string; + messages: NormalizedMessage[]; + lastUpdate: number; +}; + +interface AppState { + agentProgressMap: Record; +} +``` + +#### 8.7 性能优化 + +**1. 智能截断** +```typescript +const VISIBLE_LIMIT = 3; // 默认只显示最后 3 条 +const visibleMessages = expanded ? messages : messages.slice(-VISIBLE_LIMIT); +``` + +**2. useMemo 缓存计算** +```typescript +const stats = React.useMemo(() => calculateStats(messages), [messages]); +``` + +**3. 按需清理** +```typescript +// SubAgent 完成后,可选择性清理进度数据 +if (toolResult && !isVerbose) { + get().clearAgentProgress(toolUse.id); +} +``` + +**4. 事件节流(可选)** +```typescript +// 如果消息产生速度过快,可以在 loop.ts 中添加节流 +const throttledProgress = throttle(onProgress, 100); // 每 100ms 最多发送一次 +``` + +#### 8.8 错误处理 + +**1. 回调执行失败** +```typescript +try { + await onProgress(message, agentId); +} catch (error) { + // 记录错误但不中断 SubAgent 执行 + console.error('Failed to send progress:', error); +} +``` + +**2. MessageBus 断开连接** +```typescript +if (!messageBus.isConnected()) { + // 降级:只写入 log,不发送实时进度 + agentLogger.addMessage({ message }); + return; +} +``` + +**3. UI 层接收失败** +```typescript +bridge.onEvent('agent_progress', (data) => { + try { + get().updateAgentProgress(data.progressData); + } catch (error) { + console.error('Failed to update progress:', error); + } +}); +``` + +### 9. UI 渲染设计 + +#### 核心渲染机制 + +SubAgent 的消息作为父 Agent 界面中的**特殊工具调用(Tool Use)**来处理,而不是像普通文本消息那样直接追加。它嵌套在调用该 SubAgent 的工具块中,形成层级关系。 + +**关键设计原则:** +1. **嵌套渲染**: SubAgent 作为 Task tool 的子块渲染 +2. **折叠/展开**: 默认折叠,避免刷屏,支持 `ctrl+o` 切换 +3. **智能截断**: 非 verbose 模式下只显示最后 N 条记录 +4. **实时更新**: 执行过程中动态更新统计信息 + +#### 视觉层级结构 + +``` +╭─ Agent: code-reviewer (审查代码质量) ─────────────╮ +│ │ +│ User: 审查以下代码... │ +│ Assistant: ↳ read src/api.ts │ +│ Tool: ✓ Tool results │ +│ Assistant: ↳ grep "function" │ +│ Tool: ✓ Tool results │ +│ ... 
│ +│ +5 more tool uses │ ← 折叠的消息 +│ Assistant: ↳ write REVIEW.md │ +│ Tool: ✓ Tool results │ +│ │ +│ In progress... · 8 tool uses · 1.2k tokens │ ← 实时状态栏 +╰───────────────────────────────────────────────────╯ +``` + +#### 渲染状态机 + +SubAgent 有三种渲染状态,对应不同的 UI 组件: + +**1. 进行中 (In Progress) - `renderToolUseProgressMessage`** + +当 SubAgent 正在执行时: + +```typescript +// 实时进度渲染组件 +function SubAgentProgress({ + toolUse, + agentMessages, + isVerbose +}: { + toolUse: ToolUsePart; + agentMessages: NormalizedMessage[]; + isVerbose: boolean; +}) { + const [expanded, setExpanded] = React.useState(isVerbose); + + // 统计信息 + const stats = React.useMemo(() => { + const toolCalls = agentMessages.filter( + msg => msg.role === 'assistant' && + Array.isArray(msg.content) && + msg.content.some(p => p.type === 'tool_use') + ).length; + + const tokens = agentMessages.reduce((sum, msg) => { + if (msg.role === 'assistant' && 'usage' in msg) { + const usage = (msg as AssistantMessage).usage; + return sum + usage.input_tokens + usage.output_tokens; + } + return sum; + }, 0); + + return { toolCalls, tokens }; + }, [agentMessages]); + + // 智能截断:只显示最后 N 条 + const VISIBLE_LIMIT = 3; + const visibleMessages = expanded + ? agentMessages + : agentMessages.slice(-VISIBLE_LIMIT); + const hiddenCount = agentMessages.length - visibleMessages.length; + + return ( + + {/* 标题栏 */} + + ╭─ Agent: {toolUse.description} + + + {/* 消息列表 */} + + {!expanded && hiddenCount > 0 && ( + + +{hiddenCount} more tool uses + + )} + + {visibleMessages.map((msg, idx) => ( + + ))} + + + {/* 状态栏 */} + + + │ In progress... · {stats.toolCalls} tool uses · {stats.tokens} tokens + + + + + ╰───────────────────────────── + (ctrl+o to {expanded ? 'collapse' : 'expand'}) + + + ); +} +``` + +**2. 
已完成 (Done) - `renderToolResultMessage`** + +当 SubAgent 执行完成后: + +```typescript +// 完成结果渲染组件 +function SubAgentCompleted({ + toolUse, + toolResult, + agentMessages, +}: { + toolUse: ToolUsePart; + toolResult: ToolResultPart; + agentMessages: NormalizedMessage[]; +}) { + const [expanded, setExpanded] = React.useState(false); + + // 统计信息(同上) + const stats = React.useMemo(() => { /* ... */ }, [agentMessages]); + + // 提取最终响应文本 + const finalResponse = React.useMemo(() => { + const lastAssistant = [...agentMessages] + .reverse() + .find(msg => msg.role === 'assistant' && typeof msg.content === 'string'); + + if (lastAssistant) { + return (lastAssistant as AssistantMessage).text; + } + return null; + }, [agentMessages]); + + return ( + + {/* 折叠状态:只显示摘要 */} + {!expanded && ( + + + {toolUse.description} + + {' '}({stats.toolCalls} tool uses · {stats.tokens} tokens · {toolResult.duration}ms) + + setExpanded(true)}> + {' '}▶ Show details + + + )} + + {/* 展开状态:显示详细内容 */} + {expanded && ( + + + + ╭─ Done: {toolUse.description} + + + {' '}({stats.toolCalls} tool uses · {stats.tokens} tokens · {toolResult.duration}ms) + + + + {/* 工具调用历史 */} + + {agentMessages.map((msg, idx) => ( + + ))} + + + {/* 最终响应 */} + {finalResponse && ( + + Response: + + + )} + + + ╰───────────────────────────── + setExpanded(false)}> + {' '}▼ Hide details + + + + )} + + ); +} +``` + +**3. 后台运行 (Backgrounded) - 未来扩展** + +当 SubAgent 被转入后台时: + +```typescript +function SubAgentBackgrounded({ toolUse }: { toolUse: ToolUsePart }) { + return ( + + + + {' '}Backgrounded agent: {toolUse.description} + + + {' '}(↓ to manage) + + + ); +} +``` + +#### 并行 SubAgent 渲染 - `renderGroupedToolUse` + +当同时启动多个 SubAgent 时,使用树状结构渲染: + +```typescript +function ParallelSubAgents({ + toolUses, + results +}: { + toolUses: ToolUsePart[]; + results: Map; +}) { + return ( + + Parallel Agents: + {toolUses.map((toolUse, idx) => { + const isLast = idx === toolUses.length - 1; + const prefix = isLast ? 
'└─' : '├─'; + const result = results.get(toolUse.id); + const status = result + ? (result.isError ? 'Failed' : 'Done') + : 'Running'; + const color = status === 'Done' ? 'green' : status === 'Failed' ? 'red' : 'yellow'; + + return ( + + {prefix} + {status} + {toolUse.description} + + ); + })} + + ); +} +``` + +渲染效果: +``` +Parallel Agents: +├─ Done code-reviewer (审查代码) +├─ Running test-runner (执行测试) +└─ Done documenter (生成文档) +``` + +#### 嵌套消息渲染组件 + +```typescript +function NestedAgentMessage({ message }: { message: NormalizedMessage }) { + if (message.role === 'user') { + return ( + + User: + {typeof message.content === 'string' ? message.content : '...'} + + ); + } + + if (message.role === 'assistant') { + const assistantMsg = message as AssistantMessage; + + // 文本响应 + if (typeof assistantMsg.content === 'string') { + return ( + + Assistant: + {assistantMsg.content} + + ); + } + + // 工具调用 + const toolUses = assistantMsg.content.filter(p => p.type === 'tool_use') as ToolUsePart[]; + const textParts = assistantMsg.content.filter(p => p.type === 'text') as TextPart[]; + + return ( + + {textParts.map((part, idx) => ( + + Assistant: + {part.text} + + ))} + {toolUses.map((toolUse, idx) => ( + + Assistant: ↳ + {toolUse.displayName || toolUse.name} + {toolUse.description && ( + ({toolUse.description}) + )} + + ))} + + ); + } + + if (message.role === 'tool') { + return ( + + Tool: ✓ + Tool results + + ); + } + + return null; +} +``` + +#### 主 ToolUse 组件集成 + +```typescript +function ToolUse({ pair, allMessages }: { + pair: ToolPair; + allMessages: NormalizedMessage[]; +}) { + const { toolUse, toolResult } = pair; + const { agentMessagesCache, loadAgentMessages } = useAppStore(); + + // 如果是 Task tool,使用特殊渲染 + if (toolUse.name === TOOL_NAMES.TASK) { + const agentId = toolResult?.agentId; + const agentMessages = agentId ? 
agentMessagesCache[agentId] : undefined; + + // 加载 agent messages(如果未加载) + React.useEffect(() => { + if (agentId && !agentMessages) { + loadAgentMessages(agentId); + } + }, [agentId]); + + // 进行中 + if (!toolResult && agentMessages) { + return ( + + ); + } + + // 已完成 + if (toolResult && agentMessages) { + return ( + + ); + } + + // 加载中 + return ( + + Loading agent messages... + + ); + } + + // 其他工具的正常渲染 + return ; +} +``` + +#### 快捷键支持 + +```typescript +// 在 App.tsx 或 TextInput 中添加全局快捷键 +function useSubAgentHotkeys() { + const { input } = useInput((input, key) => { + // ctrl+o: 切换展开/折叠 + if (key.ctrl && input === 'o') { + // 触发最近的 SubAgent 组件的 toggle + // 可以通过 context 或 store 实现 + toggleNearestSubAgent(); + } + }); +} +``` + +#### 性能优化策略 + +**1. 智能截断** +```typescript +const VISIBLE_LIMIT = 3; // 默认只显示最后 3 条 +const MAX_MESSAGES_BEFORE_VIRTUALIZE = 50; // 超过 50 条启用虚拟滚动 +``` + +**2. 按需渲染** +```typescript +// 只渲染可见区域的消息 +{isExpanded && agentMessages.length > MAX_MESSAGES_BEFORE_VIRTUALIZE ? ( + +) : ( + agentMessages.map(msg => ) +)} +``` + +**3. 
缓存计算** +```typescript +// 使用 useMemo 缓存统计信息 +const stats = React.useMemo(() => calculateStats(agentMessages), [agentMessages]); +``` + +## 实现步骤 + +### Phase 1: 核心隔离机制(P0) + +- [ ] **Step 1.1**: 修改 `src/paths.ts` + - 添加 `getAgentLogPath(agentId: string): string` + - 添加 `getAllAgentSessions(): Array<{ agentId, path, mtime }>` + +- [ ] **Step 1.2**: 扩展类型定义 + - 修改 `src/tool.ts` 扩展 `ToolResult` 添加 `metadata` 字段 + - 修改 `src/message.ts` 扩展 `ToolResultPart` 添加 `agentId` 和 `agentType` 字段 + +- [ ] **Step 1.3**: 修改 `src/agent/executor.ts` + - 生成 8 位 agentId (`randomUUID().slice(0, 8)`) + - 创建独立的 JsonlLogger 写入 `agent-{agentId}.jsonl` + - 在 `onMessage` 回调中写入独立 log 文件 + - **不包含实时回调**(留到 Phase 2) + +- [ ] **Step 1.4**: 修改 `src/tools/task.ts` + - 返回 `metadata: { agentId, agentType }` 在 ToolResult 中 + +- [ ] **Step 1.5**: 修改 `src/loop.ts` + - 保存 tool result 时提升 `agentId` 到 `ToolResultPart` 层级 + +- [ ] **测试**: 验证 sub-agent 消息写入独立文件,不进入主 agent 上下文 + +--- + +### Phase 2: 实时进度机制(P0)⭐ **核心功能** + +- [ ] **Step 2.1**: 定义类型 + - 在 `src/agent/types.ts` 添加 `AgentProgressData` 类型 + - 扩展 `AgentExecuteOptions` 添加 `onProgress` 回调字段 + +- [ ] **Step 2.2**: 修改 `src/agent/executor.ts` + - 接受 `onProgress` 参数 + - 在 `onMessage` 回调中调用 `onProgress(message, agentId)` + - 添加错误处理(try-catch 包裹 onProgress 调用) + +- [ ] **Step 2.3**: 修改 `src/agent/index.ts` (AgentManager) + - `executeTask` 方法接受 `onProgress` 参数 + - 传递给 `executeAgent` + +- [ ] **Step 2.4**: 修改 `src/tools/task.ts` + - `createTaskTool` 接受 `onProgress` 参数 + - 在 `execute` 方法中构造进度数据包 + - 调用传入的 `onProgress` 回调(包含 toolUseID) + +- [ ] **Step 2.5**: 修改 `src/loop.ts` + - 为 Task tool 注入 `onProgress` 回调 + - 回调内部调用 `messageBus.emitEvent('agent_progress', ...)` + - 添加 MessageBus 连接检查(降级处理) + +- [ ] **Step 2.6**: 修改 `src/ui/store.ts` + - 添加 `agentProgressMap: Record` 状态 + - 添加 `updateAgentProgress()` 方法 + - 添加 `clearAgentProgress()` 方法 + - 在 `initialize` 中监听 `bridge.onEvent('agent_progress')` + +- [ ] **测试**: + - 启动 SubAgent 后立即在 UI 看到 "Starting agent..." 
+ - SubAgent 执行过程中实时更新消息列表 + - 统计信息(tool calls、tokens)动态增加 + +--- + +### Phase 3: 数据访问层(P1) + +- [ ] **Step 3.1**: 修改 `src/nodeBridge.ts` 添加 RPC handlers + - `agent.getMessages(cwd, agentId)` - 读取 agent session 历史 + - `agent.listSessions(cwd)` - 列出所有 agent sessions + - `agent.deleteSession(cwd, agentId)` - 清理 agent session(可选) + +- [ ] **测试**: + - 通过 RPC 正确读取 agent session + - 返回的消息包含完整的 metadata + +--- + +### Phase 4: UI 展示层(P1) + +- [ ] **Step 4.1**: 修改 `src/ui/Messages.tsx` + - 实现 `SubAgentProgress` 组件(进行中状态) + - 从 `agentProgressMap[toolUse.id]` 获取实时数据 + - 智能截断(默认显示最后 3 条) + - 统计信息(tool calls、tokens) + - 支持展开/折叠 + - 实现 `SubAgentCompleted` 组件(完成状态) + - 折叠时显示摘要 + - 展开时显示完整历史 + - 实现 `ParallelSubAgents` 组件(并行状态) + - 树状符号(├─ └─) + - 状态颜色(Running=黄色、Done=绿色、Failed=红色) + - 实现 `NestedAgentMessage` 组件(嵌套消息) + - 支持 user、assistant、tool 三种消息类型 + - 适配 SubAgent 的消息格式 + - 修改 `ToolUse` 组件 + - 识别 Task tool (`toolUse.name === TOOL_NAMES.TASK`) + - 根据 `toolResult` 是否存在路由到对应组件 + - 进行中 → `SubAgentProgress` + - 已完成 → `SubAgentCompleted` + +- [ ] **Step 4.2**: 添加 `src/ui/constants.ts` + - `VISIBLE_LIMIT = 3`(折叠时显示的消息数) + - `MAX_MESSAGES_BEFORE_VIRTUALIZE = 50`(虚拟滚动阈值) + +- [ ] **测试**: + - 进行中状态:实时更新、统计正确、截断生效 + - 完成状态:正确显示摘要和最终响应 + - 并行状态:树状结构清晰,状态颜色正确 + +--- + +### Phase 5: 优化与完善(P2) + + +- [ ] **Step 5.1**: 快捷键支持 + - 在 `App.tsx` 或 `TextInput` 中添加全局监听 + - `ctrl+o`: 切换当前 SubAgent 的展开/折叠 + - 通过 context 或 store 传递切换信号 + +- [ ] **Step 5.2**: 样式美化 + - 使用 Box 组件的 `borderStyle="round"` 绘制边框 + - 使用树状符号 `├─`、`└─` 渲染并行 agent + - 根据状态使用不同颜色(进行中=黄色、完成=绿色、失败=红色) + - 添加动画效果(可选,如 spinner) + +- [ ] **Step 5.3**: 统计信息优化 + - 使用 `useMemo` 缓存计算结果 + - 显示 tokens、duration、tool calls 统计 + - 显示执行进度百分比(可选) + +- [ ] **Step 5.4**: 虚拟滚动优化 + - 当消息超过 50 条时启用虚拟列表 + - 使用 `react-window` 或 Ink 的虚拟滚动方案 + - 优化滚动性能 + +- [ ] **Step 5.5**: 事件节流(可选) + - 在 `loop.ts` 中添加节流逻辑 + - 每 100ms 最多发送一次进度事件 + - 避免高频消息导致的性能问题 + +- [ ] **Step 5.6**: 后台运行支持(未来) + - 实现 `SubAgentBackgrounded` 组件 + - 支持将长时间运行的 agent 转入后台 + - 提供后台任务管理界面 + +- 
[ ] **Step 5.7**: 递归支持 + - Sub-agent 调用 sub-agent(嵌套 Task) + - 多层级的树状渲染 + - 每层都有独立的进度跟踪 + +- [ ] **测试**: + - 快捷键响应正确 + - 样式美观,状态颜色准确 + - 虚拟滚动在大量消息时生效 + - 统计信息准确且性能良好 + +--- + +### Phase 6: 集成测试与文档(P2) + +- [ ] **Step 6.1**: 端到端测试 + - 测试本地模式(DirectTransport) + - 测试 Server 模式(WebSocketTransport) + - 测试并发多个 SubAgent + - 测试嵌套 SubAgent(递归) + +- [ ] **Step 6.2**: 性能测试 + - 测试大量消息时的性能(>100 条) + - 测试快速产生消息时的事件处理 + - 测试内存占用(长时间运行) + +- [ ] **Step 6.3**: 错误场景测试 + - MessageBus 断开连接时的降级 + - onProgress 回调抛出异常 + - UI 层更新失败的容错 + +- [ ] **Step 6.4**: 用户文档 + - 更新用户手册,说明 SubAgent 进度展示功能 + - 添加 GIF 演示图 + - 说明快捷键和交互方式 + +- [ ] **Step 6.5**: 开发者文档 + - 更新架构文档,说明实时进度机制 + - 添加序列图和数据流图 + - 提供扩展指南(如何为其他工具添加进度支持) + + +## 关键技术要点 + +### 1. AgentID 生成 + +```typescript +const agentId = randomUUID().slice(0, 8); // 例如: "a1b2c3d4" +``` + +### 2. 上下文隔离验证 + +**主 agent 的 history 应该只包含:** +- User 消息 +- Assistant 消息(含 `tool_use: Task`) +- Tool 消息(含 `tool_result`,metadata 有 agentId) +- Assistant 最终响应 + +**不应该包含:** +- ❌ Sub-agent 的任何 `tool_use` +- ❌ Sub-agent 的任何 `tool_result` +- ❌ Sub-agent 的任何 assistant 消息 + +### 3. Metadata 传递链 + +``` +ToolResult.metadata.agentId + → loop.ts 保存时提升 + → ToolResultPart.agentId + → UI 读取 agentId + → RPC: agent.getMessages(agentId) + → 渲染 sub-agent 消息 +``` + +### 4. 性能考虑 + +- **按需加载**: 只在展开时加载 agent messages +- **缓存机制**: `agentMessagesCache` 避免重复加载 +- **虚拟滚动**: 大量消息时使用虚拟列表(Phase 5,见 Step 5.4) + +### 5. 扩展性 + +支持递归 sub-agent: +- Sub-agent 可以再调用 Task tool +- 嵌套渲染多层 agent +- 每层都有独立的 `agent-{agentId}.jsonl` + +## 总结 + +本设计通过**独立会话隔离 + 实时进度回调**的方式,彻底解决了 sub-agent 上下文污染问题,同时实现了用户友好的实时进度展示。核心优势: + +### 架构优势 + +1. ✅ **完全隔离**: Sub-agent 消息不影响主 agent 的 LLM 上下文 +2. ✅ **统一架构**: 复用现有的 session、message、jsonl logger 机制 +3. ✅ **实时反馈**: onProgress 回调 + MessageBus 事件驱动实现毫秒级更新 +4. ✅ **跨端支持**: 透明支持本地模式和 Server 模式,无需修改 MessageBus 核心 +5. ✅ **按需加载**: 性能友好,实时数据在内存,历史数据按需从磁盘加载 +6. ✅ **可扩展**: 支持递归 sub-agent、清理、导出等功能 +7. ✅ **易于调试**: 每个 agent 有独立的日志文件,便于排查问题 + +### UI 设计亮点 + +**1. 
层级化渲染** +- SubAgent 作为嵌套块,不是独立消息 +- 清晰的视觉边界(圆角边框) +- 树状符号表示并行关系 + +**2. 智能截断** +- 默认只显示最后 N 条,避免刷屏 +- "+N more tool uses" 提示隐藏内容 +- `ctrl+o` 快速切换展开/折叠 + +**3. 实时反馈** +- 进行中状态:动态更新统计栏(tool calls、tokens) +- 完成状态:显示摘要和最终响应 +- 后台状态:压缩显示,不占空间 + +**4. 性能优化** +- 实时数据:存储在 appStore,避免频繁 I/O +- 按需加载:展开完成的 SubAgent 时才请求历史数据 +- 缓存机制:避免重复请求 +- 虚拟滚动:大量消息时启用 + +### 实时进度机制亮点 + +**1. 事件驱动模型** +``` +SubAgent 产生消息 → onProgress 回调 → MessageBus 事件 → UI 更新 +``` +- 延迟极低(本地模式 < 10ms) +- 支持跨端场景 +- 统一的 API 接口 + +**2. 数据流清晰** +- 消息持久化:写入 `agent-{agentId}.jsonl` +- 实时通知:通过 onProgress 回调 +- 状态管理:appStore 按 toolUseID 索引 +- UI 渲染:React 自动响应状态变化 + +**3. 错误容忍** +- onProgress 失败不影响 SubAgent 执行 +- MessageBus 断开时降级为只写 log +- UI 更新失败时记录错误但不崩溃 + +**4. 可扩展性** +- 回调注入点在 `agentManager.executeTask`,影响范围小 +- 未来可为其他工具(如 bash、fetch)添加类似机制 +- 支持多层嵌套(SubAgent 调用 SubAgent) + +### 实现复杂度评估 + +| Phase | 工作量 | 风险 | 依赖 | +|-------|--------|------|------| +| Phase 1: 核心隔离 | 中等(2-3天) | 低 | 无 | +| Phase 2: 实时进度 | 中等(3-4天) | **中** | Phase 1 | +| Phase 3: 数据访问 | 低(1天) | 低 | Phase 1 | +| Phase 4: UI 展示 | 高(4-5天) | 中 | Phase 2, 3 | +| Phase 5: 优化完善 | 中等(2-3天) | 低 | Phase 4 | +| Phase 6: 测试文档 | 中等(2-3天) | 低 | All | + +**总计**: 约 14-19 天(单人全职开发,按上表各 Phase 工作量累加) + +**关键风险点**: +- Phase 2 的回调链路较长,需要仔细测试跨端场景 +- Phase 4 的 UI 组件较多,需要确保渲染性能 +- MessageBus 事件频率控制(可能需要节流) + +### 技术债务 + +**已知限制**: +1. 实时进度数据不持久化(重启后丢失,但可从 log 恢复) +2. 暂不支持暂停/恢复 SubAgent +3. 虚拟滚动在 Ink 中实现较复杂(Phase 5) + +**未来改进**: +1. 支持 SubAgent 的中断和恢复 +2. 支持进度持久化到 session config +3. 支持实时编辑 SubAgent 的 prompt +4. 
/** Tool name that the built-in agents list in `disallowedTools` to block sub-agent spawning. */
export const TASK_TOOL_NAME = 'task';

/** Names of the editing tools (`edit`, `write`). */
export const EDIT_TOOLS = ['edit', 'write'];

/**
 * Merges any number of tool-name groups into one de-duplicated list.
 *
 * @param toolGroups - Zero or more arrays of tool names.
 * @returns Every distinct name, in order of first appearance.
 */
export function buildDisallowedTools(...toolGroups: string[][]): string[] {
  const seen = new Set<string>();
  for (const group of toolGroups) {
    for (const toolName of group) {
      // Set preserves insertion order, so the first occurrence fixes the position.
      seen.add(toolName);
    }
  }
  return [...seen];
}

/** Shared notes block intended for interpolation into agent system prompts. */
export const CONTEXT_NOTES = `
IMPORTANT NOTES:
- Return file paths as absolute paths in your final response
- For clear communication, avoid using emojis
- Complete the task efficiently and report your findings clearly
`.trim();

/** Shared description of the thoroughness levels a caller may request. */
export const THOROUGHNESS_LEVELS = `
Thoroughness levels:
- "quick": 1-2 search attempts, check common locations
- "medium": 3-5 search attempts, try multiple naming patterns
- "very thorough": Comprehensive search across multiple locations and conventions
`.trim();
Use this when you need to quickly ' + + 'find files by patterns (e.g., "src/components/**/*.tsx"), search code for keywords ' + + '(e.g., "API endpoints"), or answer questions about codebase (e.g., "how do API ' + + 'endpoints work?"). When calling this agent, specify the desired thoroughness level: ' + + '"quick" for basic searches, "medium" for moderate exploration, or "very thorough" ' + + 'for comprehensive analysis across multiple locations and naming conventions.', + + systemPrompt: `You are a file search specialist, excelling at thoroughly navigating and exploring codebases. + +Your strengths: +- Rapidly finding files using glob patterns (glob tool) +- Searching code and text with powerful regex patterns (grep tool) +- Reading and analyzing file contents (read tool) +- Listing directory contents to understand structure (ls tool) + +Guidelines: +- Use glob for broad file pattern matching (e.g., "**/*.ts", "src/api/**/*.js") +- Use grep for searching file contents with regex (e.g., "class.*Component", "function.*async") +- Use read when you know the specific file path you need to read +- Use ls for exploring directory structures +- Adapt your search approach based on the thoroughness level specified by the caller + +${THOROUGHNESS_LEVELS} + +${CONTEXT_NOTES} + +RESTRICTIONS: +- Do NOT create any files or run commands that modify the user's system state in any way +- You cannot use editing tools (edit, write) - you are read-only +- You cannot spawn sub-agents (task tool is disabled) +`, + + model: context.config.smallModel || context.config.model, + + source: 'built-in', + + disallowedTools: buildDisallowedTools([TASK_TOOL_NAME], EDIT_TOOLS), + + forkContext: false, + + color: 'blue', + }; +} diff --git a/src/agent/builtin/index.ts b/src/agent/builtin/index.ts new file mode 100644 index 00000000..071f5a3f --- /dev/null +++ b/src/agent/builtin/index.ts @@ -0,0 +1,10 @@ +import type { Context } from '../../context'; +import type { AgentDefinition } from 
'../types'; +import { createExploreAgent } from './explore'; +import { createPlanAgent } from './plan'; + +export function getBuiltinAgents(opts: { + context: Context; +}): AgentDefinition[] { + return [createExploreAgent(opts), createPlanAgent(opts)]; +} diff --git a/src/agent/builtin/plan.ts b/src/agent/builtin/plan.ts new file mode 100644 index 00000000..547d1174 --- /dev/null +++ b/src/agent/builtin/plan.ts @@ -0,0 +1,66 @@ +import type { Context } from '../../context'; +import type { AgentDefinition } from '../types'; +import { + buildDisallowedTools, + CONTEXT_NOTES, + EDIT_TOOLS, + TASK_TOOL_NAME, + THOROUGHNESS_LEVELS, +} from './common'; + +export function createPlanAgent(opts: { context: Context }): AgentDefinition { + const { context } = opts; + + return { + agentType: 'Plan', + + whenToUse: + 'Powerful planning agent for complex codebase analysis and strategic thinking. Use this ' + + 'when you need deep architectural understanding, comprehensive project planning, or ' + + 'detailed analysis that requires reasoning across multiple files and concepts. This agent ' + + 'uses a more capable model than Explore, making it suitable for tasks requiring synthesis ' + + 'and higher-level understanding. Supports thoroughness levels: "quick", "medium", or "very thorough".', + + systemPrompt: `You are a strategic planning and analysis specialist for codebases. 
+ +Your strengths: +- Deep architectural analysis and understanding +- Complex pattern recognition across codebases +- Strategic planning for code changes and refactoring +- Synthesizing information from multiple sources +- Reasoning about dependencies and relationships + +Available tools: +- glob: Find files using patterns +- grep: Search code contents with regex +- read: Read specific files +- ls: List directory contents + +Guidelines: +- Take time to understand the broader context before diving into details +- Consider architectural implications and dependencies +- Provide comprehensive analysis with clear reasoning +- When exploring, use systematic approaches to ensure thorough coverage +- Adapt your depth of analysis based on the thoroughness level specified + +${THOROUGHNESS_LEVELS} + +${CONTEXT_NOTES} + +RESTRICTIONS: +- Do NOT create any files or run commands that modify the user's system state in any way +- You cannot use editing tools (edit, write) - you are read-only +- You cannot spawn sub-agents (task tool is disabled) +`, + + model: context.config.planModel, + + source: 'built-in', + + disallowedTools: buildDisallowedTools([TASK_TOOL_NAME], EDIT_TOOLS), + + forkContext: false, + + color: 'purple', + }; +} diff --git a/src/agent/contextFork.test.ts b/src/agent/contextFork.test.ts new file mode 100644 index 00000000..a07a4b3e --- /dev/null +++ b/src/agent/contextFork.test.ts @@ -0,0 +1,110 @@ +import { describe, expect, test } from 'vitest'; +import type { NormalizedMessage } from '../message'; +import { prepareForkMessages } from './contextFork'; + +describe('ContextFork', () => { + test('should add context separator and task message', () => { + const result = prepareForkMessages([], 'Find all TypeScript files', [ + 'glob', + 'read', + ]); + + const separator = result.find( + (m) => + typeof m.content === 'string' && + m.content.includes('FORKING CONVERSATION CONTEXT'), + ); + expect(separator).toBeDefined(); + }); + + test('should filter out orphan 
tool results when tool use is filtered', () => { + const messages: NormalizedMessage[] = [ + { + role: 'assistant', + content: [ + { + type: 'text', + text: 'I will use some tools.', + }, + { + type: 'tool_use', + id: 'tool-1', + name: 'supported_tool', + input: {}, + }, + { + type: 'tool_use', + id: 'tool-2', + name: 'unsupported_tool', + input: {}, + }, + ], + type: 'message', + timestamp: new Date().toISOString(), + uuid: 'msg-1', + parentUuid: null, + text: 'I will use some tools.', + model: 'model-id', + usage: { input_tokens: 0, output_tokens: 0 }, + }, + { + role: 'tool', + content: [ + { + type: 'tool-result', + toolCallId: 'tool-1', + toolName: 'supported_tool', + input: {}, + result: { + llmContent: [{ type: 'text', text: 'Result 1' }], + }, + }, + ], + type: 'message', + timestamp: new Date().toISOString(), + uuid: 'msg-2', + parentUuid: 'msg-1', + }, + { + role: 'tool', + content: [ + { + type: 'tool-result', + toolCallId: 'tool-2', + toolName: 'unsupported_tool', + input: {}, + result: { + llmContent: [{ type: 'text', text: 'Result 2' }], + }, + }, + ], + type: 'message', + timestamp: new Date().toISOString(), + uuid: 'msg-3', + parentUuid: 'msg-1', + }, + ]; + + const result = prepareForkMessages(messages, 'Task', ['supported_tool']); + + // Find the assistant message + const assistantMsg = result.find((m) => m.role === 'assistant'); + expect(assistantMsg).toBeDefined(); + if (assistantMsg && Array.isArray(assistantMsg.content)) { + const toolUses = assistantMsg.content.filter( + (c) => c.type === 'tool_use', + ); + expect(toolUses).toHaveLength(1); + expect(toolUses[0].name).toBe('supported_tool'); + } + + // Find tool messages + const toolMessages = result.filter((m) => m.role === 'tool'); + // Should only have the one corresponding to supported_tool + expect(toolMessages).toHaveLength(1); + + if (toolMessages.length > 0 && Array.isArray(toolMessages[0].content)) { + expect(toolMessages[0].content[0].toolCallId).toBe('tool-1'); + } + }); +}); diff 
/**
 * Builds the message list handed to a sub-agent that forks the parent
 * conversation: the sanitized parent history, then a context-separator
 * notice, then the task prompt.
 *
 * @param parentMessages - Messages from the parent conversation.
 * @param taskPrompt - The task text for the sub-agent; becomes the last message.
 * @param availableToolNames - Names of the tools the sub-agent may use.
 * @returns A new array; the input array is not mutated.
 */
export function prepareForkMessages(
  parentMessages: NormalizedMessage[],
  taskPrompt: string,
  availableToolNames: string[],
): NormalizedMessage[] {
  const history = normalizeMessagesForSubAgent(
    parentMessages,
    availableToolNames,
  );
  // The separator is created before the task message so it sits between
  // the inherited history and the new task.
  const separator = createRootUserMessage(
    buildContextSeparatorMessage(availableToolNames),
  );
  const task = createRootUserMessage(taskPrompt);

  return [...history, separator, task];
}

/** Creates a user message with a fresh uuid, the current timestamp and no parent. */
function createRootUserMessage(content: string): NormalizedMessage {
  return {
    role: 'user',
    content,
    type: 'message',
    timestamp: new Date().toISOString(),
    uuid: randomUUID(),
    parentUuid: null,
  };
}

/**
 * Strips from the parent history everything that refers to tools the
 * sub-agent does not have.
 *
 * - Assistant messages with array content keep `text`, `reasoning`, and
 *   `tool_use` parts whose tool is available; if nothing is left, the content
 *   is replaced by a single placeholder text part.
 * - Tool messages with array content keep only results whose `toolCallId`
 *   belongs to a `tool_use` part that was kept earlier in the list.
 * - Messages that end up with empty (or whitespace-only string) content are dropped.
 */
function normalizeMessagesForSubAgent(
  messages: NormalizedMessage[],
  availableToolNames: string[],
): NormalizedMessage[] {
  const allowedTools = new Set(availableToolNames);
  // Filled while walking assistant messages, read while walking tool messages,
  // so this relies on a tool_use preceding its tool result.
  const retainedCallIds = new Set<string>();
  const normalized: NormalizedMessage[] = [];

  for (const original of messages) {
    let candidate: NormalizedMessage = original;

    if (original.role === 'assistant' && Array.isArray(original.content)) {
      const keptParts = original.content.filter((part) => {
        switch (part.type) {
          case 'text':
          case 'reasoning':
            return true;
          case 'tool_use': {
            if (!allowedTools.has(part.name)) {
              return false;
            }
            retainedCallIds.add(part.id);
            return true;
          }
          default:
            return false;
        }
      });

      candidate =
        keptParts.length > 0
          ? { ...original, content: keptParts }
          : {
              ...original,
              content: [
                {
                  type: 'text' as const,
                  text: '[Parent agent performed operations]',
                },
              ],
            };
    } else if (original.role === 'tool' && Array.isArray(original.content)) {
      candidate = {
        ...original,
        content: original.content.filter((part) =>
          retainedCallIds.has(part.toolCallId),
        ),
      };
    }

    const content = candidate.content;
    const isBlank =
      typeof content === 'string'
        ? content.trim().length === 0
        : Array.isArray(content) && content.length === 0;

    if (!isBlank) {
      normalized.push(candidate);
    }
  }

  return normalized;
}

/**
 * Renders the notice that separates inherited parent messages from the
 * sub-agent's own task, listing the tools the sub-agent can use.
 */
function buildContextSeparatorMessage(availableTools: string[]): string {
  const toolList = availableTools.join(', ');

  return [
    '### FORKING CONVERSATION CONTEXT ###',
    '### ENTERING SUB-AGENT ROUTINE ###',
    '',
    'IMPORTANT CONTEXT NOTES:',
    '- The messages above are from the parent conversation thread, provided for context only',
    '- You are now in a sub-agent context with LIMITED TOOLS',
    `- Your available tools are: ${toolList}`,
    '- Do NOT attempt to use tools from the parent context that are not in your tool list',
    '- Parent tool operations have been summarized for your reference',
    '- Focus solely on completing the specific task assigned to you below',
    '',
    '---',
  ].join('\n');
}
/**
 * Runs a sub-agent and reports the outcome as an `AgentExecutionResult`.
 *
 * Steps: validate the definition, filter the tool list for this agent, build
 * the input messages (previously logged messages for this agent id followed
 * by the new prompt or forked context), resolve the model, then drive
 * `runLoop`. Every message the loop emits is tagged with the agent id/type,
 * appended to the agent's own JSONL log, and forwarded to `onProgress`.
 *
 * Errors thrown inside the `try` block (validation, tool filtering, model
 * resolution, the loop itself) are converted into a `status: 'failed'`
 * result instead of propagating.
 *
 * @param options - Agent definition, prompt, tools, and execution context.
 * @returns The completed or failed execution result.
 */
export async function executeAgent(
  options: AgentExecuteOptions,
): Promise<AgentExecutionResult> {
  const {
    definition,
    prompt,
    tools,
    context,
    model,
    forkContextMessages,
    cwd,
    signal,
    onProgress,
    resume,
  } = options;

  const startTime = Date.now();

  // When resuming, the caller-supplied id is reused so the same log file is
  // read and appended to; otherwise a new session id is created.
  const agentId = resume || Session.createSessionId();

  const agentLogPath = context.paths.getAgentLogPath(agentId);
  const agentLogger = new JsonlLogger({ filePath: agentLogPath });

  try {
    // Validate Agent definition
    if (!definition.agentType) {
      throw new Error('Agent definition must have agentType');
    }
    if (!definition.systemPrompt) {
      throw new Error(`Agent '${definition.agentType}' must have systemPrompt`);
    }

    // Filter tools
    const filteredToolList = filterTools(tools, definition);

    if (filteredToolList.length === 0) {
      throw new Error(
        `Agent '${definition.agentType}' has no available tools after filtering.`,
      );
    }

    const toolNames = filteredToolList.map((t) => t.name);

    // Prepare messages: previously logged messages first, then the new input.
    const messages = [
      ...loadSessionMessages({ logPath: agentLogPath }),
      ...prepareMessages(prompt, definition, forkContextMessages, toolNames),
    ];

    // Resolve model: an explicit per-call model overrides the definition's.
    const modelName = model || definition.model;

    if (!modelName) {
      throw new Error(`No model specified for agent '${definition.agentType}'`);
    }

    const resolvedModelResult = await resolveModelWithContext(
      modelName,
      context,
    );

    if (!resolvedModelResult.model) {
      throw new Error(
        `Failed to resolve model '${modelName}' for agent '${definition.agentType}'`,
      );
    }

    // Execute loop
    // TODO: Can we directly reuse project.send?
    const loopResult = await runLoop({
      input: messages,
      model: resolvedModelResult.model,
      tools: new Tools(filteredToolList),
      cwd,
      systemPrompt: definition.systemPrompt,
      signal,
      maxTurns: 50,
      onMessage: async (message) => {
        const normalizedMessage: NormalizedMessage & { sessionId: string } = {
          ...message,
          sessionId: agentId,
          metadata: {
            ...(message.metadata || {}),
            agentId,
            agentType: definition.agentType,
          },
        };

        agentLogger.addMessage({ message: normalizedMessage });

        if (onProgress) {
          try {
            await onProgress(normalizedMessage, agentId);
          } catch (error) {
            // A failing progress listener must not abort the sub-agent run.
            console.error('[executeAgent] Failed to send progress:', error);
          }
        }
      },
    });

    // Handle result
    if (loopResult.success) {
      return {
        status: 'completed',
        agentId,
        content: extractFinalContent(loopResult.data),
        totalToolCalls: loopResult.metadata.toolCallsCount,
        totalDuration: Date.now() - startTime,
        usage: {
          inputTokens: loopResult.data.usage?.promptTokens || 0,
          outputTokens: loopResult.data.usage?.completionTokens || 0,
        },
      };
    }
    return buildFailedResult(
      agentId,
      startTime,
      `Agent execution failed: ${loopResult.error.message}`,
    );
  } catch (error) {
    return buildFailedResult(
      agentId,
      startTime,
      `Agent execution error: ${error instanceof Error ? error.message : String(error)}`,
    );
  }
}

/** Builds the `failed` result shape: zero tool calls, zero usage, elapsed time since `startTime`. */
function buildFailedResult(
  agentId: string,
  startTime: number,
  content: string,
): AgentExecutionResult {
  return {
    status: 'failed',
    agentId,
    content,
    totalToolCalls: 0,
    totalDuration: Date.now() - startTime,
    usage: { inputTokens: 0, outputTokens: 0 },
  };
}

/**
 * Produces the new input messages for a run: the forked parent context when
 * the definition enables `forkContext` and parent messages were supplied,
 * otherwise a single user message carrying the prompt.
 */
function prepareMessages(
  prompt: string,
  definition: AgentDefinition,
  forkContextMessages: NormalizedMessage[] | undefined,
  availableToolNames: string[],
): NormalizedMessage[] {
  if (definition.forkContext && forkContextMessages) {
    return prepareForkMessages(forkContextMessages, prompt, availableToolNames);
  }

  return [
    {
      role: 'user',
      content: prompt,
      type: 'message',
      timestamp: new Date().toISOString(),
      uuid: randomUUID(),
      parentUuid: null,
    },
  ];
}

/**
 * Picks the text to report for a successful run: a non-empty string `text`,
 * else a non-empty string `content`, else a generic success sentence.
 */
function extractFinalContent(data: Record<string, unknown>): string {
  if (typeof data.text === 'string' && data.text) {
    return data.text;
  }
  if (typeof data.content === 'string' && data.content) {
    return data.content;
  }
  return 'Agent completed successfully';
}
(!definition.systemPrompt) { + throw new Error('Agent definition must have systemPrompt'); + } + + this.agents.set(definition.agentType, definition); + } + + getAgent(agentType: string): AgentDefinition | undefined { + return this.agents.get(agentType); + } + + getAllAgents(): AgentDefinition[] { + return Array.from(this.agents.values()); + } + + getAgentTypes(): string[] { + return Array.from(this.agents.keys()); + } + + async executeTask( + input: TaskToolInput, + context: { + tools: Tool[]; + cwd: string; + signal?: AbortSignal; + forkContextMessages?: NormalizedMessage[]; + onProgress?: ( + message: NormalizedMessage, + agentId: string, + ) => void | Promise; + }, + ): Promise { + const definition = this.agents.get(input.subagent_type); + if (!definition) { + const availableTypes = this.getAgentTypes().join(', '); + throw new Error( + `Agent type '${input.subagent_type}' not found. Available agents: ${availableTypes}`, + ); + } + + const executeOptions: AgentExecuteOptions = { + definition, + prompt: input.prompt, + tools: context.tools, + context: this.context, + model: input.model, + resume: input.resume, + forkContextMessages: definition.forkContext + ? context.forkContextMessages + : undefined, + cwd: context.cwd, + signal: context.signal, + onProgress: context.onProgress, + }; + + return executeAgent(executeOptions); + } + + getAgentDescriptions(): string { + const descriptions = this.getAllAgents() + .map((agent) => { + return `- ${agent.agentType}: ${agent.whenToUse ?? 
'This subagent should only be called manually by the user.'}`; + }) + .join('\n'); + + return `${descriptions}`; + } +} + +export { prepareForkMessages } from './contextFork'; +export { executeAgent } from './executor'; +export { filterTools } from './toolFilter'; +export * from './types'; diff --git a/src/agent/integration.test.ts b/src/agent/integration.test.ts new file mode 100644 index 00000000..fa774a43 --- /dev/null +++ b/src/agent/integration.test.ts @@ -0,0 +1,37 @@ +import { beforeEach, describe, expect, test } from 'vitest'; +import { Context } from '../context'; +import { AgentManager } from './index'; + +describe('Agent Integration', () => { + let context: Context; + let agentManager: AgentManager; + + beforeEach(async () => { + context = await Context.create({ + cwd: process.cwd(), + productName: 'test', + version: '1.0.0', + argvConfig: {}, + plugins: [], + }); + + agentManager = new AgentManager({ context }); + }); + + test('should register builtin agents', () => { + const exploreAgent = agentManager.getAgent('Explore'); + expect(exploreAgent).toBeDefined(); + expect(exploreAgent?.agentType).toBe('Explore'); + expect(exploreAgent?.disallowedTools).toContain('task'); + + const planAgent = agentManager.getAgent('Plan'); + expect(planAgent).toBeDefined(); + expect(planAgent?.agentType).toBe('Plan'); + }); + + test('should get agent descriptions', () => { + const descriptions = agentManager.getAgentDescriptions(); + expect(descriptions).toContain('Explore'); + expect(descriptions).toContain('Plan'); + }); +}); diff --git a/src/agent/toolFilter.test.ts b/src/agent/toolFilter.test.ts new file mode 100644 index 00000000..effcc413 --- /dev/null +++ b/src/agent/toolFilter.test.ts @@ -0,0 +1,26 @@ +import { describe, expect, test } from 'vitest'; +import type { Tool } from '../tool'; +import { filterTools } from './toolFilter'; +import type { AgentDefinition } from './types'; + +describe('ToolFilter', () => { + test('should filter out disallowed tools', () => 
/**
 * Selects the tools a sub-agent is permitted to use.
 *
 * - A missing `tools` list, or the single-entry list `['*']`, grants every
 *   tool in `allTools`.
 * - Otherwise only tools whose name appears in `tools` are granted.
 * - Names in `disallowedTools` are always removed, even if granted above.
 *
 * The relative order of `allTools` is preserved.
 */
export function filterTools(
  allTools: Tool[],
  agentDef: AgentDefinition,
): Tool[] {
  const denied = new Set(agentDef.disallowedTools ?? []);
  const requested = agentDef.tools;
  const grantsEverything =
    requested === undefined ||
    (requested.length === 1 && requested[0] === '*');
  // `null` means "no allow-list": every tool that is not denied passes.
  const granted = grantsEverything ? null : new Set(requested);

  const permitted: Tool[] = [];
  for (const candidate of allTools) {
    if (denied.has(candidate.name)) {
      continue;
    }
    if (granted !== null && !granted.has(candidate.name)) {
      continue;
    }
    permitted.push(candidate);
  }
  return permitted;
}
/**
 * Everything the executor needs to run one sub-agent invocation.
 */
export interface AgentExecuteOptions {
  /** Definition of the agent to run. */
  definition: AgentDefinition;
  /** Task prompt handed to the agent. */
  prompt: string;
  /** Candidate tools for the agent. */
  tools: Tool[];
  /** Application context the agent runs in. */
  context: Context;
  /** Optional model override for this invocation. */
  model?: string;
  /** Parent conversation messages supplied for context forking. */
  forkContextMessages?: NormalizedMessage[];
  /** Working directory for the run. */
  cwd: string;
  /** Cancellation signal for the run. */
  signal?: AbortSignal;
  /** Optional agent ID to resume from. */
  resume?: string;
  /**
   * Progress callback that receives a message together with the agent's ID.
   * It may be synchronous or return a promise.
   */
  onProgress?: (
    message: NormalizedMessage,
    agentId: string,
  ) => void | Promise<void>;
}
fs from 'fs'; import { createJiti } from 'jiti'; import path from 'pathe'; import resolve from 'resolve'; +import { AgentManager } from './agent'; import { BackgroundTaskManager } from './backgroundTaskManager'; import { type Config, ConfigManager } from './config'; import { MCPManager } from './mcp'; import type { MessageBus } from './messageBus'; import { Paths } from './paths'; -import { SkillManager } from './skill'; import { type Plugin, type PluginApplyOpts, PluginHookType, PluginManager, } from './plugin'; +import { SkillManager } from './skill'; type ContextOpts = { cwd: string; @@ -28,6 +29,7 @@ type ContextOpts = { backgroundTaskManager: BackgroundTaskManager; skillManager: SkillManager; messageBus?: MessageBus; + agentManager?: AgentManager; plugins: (string | Plugin)[]; }; @@ -54,8 +56,8 @@ export class Context { backgroundTaskManager: BackgroundTaskManager; skillManager: SkillManager; messageBus?: MessageBus; + agentManager?: AgentManager; plugins: (string | Plugin)[]; - constructor(opts: ContextOpts) { this.cwd = opts.cwd; this.productName = opts.productName; @@ -69,6 +71,7 @@ export class Context { this.backgroundTaskManager = opts.backgroundTaskManager; this.skillManager = opts.skillManager; this.messageBus = opts.messageBus; + this.agentManager = opts.agentManager; this.plugins = opts.plugins; } @@ -139,9 +142,12 @@ export class Context { }; const mcpManager = MCPManager.create(mcpServers); const backgroundTaskManager = new BackgroundTaskManager(); + + // Create Context first without AgentManager const skillManager = new SkillManager({ paths }); await skillManager.loadSkills(); - return new Context({ + + const context = new Context({ cwd, productName, productASCIIArt, @@ -156,6 +162,12 @@ export class Context { messageBus: opts.messageBus, plugins: pluginsConfigs, }); + + // Create and attach AgentManager + const agentManager = new AgentManager({ context }); + context.agentManager = agentManager; + + return context; } } @@ -186,7 +198,7 @@ function 
scanPlugins(pluginDir: string): string[] { return files .filter((file) => file.endsWith('.js') || file.endsWith('.ts')) .map((file) => path.join(pluginDir, file)); - } catch (error) { + } catch (_error) { return []; } } diff --git a/src/loop.ts b/src/loop.ts index 07f99ac7..7a17e64f 100644 --- a/src/loop.ts +++ b/src/loop.ts @@ -8,10 +8,11 @@ import type { import createDebug from 'debug'; import { At } from './at'; import { History, type OnMessage } from './history'; -import type { - AssistantContent, - NormalizedMessage, - ToolUsePart, +import { + type AssistantContent, + createToolResultPart2, + type NormalizedMessage, + type ToolUsePart, } from './message'; import type { ModelInfo } from './model'; import { addPromptCache } from './promptCache'; @@ -493,7 +494,12 @@ export async function runLoop(opts: RunLoopOpts): Promise { break; } - const toolResults: any[] = []; + const toolResults: { + toolCallId: string; + toolName: string; + input: Record; + result: ToolResult; + }[] = []; for (const toolCall of toolCalls) { let toolUse: ToolUse = { name: toolCall.toolName, @@ -553,16 +559,15 @@ export async function runLoop(opts: RunLoopOpts): Promise { }); await history.addMessage({ role: 'tool', - content: toolResults.map((tr) => { - return { - type: 'tool-result', - toolCallId: tr.toolCallId, - toolName: tr.toolName, - input: tr.input, - result: tr.result, - }; - }), - } as any); + content: toolResults.map((tr) => + createToolResultPart2( + tr.toolCallId, + tr.toolName, + tr.input, + tr.result, + ), + ), + }); return { success: false, error: { @@ -580,16 +585,15 @@ export async function runLoop(opts: RunLoopOpts): Promise { if (toolResults.length) { await history.addMessage({ role: 'tool', - content: toolResults.map((tr) => { - return { - type: 'tool-result', - toolCallId: tr.toolCallId, - toolName: tr.toolName, - input: tr.input, - result: tr.result, - }; - }), - } as any); + content: toolResults.map((tr) => + createToolResultPart2( + tr.toolCallId, + tr.toolName, + 
/**
 * Builds a `tool-result` content part for a finished tool call.
 *
 * When the tool result carries metadata with a non-empty `agentId`, both
 * `agentId` and `agentType` are copied from the metadata onto the part.
 * Without an `agentId` neither field is set, even if `agentType` is present.
 *
 * @param toolCallId ID of the tool call this result answers.
 * @param toolName Name of the tool that was called.
 * @param input Arguments the tool was called with.
 * @param result Result returned by the tool.
 */
export function createToolResultPart2(
  toolCallId: string,
  toolName: string,
  input: Record<string, unknown>,
  result: ToolResult,
): ToolResultPart2 {
  const part: ToolResultPart2 = {
    type: 'tool-result',
    toolCallId,
    toolName,
    input,
    result,
  };

  const agentId = result.metadata?.agentId;
  if (agentId) {
    part.agentId = agentId;
    part.agentType = result.metadata?.agentType;
  }

  return part;
}
'data.json'); } + + /** + * Get the dedicated log path for a sub-agent. + * Format: ~/.neovate/projects/{project}/agent-{agentId}.jsonl + */ + getAgentLogPath(agentId: string): string { + return path.join(this.globalProjectDir, `agent-${agentId}.jsonl`); + } } function normalizeSummary(summary: string): string { diff --git a/src/project.ts b/src/project.ts index 2425b0f5..d473a831 100644 --- a/src/project.ts +++ b/src/project.ts @@ -1,3 +1,4 @@ +import { TOOL_NAMES } from './constants'; import type { Context } from './context'; import { JsonlLogger, RequestLogger } from './jsonl'; import { LlmsContext } from './llmsContext'; @@ -56,6 +57,7 @@ export class Project { write: true, todo: true, askUserQuestion: !this.context.config.quiet, + signal: opts.signal, }); tools = await this.context.apply({ hook: 'tool', @@ -68,11 +70,13 @@ export class Project { this.context.config.outputStyle, this.context.cwd, ); + const hasTaskTool = tools.some((t) => t.name === TOOL_NAMES.TASK); let systemPrompt = generateSystemPrompt({ todo: this.context.config.todo!, productName: this.context.productName, language: this.context.config.language, outputStyle, + task: hasTaskTool, }); systemPrompt = await this.context.apply({ hook: 'systemPrompt', @@ -107,6 +111,7 @@ export class Project { write: false, todo: false, askUserQuestion: !this.context.config.quiet, + signal: opts.signal, }); tools = await this.context.apply({ hook: 'tool', diff --git a/src/system-integration.test.ts b/src/system-integration.test.ts new file mode 100644 index 00000000..e40f3f0f --- /dev/null +++ b/src/system-integration.test.ts @@ -0,0 +1,60 @@ +import { beforeEach, describe, expect, test } from 'vitest'; +import { Context } from './context'; +import { resolveTools } from './tool'; + +describe('System Integration', () => { + let context: Context; + + beforeEach(async () => { + context = await Context.create({ + cwd: process.cwd(), + productName: 'test', + version: '1.0.0', + argvConfig: {}, + plugins: [], + }); + 
}); + + test('should create context with agentManager', () => { + expect(context.agentManager).toBeDefined(); + expect(context.agentManager?.getAgentTypes()).toContain('Explore'); + expect(context.agentManager?.getAgentTypes()).toContain('Plan'); + }); + + test('should include task tool in resolveTools', async () => { + const tools = await resolveTools({ + context, + sessionId: 'test-session', + write: true, + todo: true, + }); + + const taskTool = tools.find((t) => t.name === 'task'); + expect(taskTool).toBeDefined(); + expect(taskTool?.description).toContain('Launch a new agent'); + }); + + test('task tool should not be included when agentManager is undefined', async () => { + const contextWithoutAgents = await Context.create({ + cwd: process.cwd(), + productName: 'test', + version: '1.0.0', + argvConfig: {}, + plugins: [], + }); + + // Remove agentManager to test fallback + contextWithoutAgents.agentManager = undefined; + + const tools = await resolveTools({ + context: contextWithoutAgents, + sessionId: 'test-session', + write: true, + }); + + const taskTool = tools.find((t) => t.name === 'task'); + expect(taskTool).toBeUndefined(); + + await contextWithoutAgents.destroy(); + }); +}); diff --git a/src/systemPrompt.ts b/src/systemPrompt.ts index 99cd17bd..6d504c45 100644 --- a/src/systemPrompt.ts +++ b/src/systemPrompt.ts @@ -1,4 +1,4 @@ -import { TOOL_NAMES } from './constants'; +import { AGENT_TYPE, TOOL_NAMES } from './constants'; import type { OutputStyle } from './outputStyle'; function getTasksPrompt(opts: { todo: boolean; productName: string }) { @@ -66,12 +66,38 @@ IMPORTANT: Always use the ${TOOL_NAMES.TODO_WRITE} tool to plan and track tasks `; } +function getToolUsagePolicyPrompt(task: boolean) { + const taskPolicy = task + ? ` +- When doing file search, prefer to use the ${TOOL_NAMES.TASK} tool in order to reduce context usage. 
+- You should proactively use the ${TOOL_NAMES.TASK} tool with specialized agents when the task at hand matches the agent's description. +- If the user specifies that they want you to run tools "in parallel", you MUST send a single message with multiple tool use content blocks. For example, if you need to launch multiple agents in parallel, send a single message with multiple ${TOOL_NAMES.TASK} tool calls. +- VERY IMPORTANT: When exploring the codebase to gather context or to answer a question that is not a needle query for a specific file/class/function, it is CRITICAL that you use the ${TOOL_NAMES.TASK} tool with subagent_type=${AGENT_TYPE.EXPLORE} instead of running search commands directly. + +user: Where are errors from the client handled? +assistant: [Uses the ${TOOL_NAMES.TASK} tool with subagent_type=${AGENT_TYPE.EXPLORE} to find the files that handle client errors instead of using ${TOOL_NAMES.GLOB} or ${TOOL_NAMES.GREP} directly] + + +user: What is the codebase structure? +assistant: [Uses the ${TOOL_NAMES.TASK} tool with subagent_type=${AGENT_TYPE.EXPLORE}] +` + : ''; + + return ` +# Tool usage policy${taskPolicy} +- When fetch returns a message about a redirect to a different host, you should immediately make a new fetch request with the redirect URL provided in the response. +- You can call multiple tools in a single response. If you intend to call multiple tools and there are no dependencies between them, make all independent tool calls in parallel. Maximize use of parallel tool calls where possible to increase efficiency. However, if some tool calls depend on previous calls to inform dependent values, do NOT call these tools in parallel and instead call them sequentially. For instance, if one operation must complete before another starts, run these operations sequentially instead. Never use placeholders or guess missing parameters in tool calls. 
+- Use specialized tools instead of bash commands when possible, as this provides a better user experience. For file operations, use dedicated tools: ${TOOL_NAMES.READ} for reading files instead of cat/head/tail, ${TOOL_NAMES.EDIT} for editing instead of sed/awk, and ${TOOL_NAMES.WRITE} for creating files instead of cat with heredoc or echo redirection. Reserve bash tools exclusively for actual system commands and terminal operations that require shell execution. NEVER use bash echo or other command-line tools to communicate thoughts, explanations, or instructions to the user. Output all communication directly in your response text instead. + `; +} + export function generateSystemPrompt(opts: { todo: boolean; productName: string; language?: string; appendSystemPrompt?: string; outputStyle: OutputStyle; + task?: boolean; }) { const { outputStyle } = opts; const isDefaultOutputStyle = outputStyle.isDefault(); @@ -143,6 +169,8 @@ ${getTasksPrompt(opts)}` : '' } +${getToolUsagePolicyPrompt(opts.task ?? true)} + ${opts.appendSystemPrompt ? 
opts.appendSystemPrompt : ''} `.trim(); } diff --git a/src/tool.test.ts b/src/tool.test.ts index 258eb925..ddeab0d7 100644 --- a/src/tool.test.ts +++ b/src/tool.test.ts @@ -5,6 +5,7 @@ import { resolveTools } from './tool'; describe('resolveTools with tools config', () => { const createMockContext = ( toolsConfig?: Record, + agentManager?: any, ): Context => { return { cwd: '/test', @@ -25,6 +26,7 @@ describe('resolveTools with tools config', () => { tools: toolsConfig, }, backgroundTaskManager: {} as any, + agentManager: agentManager, messageBus: { onEvent: vi.fn(), } as any, @@ -144,4 +146,42 @@ describe('resolveTools with tools config', () => { expect(toolNames).toContain('read'); expect(toolNames).toContain('write'); }); + + test('should filter out task tool if disabled', async () => { + const context = createMockContext( + { + task: false, + }, + { + getAgentDescriptions: () => 'test agent', + }, + ); + const tools = await resolveTools({ + context, + sessionId: 'test-session', + write: true, + todo: true, + }); + + const toolNames = tools.map((t) => t.name); + expect(toolNames).not.toContain('task'); + }); + + test('should include task tool if enabled and agentManager present', async () => { + const context = createMockContext( + {}, + { + getAgentDescriptions: () => 'test agent', + }, + ); + const tools = await resolveTools({ + context, + sessionId: 'test-session', + write: true, + todo: true, + }); + + const toolNames = tools.map((t) => t.name); + expect(toolNames).toContain('task'); + }); }); diff --git a/src/tool.ts b/src/tool.ts index 65f89801..4afcf195 100644 --- a/src/tool.ts +++ b/src/tool.ts @@ -2,7 +2,7 @@ import type { LanguageModelV2FunctionTool } from '@ai-sdk/provider'; import path from 'pathe'; import * as z from 'zod'; import type { Context } from './context'; -import type { ImagePart, TextPart } from './message'; +import type { ImagePart, NormalizedMessage, TextPart } from './message'; import { resolveModelWithContext } from './model'; import 
{ createAskUserQuestionTool } from './tools/askUserQuestion'; import { @@ -17,6 +17,7 @@ import { createGrepTool } from './tools/grep'; import { createLSTool } from './tools/ls'; import { createReadTool } from './tools/read'; import { createSkillTool } from './tools/skill'; +import { createTaskTool } from './tools/task'; import { createTodoTool, type TodoItem } from './tools/todo'; import { createWriteTool } from './tools/write'; @@ -25,7 +26,9 @@ type ResolveToolsOpts = { sessionId: string; write?: boolean; todo?: boolean; + getCurrentMessages?: () => NormalizedMessage[]; askUserQuestion?: boolean; + signal?: AbortSignal; }; export async function resolveTools(opts: ResolveToolsOpts) { @@ -77,6 +80,7 @@ export async function resolveTools(opts: ResolveToolsOpts) { }), ] : []; + const mcpTools = await getMcpTools(opts.context); const allTools = [ @@ -89,15 +93,32 @@ export async function resolveTools(opts: ResolveToolsOpts) { ]; const toolsConfig = opts.context.config.tools; - if (!toolsConfig || Object.keys(toolsConfig).length === 0) { - return allTools; - } + const availableTools = (() => { + if (!toolsConfig || Object.keys(toolsConfig).length === 0) { + return allTools; + } + return allTools.filter((tool) => { + // Check if the tool is disabled (only explicitly set to false will disable) + const isDisabled = toolsConfig[tool.name] === false; + return !isDisabled; + }); + })(); - return allTools.filter((tool) => { - // Check if the tool is disabled (only explicitly set to false will disable) - const isDisabled = toolsConfig[tool.name] === false; - return !isDisabled; - }); + const taskTools = (() => { + if (!opts.context.agentManager) return []; + const tool = createTaskTool({ + context: opts.context, + tools: availableTools, + sessionId: opts.sessionId, + signal: opts.signal, + }); + if (toolsConfig && toolsConfig[tool.name] === false) { + return []; + } + return [tool]; + })(); + + return [...availableTools, ...taskTools]; } async function getMcpTools(context: 
Context): Promise { @@ -171,7 +192,7 @@ export class Tools { : 0; const desc = limit > 0 && tool.description.length > limit - ? tool.description.slice(0, limit - 3) + '...' + ? `${tool.description.slice(0, limit - 3)}...` : tool.description; return { type: 'function', @@ -283,6 +304,11 @@ export type ToolResult = { llmContent: string | (TextPart | ImagePart)[]; returnDisplay?: ReturnDisplay; isError?: boolean; + metadata?: { + agentId?: string; + agentType?: string; + [key: string]: any; + }; }; export function createTool(config: { diff --git a/src/tools/glob.ts b/src/tools/glob.ts index bae21c30..4e9f3a54 100644 --- a/src/tools/glob.ts +++ b/src/tools/glob.ts @@ -1,5 +1,6 @@ import { glob } from 'glob'; import { z } from 'zod'; +import { TOOL_NAMES } from '../constants'; import { createTool } from '../tool'; import { safeStringify } from '../utils/safeStringify'; @@ -7,7 +8,7 @@ const LIMIT = 100; export function createGlobTool(opts: { cwd: string }) { return createTool({ - name: 'glob', + name: TOOL_NAMES.GLOB, description: ` Glob - Fast file pattern matching tool that works with any codebase size diff --git a/src/tools/grep.ts b/src/tools/grep.ts index 39554d93..1f318272 100644 --- a/src/tools/grep.ts +++ b/src/tools/grep.ts @@ -11,7 +11,15 @@ const DEFAULT_LIMIT = 1000; export function createGrepTool(opts: { cwd: string }) { return createTool({ name: TOOL_NAMES.GREP, - description: `Search for a pattern in a file or directory.`, + description: ` + Search for a pattern in a file or directory. + + Usage: + - ALWAYS use ${TOOL_NAMES.GREP} for search tasks. NEVER invoke \`grep\` or \`rg\` as a ${TOOL_NAMES.BASH} command. The ${TOOL_NAMES.GREP} tool has been optimized for correct permissions and access. 
+ - Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+") + - Filter files with glob parameter (e.g., "*.js", "**/*.tsx") or type parameter (e.g., "js", "py", "rust") + - Use ${TOOL_NAMES.TASK} tool for open-ended searches requiring multiple rounds + `, parameters: z.object({ pattern: z.string().describe('The pattern to search for'), search_path: z.string().optional().describe('The path to search in'), diff --git a/src/tools/read.ts b/src/tools/read.ts index 55bb0c98..b58f26ec 100644 --- a/src/tools/read.ts +++ b/src/tools/read.ts @@ -2,7 +2,7 @@ import fs from 'fs'; import { countTokens } from 'gpt-tokenizer'; import path from 'pathe'; import { z } from 'zod'; -import { IMAGE_EXTENSIONS } from '../constants'; +import { IMAGE_EXTENSIONS, TOOL_NAMES } from '../constants'; import { createTool, type ToolResult } from '../tool'; import { MaxFileReadLengthExceededError, @@ -86,7 +86,7 @@ const MAX_TOKENS = 25000; export function createReadTool(opts: { cwd: string; productName: string }) { const productName = opts.productName.toLowerCase(); return createTool({ - name: 'read', + name: TOOL_NAMES.READ, description: ` Reads a file from the local filesystem. You can access any file directly by using this tool. 
describe('Task Tool', () => {
  // Fresh input per call so no test can observe another test's object.
  const makeInput = () => ({
    subagent_type: 'Explore',
    description: 'Test task',
    prompt: 'Test prompt',
  });

  // Result fixtures returned by the mocked AgentManager.executeTask.
  const completedResult = () => ({
    status: 'completed',
    agentId: 'test-id',
    content: 'Task completed',
    totalToolCalls: 1,
    totalDuration: 100,
    usage: { inputTokens: 10, outputTokens: 5 },
  });
  const failedResult = (content: string) => ({
    status: 'failed',
    agentId: 'test-id',
    content,
    totalToolCalls: 0,
    totalDuration: 50,
    usage: { inputTokens: 5, outputTokens: 0 },
  });

  // Minimal AgentManager double: only the members the task tool touches.
  const makeManager = (executeTask: unknown) =>
    ({
      executeTask,
      getAgentDescriptions: vi.fn().mockReturnValue('Mock agent descriptions'),
    }) as unknown as AgentManager;

  // Builds the tool under test around a mocked Context.
  const makeTool = (
    agentManager: AgentManager,
    extras: { messageBus?: unknown; signal?: AbortSignal } = {},
  ) => {
    const contextFields: Record<string, unknown> = { agentManager };
    if ('messageBus' in extras) {
      contextFields.messageBus = extras.messageBus;
    }
    contextFields.cwd = '/test';

    const toolOpts: Parameters<typeof createTaskTool>[0] = {
      context: contextFields as unknown as Context,
      tools: [],
      sessionId: 'test-session-id',
    };
    if ('signal' in extras) {
      toolOpts.signal = extras.signal;
    }
    return createTaskTool(toolOpts);
  };

  test('should call agentManager.executeTask with correct parameters', async () => {
    const manager = makeManager(vi.fn().mockResolvedValue(completedResult()));
    const tool = makeTool(manager);

    const outcome = await tool.execute(makeInput());

    expect(outcome.isError).toBe(false);
    expect(outcome.llmContent).toContain('completed successfully');
    expect(manager.executeTask).toHaveBeenCalledWith(
      {
        subagent_type: 'Explore',
        description: 'Test task',
        prompt: 'Test prompt',
      },
      expect.objectContaining({
        cwd: '/test',
      }),
    );
  });

  test('should handle task failure', async () => {
    const manager = makeManager(
      vi.fn().mockResolvedValue(failedResult('Task failed')),
    );
    const tool = makeTool(manager);

    const outcome = await tool.execute(makeInput());

    expect(outcome.isError).toBe(true);
    expect(outcome.llmContent).toContain('failed');
  });

  test('should emit agent_progress completed event on success', async () => {
    const manager = makeManager(vi.fn().mockResolvedValue(completedResult()));
    const bus = {
      emitEvent: vi.fn(),
    };
    const tool = makeTool(manager, { messageBus: bus });

    await tool.execute(makeInput());

    expect(bus.emitEvent).toHaveBeenCalledWith(
      'agent_progress',
      expect.objectContaining({
        status: 'completed',
        agentId: 'test-id',
        agentType: 'Explore',
      }),
    );
  });

  test('should propagate cancellation signal', async () => {
    const manager = makeManager(
      vi.fn().mockImplementation((_input, ctx) => {
        // Verify signal is passed
        expect(ctx.signal).toBeDefined();
        return Promise.resolve(failedResult('Operation was canceled'));
      }),
    );
    const controller = new AbortController();
    const tool = makeTool(manager, { signal: controller.signal });

    await tool.execute(makeInput());

    expect(manager.executeTask).toHaveBeenCalled();
  });
});
+ +When NOT to use the Agent tool: +- If you want to read a specific file path, use the ${TOOL_NAMES.READ} or ${TOOL_NAMES.GLOB} tool instead of the Agent tool, to find the match more quickly +- If you are searching for a specific class definition like "class Foo", use the ${TOOL_NAMES.GLOB} tool instead, to find the match more quickly +- If you are searching for code within a specific file or set of 2-3 files, use the ${TOOL_NAMES.READ} tool instead of the Agent tool, to find the match more quickly +- Other tasks that are not related to the agent descriptions above + + +Usage notes: +- Launch multiple agents concurrently whenever possible, to maximize performance; to do that, use a single message with multiple tool uses +- When the agent is done, it will return a single message back to you. The result returned by the agent is not visible to the user. To show the user the result, you should send a text message back to the user with a concise summary of the result. +- Each agent invocation is stateless. You will not be able to send additional messages to the agent, nor will the agent be able to communicate with you outside of its final report. Therefore, your prompt should contain a highly detailed task description for the agent to perform autonomously and you should specify exactly what information the agent should return back to you in its final and only message to you. +- The agent's outputs should generally be trusted +- Clearly tell the agent whether you expect it to write code or just to do research (search, file reads, web fetches, etc.), since it is not aware of the user's intent +- If the agent description mentions that it should be used proactively, then you should try your best to use it without the user having to ask for it first. Use your judgement. +- If the user specifies that they want you to run agents "in parallel", you MUST send a single message with multiple Task tool use content blocks. 
For example, if you need to launch both a code-reviewer agent and a test-runner agent in parallel, send a single message with both tool calls. + +Example usage: + + +"code-reviewer": use this agent after you are done writing a signficant piece of code +"greeting-responder": use this agent when to respond to user greetings with a friendly joke + + + +user: "Please write a function that checks if a number is prime" +assistant: Sure let me write a function that checks if a number is prime +assistant: First let me use the ${TOOL_NAMES.WRITE} tool to write a function that checks if a number is prime +assistant: I'm going to use the ${TOOL_NAMES.WRITE} tool to write the following code: + +function isPrime(n) { + if (n <= 1) return false + for (let i = 2; i * i <= n; i++) { + if (n % i === 0) return false + } + return true +} + + +Since a signficant piece of code was written and the task was completed, now use the code-reviewer agent to review the code + +assistant: Now let me use the code-reviewer agent to review the code +assistant: Uses the ${TOOL_NAMES.TASK} tool to launch the with the code-reviewer agent + + + +user: "Hello" + +Since the user is greeting, use the greeting-responder agent to respond with a friendly joke + +assistant: "I'm going to use the ${TOOL_NAMES.TASK} tool to launch the with the greeting-responder agent" + + `, + + parameters: z.object({ + description: z + .string() + .describe('A short (3-5 word) description of task'), + + prompt: z.string().describe('The task for the agent to perform'), + + subagent_type: z + .string() + .describe('The type of specialized agent to use for this task'), + + resume: z + .string() + .optional() + .describe( + 'Optional agent ID to resume from. 
If provided, the agent will continue from the previous execution transcript.', + ), + }), + + // Runs the sub-agent through agentManager.executeTask, forwards its progress + // messages to the message bus, and formats the outcome as the tool result. + execute: async (params) => { + const startTime = Date.now(); + + if (!agentManager) { + return { + llmContent: 'Agent manager not found', + isError: true, + }; + } + + try { + const result = await agentManager.executeTask(params, { + cwd, + signal, + tools: opts.tools, + async onProgress(message: NormalizedMessage, agentId: string) { + try { + if (messageBus) { + await messageBus.emitEvent('agent_progress', { + sessionId, + cwd, + agentId, + agentType: params.subagent_type, + prompt: params.prompt, + message, + status: 'running', + timestamp: Date.now(), + }); + } + } catch (error) { + console.error( + '[createTaskTool] Failed to emit progress event:', + error, + ); + } + }, + // TODO: get forkContextMessages from context + // forkContextMessages: [], + }); + + // Emit completion event to close the UI overlay. + // Best-effort, like onProgress: a failed UI notification must not turn an + // already finished task into a "Failed to execute sub-agent" result. + try { + if (messageBus) { + await messageBus.emitEvent('agent_progress', { + sessionId, + cwd, + agentId: result.agentId, + agentType: params.subagent_type, + prompt: params.prompt, + message: { + role: 'assistant', + content: + result.status === 'completed' + ? 'Task completed' + : 'Task failed', + type: 'message', + uuid: randomUUID(), + timestamp: new Date().toISOString(), + parentUuid: null, + } as NormalizedMessage, + status: result.status === 'completed' ? 
'completed' : 'failed', + timestamp: Date.now(), + }); + } + } catch (error) { + console.error( + '[createTaskTool] Failed to emit completion event:', + error, + ); + } + + const duration = Date.now() - startTime; + + if (result.status === 'completed') { + return { + llmContent: `Sub-agent (${params.subagent_type}) completed successfully: + +${result.content} + +--- +Agent ID: ${result.agentId} +Tool Calls: ${result.totalToolCalls} +Duration: ${duration}ms +Tokens: ${result.usage.inputTokens} input, ${result.usage.outputTokens} output`, + isError: false, + metadata: { + agentId: result.agentId, + agentType: params.subagent_type, + }, + }; + } + return { + llmContent: `Sub-agent (${params.subagent_type}) failed: + +${result.content} + +--- +Agent ID: ${result.agentId} +Duration: ${duration}ms`, + isError: true, + metadata: { + agentId: result.agentId, + agentType: params.subagent_type, + }, + }; + } catch (error) { + return { + llmContent: `Failed to execute sub-agent: ${error instanceof Error ? error.message : String(error)}`, + isError: true, + }; + } + }, + + approval: { + category: 'command', + needsApproval: async (context) => { + if (context.approvalMode === 'yolo') { + return false; + } + return true; + }, + }, + }); +} diff --git a/src/tools/write.ts b/src/tools/write.ts index 8b634e35..ee5e69c1 100644 --- a/src/tools/write.ts +++ b/src/tools/write.ts @@ -1,11 +1,12 @@ import fs from 'fs'; import path from 'pathe'; import { z } from 'zod'; +import { TOOL_NAMES } from '../constants'; import { createTool } from '../tool'; export function createWriteTool(opts: { cwd: string }) { return createTool({ - name: 'write', + name: TOOL_NAMES.WRITE, description: 'Write a file to the local filesystem', parameters: z.object({ file_path: z.string(), diff --git a/src/ui/AgentProgressOverlay.tsx b/src/ui/AgentProgressOverlay.tsx new file mode 100644 index 00000000..63640e78 --- /dev/null +++ b/src/ui/AgentProgressOverlay.tsx @@ -0,0 +1,476 @@ +import { Box, Text, useInput } from 
'ink'; +import React, { useMemo, useState } from 'react'; +import type { + AssistantMessage, + 
NormalizedMessage, + TextPart, + ToolResultPart2, + ToolUsePart, +} from '../message'; +import { UI_COLORS } from './constants'; +import { GradientText } from './GradientText'; +import { useAppStore } from './store'; +import { useTextGradientAnimation } from './useTextGradientAnimation'; + +/** + * Container component that displays all running SubAgents + * Completely independent from Messages, similar to BackgroundPrompt + */ +export function AgentProgressOverlay() { + const agentProgressMap = useAppStore((state) => state.agentProgressMap); + const [expanded, setExpanded] = useState(false); + + useInput((input, key) => { + if (key.ctrl && input === 'o') { + setExpanded((prev) => !prev); + } + }); + + // Filter running agents and sort by start time + const runningAgents = useMemo(() => { + return Object.values(agentProgressMap) + .filter((progress) => progress.status === 'running') + .sort((a, b) => a.startTime - b.startTime); + }, [agentProgressMap]); + + // Don't render anything if no agents are running + if (runningAgents.length === 0) return null; + + return ( + + {/* Progress list */} + + {runningAgents.map((agent, index) => { + return ( + + + + ); + })} + + + ); +} + +interface AgentProgressItemProps { + agentId: string; + agentType?: string; + isLast: boolean; + expanded: boolean; +} + +/** + * Component to display real-time progress of a single SubAgent + * Shows a boxed progress with stats and expandable message history + */ +export function AgentProgressItem({ + agentId, + agentType, + isLast, + expanded, +}: AgentProgressItemProps) { + const progress = useAppStore((state) => state.agentProgressMap[agentId]); + + // Calculate statistics + const stats = useMemo(() => { + if (!progress || progress.messages.length === 0) { + return { toolCalls: 0, tokens: 0 }; + } + + const toolCalls = progress.messages.filter((msg) => { + if (msg.role === 'assistant' && Array.isArray(msg.content)) { + return msg.content.some((p) => p.type === 'tool_use'); + } + return 
false; + }).length; + + const tokens = progress.messages.reduce((sum, msg) => { + if (msg.role === 'assistant' && 'usage' in msg) { + const usage = (msg as AssistantMessage).usage; + return sum + (usage?.input_tokens || 0) + (usage?.output_tokens || 0); + } + return sum; + }, 0); + + return { toolCalls, tokens }; + }, [progress]); + + const taskDescription = useMemo(() => { + if (!progress) return ''; + + if (progress.prompt) { + const firstLine = progress.prompt.split('\n')[0]; + return firstLine.length > 30 ? `${firstLine.slice(0, 30)}...` : firstLine; + } + + return ''; + }, [progress]); + + const latestActivity = useMemo(() => { + if (!progress || progress.messages.length === 0) return null; + const msgs = progress.messages; + + // Search backwards for the last tool use + for (let i = msgs.length - 1; i >= 0; i--) { + const msg = msgs[i]; + if (msg.role === 'assistant' && Array.isArray(msg.content)) { + const toolUse = msg.content.find( + (p) => p.type === 'tool_use', + ) as ToolUsePart; + if (toolUse) { + // Check if there is a result for this tool use + const resultMsg = msgs.find( + (m, index) => + index > i && + m.role === 'tool' && + Array.isArray(m.content) && + m.content.some( + (p) => p.type === 'tool-result' && p.toolCallId === toolUse.id, + ), + ); + + let resultPart: ToolResultPart2 | undefined; + if (resultMsg && Array.isArray(resultMsg.content)) { + resultPart = resultMsg.content.find( + (p) => p.type === 'tool-result' && p.toolCallId === toolUse.id, + ) as ToolResultPart2; + } + + return { toolUse, result: resultPart }; + } + } + } + return null; + }, [progress]); + + const progressText = useMemo(() => { + if (taskDescription) { + return taskDescription; + } + const typeLabel = agentType + ? 
`${agentType.charAt(0).toUpperCase() + agentType.slice(1)}` + : 'Agent'; + return typeLabel; + }, [agentType, taskDescription]); + + const highlightIndex = useTextGradientAnimation( + progressText, + progress !== undefined, + ); + + if (!progress) { + return ( + + Initializing... + + ); + } + + // Approximation of hidden tool uses (assuming 2 messages per tool use pair) + const hiddenToolUses = Math.max(0, stats.toolCalls - 1); + + return ( + + {/* Header Line */} + + + + + {/* Content box */} + + {/* Expanded View */} + {expanded ? ( + + {progress.messages.map((msg, idx) => ( + + ))} + + + (ctrl+o to collapse) + + + + ) : ( + /* Collapsed View */ + + {latestActivity ? ( + + {/* Tool Use Line */} + + + + {formatToolUse(latestActivity.toolUse)} + + + {/* Tool Result Line (if exists) */} + {latestActivity.result ? ( + + + {formatToolResult(latestActivity.result)} + + + ) : ( + // Show "Running..." if no result yet + + + Running... + + + )} + + ) : ( + + └ Starting... + + )} + + {/* Footer / More info */} + + + {hiddenToolUses > 0 ? `+${hiddenToolUses} more tool uses ` : ''} + (ctrl+o to expand) + + + + )} + + + ); +} + +export function formatToolUse(toolUse: ToolUsePart): string { + const name = toolUse.name; + const args = toolUse.input; + const capitalize = (s: string) => s.charAt(0).toUpperCase() + s.slice(1); + + if (name === 'read') { + const paths = + args.paths || + (args.file_path ? [args.file_path] : args.path ? [args.path] : []); + const pathStr = paths.length > 0 ? paths.join(', ') : ''; + const displayPath = + pathStr.length > 40 ? `...${pathStr.slice(-37)}` : pathStr; + return `Read(${displayPath})`; + } + + if (name === 'bash') { + const cmd = args.command || ''; + const displayCmd = cmd.length > 40 ? 
`${cmd.slice(0, 37)}...` : cmd; + return `Bash(${displayCmd})`; + } + + if (name === 'grep') { + return `Grep(${args.pattern || ''})`; + } + + if (name === 'glob') { + return `Glob(${args.pattern || ''})`; + } + + if (name === 'ls') { + return `Ls(${args.path || ''})`; + } + + if (name === 'fetch') { + return `Fetch(${args.url || ''})`; + } + + if (name === 'write') { + // The write tool declares its target as file_path; path is kept as a fallback + return `Write(${args.file_path || args.path || ''})`; + } + + if (name === 'edit') { + // NOTE(review): assumes edit also names its target file_path, like write and read; confirm against the edit tool schema + return `Edit(${args.file_path || args.path || ''})`; + } + + if (name === 'task') { + return `Task(${args.subagent_type || ''})`; + } + + return `${capitalize(name)}(...)`; +} + +function formatToolResult(result: ToolResultPart2): string { + if (result.result.isError) { + const errorMsg = + typeof result.result.llmContent === 'string' + ? result.result.llmContent + : 'Error'; + const cleanError = errorMsg.split('\n')[0] || 'Error'; // Take first line of error + return cleanError.length > 50 + ? `Failed: ${cleanError.slice(0, 50)}...` + : `Failed: ${cleanError}`; + } + + let content = ''; + const rawContent = result.result.llmContent; + if (typeof rawContent === 'string') { + content = rawContent; + } else if (Array.isArray(rawContent)) { + content = rawContent + .filter((p) => p.type === 'text') + .map((p) => (p as TextPart).text) + .join(''); + } + + const lines = content.split('\n'); + const lineCount = lines.length; + + if (lineCount > 1) { + return `Result(${lineCount} lines)`; // Generic for multiline output + } + + if (content.length > 50) { + return `${content.slice(0, 50)}...`; + } + + return content || 'Done'; +} + +/** + * Render a single nested agent message + */ +export function NestedAgentMessage({ + message, + messages, + index, +}: { + message: NormalizedMessage; + messages?: NormalizedMessage[]; 
+ index?: number; + isLast?: boolean; +}) { + // Common left border style + // const border = ; + + if (message.role === 'user') { + const content = + typeof message.content === 'string' + ? 
message.content + : JSON.stringify(message.content); + return ( + + + User:{' '} + + + {content.length > 60 ? `${content.slice(0, 60)}...` : content} + + + ); + } + + if (message.role === 'assistant') { + const assistantMsg = message as AssistantMessage; + + // Text response + if (typeof assistantMsg.content === 'string') { + return ( + + + {assistantMsg.content.length > 60 + ? `${assistantMsg.content.slice(0, 60)}...` + : assistantMsg.content} + + + ); + } + + // Tool calls + if (Array.isArray(assistantMsg.content)) { + const toolUses = assistantMsg.content.filter( + (p) => p.type === 'tool_use', + ); + const textParts = assistantMsg.content.filter((p) => p.type === 'text'); + + return ( + + {textParts.map((part, idx) => { + if ('text' in part) { + const text = part.text.trim(); + if (!text) return null; + return ( + // biome-ignore lint/suspicious/noArrayIndexKey: text parts have no unique id + + + {text.length > 60 ? `${text.slice(0, 60)}...` : text} + + + ); + } + return null; + })} + {toolUses.map((toolUse) => { + if ('name' in toolUse) { + // Find result in subsequent messages + let resultText = ''; + if (messages && index !== undefined) { + const resultMsg = messages.find( + (m, i) => + i > index && + m.role === 'tool' && + Array.isArray(m.content) && + m.content.some( + (p) => + p.type === 'tool-result' && + p.toolCallId === (toolUse as ToolUsePart).id, + ), + ); + + if (resultMsg && Array.isArray(resultMsg.content)) { + const resultPart = resultMsg.content.find( + (p) => + p.type === 'tool-result' && + p.toolCallId === (toolUse as ToolUsePart).id, + ) as ToolResultPart2; + if (resultPart) { + resultText = formatToolResult(resultPart); + } + } + } + + return ( + + + + {formatToolUse(toolUse as ToolUsePart)} + + {resultText && ( + + {' '} + · {resultText} + + )} + + ); + } + return null; + })} + + ); + } + } + + // Tool messages are handled within the assistant message + if (message.role === 'tool') { + return null; + } + + return null; +} diff --git 
a/src/ui/App.tsx b/src/ui/App.tsx index 85c00999..10fbeb1f 100644 --- a/src/ui/App.tsx +++ b/src/ui/App.tsx @@ -2,6 +2,7 @@ import { Box, Text } from 'ink'; import SelectInput from 'ink-select-input'; import React, { useCallback } from 'react'; import { ActivityIndicator } from './ActivityIndicator'; +import { AgentProgressOverlay } from './AgentProgressOverlay'; import { ApprovalModal } from './ApprovalModal'; import { BackgroundPrompt } from './BackgroundPrompt'; import { ChatInput } from './ChatInput'; @@ -110,6 +111,7 @@ export function App() { key={`${forceRerender}-${forkParentUuid}-${forkCounter}`} > + diff --git a/src/ui/Messages.test.ts b/src/ui/Messages.test.ts index 6cf2f60a..7633505b 100644 --- a/src/ui/Messages.test.ts +++ b/src/ui/Messages.test.ts @@ -29,7 +29,7 @@ describe('splitMessages', () => { expect(result.pendingMessages).toHaveLength(0); }); - test('should put all in completed when all tools have results', () => { + test('should keep last tool group in pending even if completed', () => { const messages = [ createMockMessage( 'assistant', @@ -51,8 +51,8 @@ describe('splitMessages', () => { ), ]; const result = splitMessages(messages); - expect(result.completedMessages).toHaveLength(2); - expect(result.pendingMessages).toHaveLength(0); + expect(result.completedMessages).toHaveLength(0); + expect(result.pendingMessages).toHaveLength(2); }); test('should split when tools are pending', () => { @@ -140,8 +140,8 @@ describe('splitMessages', () => { ), ]; const result = splitMessages(messages); - expect(result.completedMessages).toHaveLength(4); - expect(result.pendingMessages).toHaveLength(0); + expect(result.completedMessages).toHaveLength(2); + expect(result.pendingMessages).toHaveLength(2); }); test('should split at last pending tool_use group', () => { diff --git a/src/ui/Messages.tsx b/src/ui/Messages.tsx index 366bfb7f..6489f5c9 100644 --- a/src/ui/Messages.tsx +++ b/src/ui/Messages.tsx @@ -1,6 +1,6 @@ -import { Box, Static, Text } from 'ink'; 
+import { Box, Static, Text, useInput } from 'ink'; import pc from 'picocolors'; -import React, { useMemo } from 'react'; +import React, { useMemo, useState } from 'react'; import type { AssistantMessage, NormalizedMessage, @@ -9,6 +9,7 @@ import type { ToolMessage, ToolMessage2, ToolResultPart, + ToolResultPart2, ToolUsePart, UserMessage, } from '../message'; @@ -145,14 +146,12 @@ export function splitMessages(messages: NormalizedMessage[]): { // 5. Check if all tools are completed const allToolsCompleted = toolUseIds.every((id) => toolResults.has(id)); - if (allToolsCompleted) { - return { completedMessages: messages, pendingMessages: [] }; - } else { - return { - completedMessages: messages.slice(0, lastToolUseIndex), - pendingMessages: messages.slice(lastToolUseIndex), - }; - } + // Always keep the last tool interaction sequence dynamic to allow interaction (e.g. expanding sub-agent details) + // Even if completed, we want to allow users to toggle expansion with ctrl+o + return { + completedMessages: messages.slice(0, lastToolUseIndex), + pendingMessages: messages.slice(lastToolUseIndex), + }; } export function pairToolsWithResults( @@ -433,9 +432,140 @@ function AssistantText({ ); } +function SubAgentToolResult({ toolResult }: { toolResult: ToolResultPart }) { + const { input, result } = toolResult; + const prompt = input['prompt'] || input['description']; + const [expanded, setExpanded] = useState(false); + + useInput((input, key) => { + if (key.ctrl && input === 'o') { + setExpanded((prev) => !prev); + } + }); + + // Extract content from llmContent + let response = result.llmContent; + const stats = { toolCalls: 0, tokens: '0' }; + let isFailed = result.isError; + let isCanceled = false; + + if (typeof response === 'string') { + if (response.includes('Operation was canceled')) { + isFailed = true; + isCanceled = true; + } + + // Try to extract clean content from the formatted string in task.ts + // Format: Sub-agent (...) 
completed successfully:\n\n${content}\n\n--- + // Anchor on the "Agent ID:" trailer and capture greedily, so that a markdown + // rule ("---") inside the agent's own content does not truncate the response + const successMatch = response.match( + /Sub-agent \(.*\) completed successfully:\n\n([\s\S]*)\n\n---\nAgent ID: /, + ); + const failMatch = response.match( + /Sub-agent \(.*\) failed:\n\n([\s\S]*)\n\n---\nAgent ID: /, + ); + + if (successMatch) { + response = successMatch[1]; + } else if (failMatch) { + response = failMatch[1]; + isFailed = true; + } + + // Extract stats + if (typeof result.llmContent === 'string') { + const toolCallsMatch = result.llmContent.match(/Tool Calls: (\d+)/); + const tokensMatch = result.llmContent.match( + /Tokens: (\d+) input, (\d+) output/, + ); + + if (toolCallsMatch) { + stats.toolCalls = parseInt(toolCallsMatch[1], 10); + } + if (tokensMatch) { + const tokens = + parseInt(tokensMatch[1], 10) + parseInt(tokensMatch[2], 10); + stats.tokens = + tokens > 1000 ? `${(tokens / 1000).toFixed(1)}k` : `${tokens}`; + } + } + } else if (Array.isArray(response)) { + response = response + .map((p) => (p.type === 'text' ? p.text : '[Image]')) + .join(''); + } + + const statusText = isCanceled ? 'Canceled ' : isFailed ? 'Failed ' : 'Done '; + const statusColor = isFailed || isCanceled ? UI_COLORS.ERROR : 'white'; + + const header = ( + + + {statusText} + + {' '} + ({stats.toolCalls} tool uses · {stats.tokens} tokens) (ctrl+o to{' '} + {expanded ? 'collapse' : 'expand'}) + + + ); + + return ( + + {header} + {expanded && ( + + + + + Prompt: + + + + + {typeof prompt === 'string' ? prompt : JSON.stringify(prompt)} + + + + + + + Response: + + + + + {typeof response === 'string' + ? 
response + : JSON.stringify(response)} + + + + )} + + ); +} + function ToolUse({ part }: { part: ToolUsePart }) { - const { name, displayName } = part; + const { name, displayName, input } = part; const description = part.description; + + if (name === 'task' && input) { + const subagentType = input['subagent_type']; + + if (subagentType && typeof subagentType === 'string') { + const capitalizedType = + subagentType.charAt(0).toUpperCase() + subagentType.slice(1); + + return ( + + + Task({capitalizedType}) + + + ); + } + } + return ( @@ -449,6 +579,22 @@ function ToolUse({ part }: { part: ToolUsePart }) { } function ToolPair({ pair }: { pair: ToolPair }) { + if (pair.toolUse.name === 'task') { + return ( + + {/* Render ToolUse */} + + + {/* Render ToolResult if available */} + {pair.toolResult && ( + + + + )} + + ); + } + return ( {/* Render ToolUse */} @@ -570,6 +716,7 @@ function Thinking({ text }: { text: string }) { function ToolResultItem({ part }: { part: ToolResultPart }) { const { result, input } = part; + if (result.isError) { let text = result.returnDisplay || result.llmContent; if (typeof text !== 'string') { diff --git a/src/ui/agent-progress.test.ts b/src/ui/agent-progress.test.ts new file mode 100644 index 00000000..f7f65774 --- /dev/null +++ b/src/ui/agent-progress.test.ts @@ -0,0 +1,205 @@ +#!/usr/bin/env node + +/** + * End-to-End Test for SubAgent Progress Display + * + * This script tests the complete data flow: + * 1. SubAgent execution produces messages + * 2. Messages are sent through MessageBus + * 3. UI Store receives and updates agentProgressMap + * 4. 
AgentProgress component displays the progress + * + * Usage: npm test -- src/ui/agent-progress.test.ts + */ + +import { describe, expect, it } from 'vitest'; + +describe('SubAgent Progress Display - E2E', () => { + it('should have AgentProgressOverlay component exported', async () => { + const { AgentProgressOverlay } = await import('./AgentProgressOverlay'); + + expect(AgentProgressOverlay).toBeDefined(); + expect(typeof AgentProgressOverlay).toBe('function'); + }); + + it('should have store with agentProgressMap', async () => { + const { useAppStore } = await import('./store'); + + const store = useAppStore.getState(); + + expect(store.agentProgressMap).toBeDefined(); + expect(typeof store.agentProgressMap).toBe('object'); + expect(typeof store.updateAgentProgress).toBe('function'); + expect(typeof store.clearAgentProgress).toBe('function'); + }); + + it('should update agent progress correctly', async () => { + const { useAppStore } = await import('./store'); + + const testAgentId = 'test-agent-123'; + const testMessage: any = { + role: 'assistant' as const, + content: 'Test message', + type: 'message' as const, + timestamp: new Date().toISOString(), + uuid: 'test-uuid', + parentUuid: null, + }; + + // Update progress + useAppStore.getState().updateAgentProgress({ + agentId: testAgentId, + message: testMessage, + status: 'running', + }); + + // Verify update + const progress = useAppStore.getState().agentProgressMap[testAgentId]; + + expect(progress).toBeDefined(); + expect(progress.agentId).toBe(testAgentId); + expect(progress.messages).toHaveLength(1); + expect(progress.messages[0]).toEqual(testMessage); + expect(progress.lastUpdate).toBeDefined(); + + // Clean up + useAppStore.getState().clearAgentProgress(testAgentId); + }); + + it('should clear agent progress correctly', async () => { + const { useAppStore } = await import('./store'); + + const testAgentId = 'test-agent-456'; + const testMessage: any = { + role: 'assistant' as const, + content: 'Test 
message', + type: 'message' as const, + timestamp: new Date().toISOString(), + uuid: 'test-uuid-2', + parentUuid: null, + }; + + // Add progress + useAppStore.getState().updateAgentProgress({ + agentId: testAgentId, + message: testMessage, + status: 'running', + }); + + expect(useAppStore.getState().agentProgressMap[testAgentId]).toBeDefined(); + + // Clear progress + useAppStore.getState().clearAgentProgress(testAgentId); + + expect( + useAppStore.getState().agentProgressMap[testAgentId], + ).toBeUndefined(); + }); + + it('should accumulate multiple messages for same agent', async () => { + const { useAppStore } = await import('./store'); + + const testAgentId = 'test-agent-789'; + + const messages: any[] = [ + { + role: 'assistant' as const, + content: 'Message 1', + type: 'message' as const, + timestamp: new Date().toISOString(), + uuid: 'uuid-1', + parentUuid: null, + }, + { + role: 'assistant' as const, + content: 'Message 2', + type: 'message' as const, + timestamp: new Date().toISOString(), + uuid: 'uuid-2', + parentUuid: null, + }, + { + role: 'assistant' as const, + content: 'Message 3', + type: 'message' as const, + timestamp: new Date().toISOString(), + uuid: 'uuid-3', + parentUuid: null, + }, + ]; + + // Add messages sequentially + for (const message of messages) { + useAppStore.getState().updateAgentProgress({ + agentId: testAgentId, + message, + status: 'running', + }); + } + + // Verify all messages are accumulated + const progress = useAppStore.getState().agentProgressMap[testAgentId]; + + expect(progress).toBeDefined(); + expect(progress.messages).toHaveLength(3); + expect(progress.messages[0].content).toBe('Message 1'); + expect(progress.messages[1].content).toBe('Message 2'); + expect(progress.messages[2].content).toBe('Message 3'); + + // Clean up + useAppStore.getState().clearAgentProgress(testAgentId); + }); + + it('should handle multiple agents independently', async () => { + const { useAppStore } = await import('./store'); + + const agent1Id 
= 'agent-1'; + const agent2Id = 'agent-2'; + + const message1: any = { + role: 'assistant' as const, + content: 'Agent 1 message', + type: 'message' as const, + timestamp: new Date().toISOString(), + uuid: 'uuid-agent1', + parentUuid: null, + }; + + const message2: any = { + role: 'assistant' as const, + content: 'Agent 2 message', + type: 'message' as const, + timestamp: new Date().toISOString(), + uuid: 'uuid-agent2', + parentUuid: null, + }; + + // Update both agents + useAppStore.getState().updateAgentProgress({ + agentId: agent1Id, + message: message1, + status: 'running', + }); + + useAppStore.getState().updateAgentProgress({ + agentId: agent2Id, + message: message2, + status: 'running', + }); + + // Verify both are stored independently + const progress1 = useAppStore.getState().agentProgressMap[agent1Id]; + const progress2 = useAppStore.getState().agentProgressMap[agent2Id]; + + expect(progress1).toBeDefined(); + expect(progress2).toBeDefined(); + expect(progress1.messages[0].content).toBe('Agent 1 message'); + expect(progress2.messages[0].content).toBe('Agent 2 message'); + + // Clean up + useAppStore.getState().clearAgentProgress(agent1Id); + useAppStore.getState().clearAgentProgress(agent2Id); + }); +}); + +console.log('✅ All E2E tests defined. 
Run with: npm test'); diff --git a/src/ui/store.ts b/src/ui/store.ts index 3139467c..5be084fd 100644 --- a/src/ui/store.ts +++ b/src/ui/store.ts @@ -144,6 +144,20 @@ interface AppState { bashBackgroundPrompt: BashPromptBackgroundEvent | null; thinking: ThinkingConfig | undefined; + + // Agent progress tracking + agentProgressMap: Record< + string, + { + status: 'running' | 'completed' | 'failed'; + agentId: string; + agentType?: string; + prompt?: string; + messages: NormalizedMessage[]; + lastUpdate: number; + startTime: number; + } + >; } type InitializeOpts = { @@ -216,6 +230,16 @@ interface AppActions { setBashBackgroundPrompt: (prompt: BashPromptBackgroundEvent) => void; clearBashBackgroundPrompt: () => void; toggleThinking: () => void; + + // Agent progress actions + updateAgentProgress: (data: { + agentId: string; + agentType?: string; + prompt?: string; + message: NormalizedMessage; + status: 'running' | 'completed' | 'failed'; + }) => void; + clearAgentProgress: (agentId: string) => void; } export type AppStore = AppState & AppActions; @@ -274,6 +298,9 @@ export const useAppStore = create()( bashBackgroundPrompt: null, + // Agent progress state + agentProgressMap: {}, + // Actions initialize: async (opts) => { const { bridge } = opts; @@ -355,6 +382,19 @@ export const useAppStore = create()( set({ retryInfo: null }); } }); + + // Listen for SubAgent progress events + bridge.onEvent('agent_progress', (data) => { + const { agentId, agentType, prompt, message, status } = data; + get().updateAgentProgress({ + agentId, + agentType, + prompt, + message, + status: status || 'running', + }); + }); + setImmediate(async () => { if (opts.initialPrompt) { get().send(opts.initialPrompt); @@ -765,6 +805,7 @@ export const useAppStore = create()( if (!isExecuting(status)) { return; } + await bridge.request('session.cancel', { cwd, sessionId, @@ -775,6 +816,7 @@ export const useAppStore = create()( processingTokens: 0, retryInfo: null, bashBackgroundPrompt: null, + 
agentProgressMap: {}, }); }, @@ -1148,6 +1190,44 @@ export const useAppStore = create()( } set({ thinking: next }); }, + + // Agent progress methods + // Append one progress message for agentId and refresh its status and timestamps + updateAgentProgress: (data) => { + const { agentId, agentType, prompt, message, status } = data; + const { agentProgressMap } = get(); + + const existing = agentProgressMap[agentId]; + + // Limit messages to last 100 to prevent memory issues + const MAX_MESSAGES = 100; + const newMessages = existing + ? [...existing.messages, message].slice(-MAX_MESSAGES) + : [message]; + + set({ + agentProgressMap: { + ...agentProgressMap, + [agentId]: { + status, + agentId, + // Keep the previously recorded agentType/prompt when a later event omits them + agentType: agentType ?? existing?.agentType, + prompt: prompt || existing?.prompt, + messages: newMessages, + lastUpdate: Date.now(), + startTime: existing?.startTime || Date.now(), + }, + }, + }); + }, + // Drop the tracked progress entry for a single agent + clearAgentProgress: (agentId) => { + const { agentProgressMap } = get(); + const newMap = { ...agentProgressMap }; + delete newMap[agentId]; + set({ agentProgressMap: newMap }); + }, }), { name: 'app-store' }, ),