From 2f941ee4b2a6b7827a09fa5af8d86b9d297bb743 Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Wed, 1 Apr 2026 15:58:24 +0300 Subject: [PATCH 01/11] feat: agent baseUrl & model passthrough (task-2026-04-01_baseurl-model) - Add baseUrl and model to AgentConfig; saveAgentConfig accepts 'apiKey'|'baseUrl'|'model' - Add AGENT_BASE_URL_ENV mapping providers to their env var names - BaseAgentAdapter: resolveBaseUrl() injects env var from config when user hasn't set it; resolveModel(taskModel?) applies per-task then falls back to config default - ClaudeAdapter: overrides resolveBaseUrl() to also inject CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 when a baseUrl is active - Codex/GeminiAdapter: buildArgs(prompt, model?) threads --model flag - ProcessSpawner/adapter: spawn() accepts optional model param (interface compliance) - EventDrivenWorkerPool: passes task.model to spawn() - TaskManager: preserves model on retry and resume - Database migration v16: adds model TEXT column to tasks table - TaskRepository: persist/restore model field; Zod schema + row type updated - Domain: model field on Task, TaskRequest, PipelineStepRequest, PipelineCreateRequest, ScheduleCreateRequest, ScheduledPipelineCreateRequest, LoopCreateRequest, OrchestratorCreateRequest; propagated in createTask, createLoop, createOrchestration factories - MCP: DelegateTask/ScheduleTask/CreatePipeline/SchedulePipeline/CreateLoop/ScheduleLoop schemas gain model field; ConfigureAgent gains baseUrl+model set/check; ListAgents exposes baseUrl/model; handleDelegateTask wires model to TaskRequest; all handlers thread model through - CLI: beat run --model/-m flag; agents config set accepts baseUrl/model keys; agents config show displays baseUrl and model; shell history warning only for apiKey - MCP instructions: document model resolution order and ConfigureAgent baseUrl/model usage --- src/adapters/mcp-adapter.ts | 132 ++++++++-- src/adapters/mcp-instructions.ts | 21 ++ src/cli.ts | 12 + src/cli/commands/agents.ts | 38 ++- src/cli/commands/run.ts | 3 + src/core/agents.ts | 19 +- src/core/configuration.ts | 24 +- src/core/domain.ts | 19 ++ src/core/interfaces.ts | 7 +- src/implementations/base-agent-adapter.ts | 47 +++- src/implementations/claude-adapter.ts | 27 +- src/implementations/codex-adapter.ts | 5 +- src/implementations/database.ts | 7 + .../event-driven-worker-pool.ts | 4 +- src/implementations/gemini-adapter.ts | 5 +- .../process-spawner-adapter.ts | 9 +- src/implementations/process-spawner.ts | 3 +- src/implementations/task-repository.ts | 19 +- src/services/handlers/schedule-handler.ts | 1 + src/services/schedule-manager.ts | 2 + src/services/task-manager.ts | 2 + tests/fixtures/test-data.ts | 1 + tests/unit/core/domain.test.ts | 18 ++ .../implementations/agent-adapters.test.ts | 231 ++++++++++++++++++ .../unit/implementations/agent-config.test.ts | 75 ++++++ .../event-driven-worker-pool.test.ts | 4 +- .../implementations/task-repository.test.ts | 68 ++++++ 27 files changed, 750 insertions(+), 53 deletions(-) diff --git a/src/adapters/mcp-adapter.ts b/src/adapters/mcp-adapter.ts index 7b0beff5..bb8911a2 100644 --- a/src/adapters/mcp-adapter.ts +++ b/src/adapters/mcp-adapter.ts @@ -62,6 +62,7 @@ const DelegateTaskSchema = z.object({ .enum(AGENT_PROVIDERS_TUPLE) .optional() .describe('AI agent to execute the task (uses configured default if omitted)'), + model: z.string().min(1).max(200).optional().describe('Model override for this task (overrides agent-config default)'), }); const TaskStatusSchema = z.object({ @@ -107,6 +108,7 @@ const ScheduleTaskSchema = z.object({ .enum(AGENT_PROVIDERS_TUPLE) .optional() .describe('AI agent to execute the task (uses configured default if omitted)'), + model: z.string().min(1).max(200).optional().describe('Model override for this task (overrides agent-config default)'), }); const ListSchedulesSchema = z.object({ @@ -147,6 +149,7 @@ const CreatePipelineSchema = z.object({ priority: z.enum(['P0', 'P1', 'P2']).optional().describe('Priority override for this step'), workingDirectory: z.string().optional().describe('Working directory override (absolute path)'), agent: z.enum(AGENT_PROVIDERS_TUPLE).optional().describe('Agent override for this step'), + model: z.string().min(1).max(200).optional().describe('Model override for this step'), }), ) .min(2, 'Pipeline requires at least 2 steps') @@ -164,6 +167,12 @@ const CreatePipelineSchema = z.object({ .enum(AGENT_PROVIDERS_TUPLE) .optional() .describe('Default agent for all steps (individual steps can override)'), + model: z + .string() + .min(1) + .max(200) + .optional() + .describe('Default model for all steps (individual steps can override)'), }); const SchedulePipelineSchema = z.object({ @@ -174,6 +183,7 @@ const SchedulePipelineSchema = z.object({ priority: z.enum(['P0', 'P1', 'P2']).optional().describe('Priority override for this step'), workingDirectory: z.string().optional().describe('Working directory override (absolute path)'), agent: z.enum(AGENT_PROVIDERS_TUPLE).optional().describe('Agent override for this step'), + model: z.string().min(1).max(200).optional().describe('Model override for this step'), }), ) .min(2, 'Pipeline requires at least 2 steps') @@ -196,6 +206,12 @@ const SchedulePipelineSchema = z.object({ .enum(AGENT_PROVIDERS_TUPLE) .optional() .describe('Default agent for all steps (individual steps can override)'), + model: z + .string() + .min(1) + .max(200) + .optional() + .describe('Default model for all steps (individual steps can override)'), }); // Orchestrator-related Zod schemas (v0.9.0 Orchestrator Mode) @@ -228,8 +244,10 @@ const ConfigureAgentSchema = z.object({ action: z .enum(['set', 'check', 'reset']) .default('check') - .describe('Action: set API key, check auth status, or reset stored key'), - apiKey: z.string().min(1).optional().describe('API key to store (required for set action)'), + .describe('Action: set config values, check auth status, or reset all stored config'), + apiKey: z.string().min(1).optional().describe('API key to store (set action)'), + baseUrl: z.string().url().optional().describe('Base URL override (set action, e.g. https://proxy.example.com/v1)'), + model: z.string().min(1).max(200).optional().describe('Default model override for this agent (set action)'), }); // Loop-related Zod schemas (v0.7.0 Task/Pipeline Loops) @@ -278,6 +296,7 @@ const CreateLoopSchema = z.object({ .describe('Pipeline step prompts (creates pipeline loop)'), priority: z.enum(['P0', 'P1', 'P2']).optional().describe('Task priority'), agent: z.enum(AGENT_PROVIDERS_TUPLE).optional().describe('Agent provider'), + model: z.string().min(1).max(200).optional().describe('Model override for each iteration task (overrides agent-config default)'), gitBranch: z.string().optional().describe('Git branch name for loop iteration work'), }); @@ -339,6 +358,7 @@ const ScheduleLoopSchema = z.object({ gitBranch: z.string().optional().describe('Git branch name for loop iteration work'), priority: z.enum(['P0', 'P1', 'P2']).optional().describe('Task priority'), agent: z.enum(AGENT_PROVIDERS_TUPLE).optional().describe('Agent provider'), + model: z.string().min(1).max(200).optional().describe('Model override for each iteration task (overrides agent-config default)'), // Schedule fields scheduleType: z.enum(['cron', 'one_time']).describe('Schedule type'), cronExpression: z.string().optional().describe('Cron expression (5-field) for recurring loops'), @@ -572,6 +592,12 @@ export class MCPAdapter { enum: [...AGENT_PROVIDERS], description: `AI agent to execute the task (${this.config.defaultAgent ? `default: ${this.config.defaultAgent}` : 'required if no default configured'})`, }, + model: { + type: 'string', + description: 'Model override for this task (overrides agent-config default)', + minLength: 1, + maxLength: 200, + }, }, required: ['prompt'], }, @@ -726,6 +752,10 @@ export class MCPAdapter { enum: [...AGENT_PROVIDERS], description: `AI agent to execute the task (${this.config.defaultAgent ? `default: ${this.config.defaultAgent}` : 'required if no default configured'})`, }, + model: { + type: 'string', + description: 'Model override for this task (overrides agent-config default)', + }, }, required: ['prompt', 'scheduleType'], }, @@ -854,6 +884,10 @@ export class MCPAdapter { enum: [...AGENT_PROVIDERS], description: 'Agent override for this step', }, + model: { + type: 'string', + description: 'Model override for this step', + }, }, required: ['prompt'], }, @@ -874,6 +908,10 @@ export class MCPAdapter { enum: [...AGENT_PROVIDERS], description: 'Default agent for all steps (individual steps can override)', }, + model: { + type: 'string', + description: 'Default model for all steps (individual steps can override)', + }, }, required: ['steps'], }, @@ -912,6 +950,10 @@ export class MCPAdapter { enum: [...AGENT_PROVIDERS], description: 'Agent override for this step', }, + model: { + type: 'string', + description: 'Model override for this step', + }, }, required: ['prompt'], }, @@ -965,6 +1007,10 @@ export class MCPAdapter { enum: [...AGENT_PROVIDERS], description: 'Default agent for all steps (individual steps can override)', }, + model: { + type: 'string', + description: 'Default model for all steps (individual steps can override)', + }, }, required: ['steps', 'scheduleType'], }, @@ -1055,6 +1101,10 @@ export class MCPAdapter { enum: [...AGENT_PROVIDERS], description: `AI agent to execute iterations (${this.config.defaultAgent ? `default: ${this.config.defaultAgent}` : 'required if no default configured'})`, }, + model: { + type: 'string', + description: 'Model override for each iteration task (overrides agent-config default)', + }, gitBranch: { type: 'string', description: 'Git branch name for loop iteration work (v0.8.0)', @@ -1203,6 +1253,7 @@ export class MCPAdapter { gitBranch: { type: 'string', description: 'Git branch name for loop iteration work' }, priority: { type: 'string', enum: ['P0', 'P1', 'P2'] }, agent: { type: 'string', enum: [...AGENT_PROVIDERS] }, + model: { type: 'string', description: 'Model override for each iteration task (overrides agent-config default)' }, scheduleType: { type: 'string', enum: ['cron', 'one_time'] }, cronExpression: { type: 'string', description: 'Cron expression (5-field)' }, scheduledAt: { type: 'string', description: 'ISO 8601 datetime for one-time loops' }, @@ -1326,7 +1377,7 @@ export class MCPAdapter { }, { name: 'ConfigureAgent', - description: 'Check auth status, store API key, or reset stored key for an agent', + description: 'Check auth status, store API key/baseUrl/model, or reset stored config for an agent', inputSchema: { type: 'object', properties: { @@ -1342,7 +1393,15 @@ export class MCPAdapter { }, apiKey: { type: 'string', - description: 'API key to store (required for set action)', + description: 'API key to store (set action)', + }, + baseUrl: { + type: 'string', + description: 'Base URL override for the agent API (set action, e.g. https://proxy.example.com/v1)', + }, + model: { + type: 'string', + description: 'Default model for this agent (set action, overridden by per-task model)', }, }, required: ['agent'], @@ -1400,6 +1459,7 @@ export class MCPAdapter { dependsOn: data.dependsOn ? data.dependsOn.map(TaskId) : undefined, continueFrom: data.continueFrom ? TaskId(data.continueFrom) : undefined, agent: data.agent as AgentProvider | undefined, + model: data.model, }; // Delegate task using our new architecture @@ -1490,6 +1550,7 @@ export class MCPAdapter { exitCode: task.exitCode, workingDirectory: task.workingDirectory, agent: task.agent ?? 'unknown', + ...(task.model && { model: task.model }), }), }, ], @@ -1735,6 +1796,7 @@ export class MCPAdapter { expiresAt: data.expiresAt, afterScheduleId: data.afterSchedule ? ScheduleId(data.afterSchedule) : undefined, agent: data.agent as AgentProvider | undefined, + model: data.model, }; const result = await this.scheduleService.createSchedule(request); @@ -2076,6 +2138,7 @@ export class MCPAdapter { priority: s.priority as Priority | undefined, workingDirectory: s.workingDirectory, agent: (s.agent ?? data.agent) as AgentProvider | undefined, + model: s.model ?? data.model, })), priority: data.priority as Priority | undefined, workingDirectory: data.workingDirectory, @@ -2133,6 +2196,7 @@ export class MCPAdapter { priority: s.priority as Priority | undefined, workingDirectory: s.workingDirectory, agent: s.agent as AgentProvider | undefined, + model: s.model ?? data.model, })), scheduleType: data.scheduleType === 'cron' ? ScheduleType.CRON : ScheduleType.ONE_TIME, cronExpression: data.cronExpression, @@ -2145,6 +2209,7 @@ export class MCPAdapter { expiresAt: data.expiresAt, afterScheduleId: data.afterSchedule ? ScheduleId(data.afterSchedule) : undefined, agent: data.agent as AgentProvider | undefined, + model: data.model, }; const result = await this.scheduleService.createScheduledPipeline(request); @@ -2224,6 +2289,7 @@ export class MCPAdapter { pipelineSteps: data.pipelineSteps, priority: data.priority as Priority | undefined, agent: data.agent as AgentProvider | undefined, + model: data.model, gitBranch: data.gitBranch, }; @@ -2556,6 +2622,7 @@ export class MCPAdapter { gitBranch: data.gitBranch, priority: data.priority as Priority | undefined, agent: data.agent as AgentProvider | undefined, + model: data.model, }; const request: ScheduledLoopCreateRequest = { @@ -2620,6 +2687,8 @@ export class MCPAdapter { authStatus: authStatus.ready ? 'ready' : 'not-configured', authMethod: authStatus.method, ...(authStatus.hint && { hint: authStatus.hint }), + ...(agentConfig.baseUrl && { baseUrl: agentConfig.baseUrl }), + ...(agentConfig.model && { model: agentConfig.model }), }; }); @@ -2866,7 +2935,7 @@ export class MCPAdapter { }; } - const { agent, action, apiKey } = parseResult.data; + const { agent, action, apiKey, baseUrl, model } = parseResult.data; switch (action) { case 'check': { @@ -2881,6 +2950,8 @@ export class MCPAdapter { success: true, ...status, ...(agentConfig.apiKey && { storedKey: maskApiKey(agentConfig.apiKey) }), + ...(agentConfig.baseUrl && { baseUrl: agentConfig.baseUrl }), + ...(agentConfig.model && { model: agentConfig.model }), }, null, 2, @@ -2891,30 +2962,63 @@ export class MCPAdapter { } case 'set': { - if (!apiKey) { + if (!apiKey && !baseUrl && !model) { return { content: [ { type: 'text', - text: JSON.stringify({ success: false, error: 'apiKey is required for set action' }, null, 2), + text: JSON.stringify( + { success: false, error: 'At least one of apiKey, baseUrl, or model is required for set action' }, + null, + 2, + ), }, ], isError: true, }; } - const result = saveAgentConfig(agent, 'apiKey', apiKey); - if (!result.ok) { - return { - content: [{ type: 'text', text: JSON.stringify({ success: false, error: result.error }, null, 2) }], - isError: true, - }; + + const messages: string[] = []; + + if (apiKey) { + const result = saveAgentConfig(agent, 'apiKey', apiKey); + if (!result.ok) { + return { + content: [{ type: 'text', text: JSON.stringify({ success: false, error: result.error }, null, 2) }], + isError: true, + }; + } + messages.push(`API key stored (${maskApiKey(apiKey)})`); } + + if (baseUrl !== undefined) { + const result = saveAgentConfig(agent, 'baseUrl', baseUrl); + if (!result.ok) { + return { + content: [{ type: 'text', text: JSON.stringify({ success: false, error: result.error }, null, 2) }], + isError: true, + }; + } + messages.push(baseUrl ? `baseUrl set to ${baseUrl}` : 'baseUrl cleared'); + } + + if (model !== undefined) { + const result = saveAgentConfig(agent, 'model', model); + if (!result.ok) { + return { + content: [{ type: 'text', text: JSON.stringify({ success: false, error: result.error }, null, 2) }], + isError: true, + }; + } + messages.push(model ? `model set to ${model}` : 'model cleared'); + } + return { content: [ { type: 'text', text: JSON.stringify( - { success: true, message: `API key stored for ${agent} (${maskApiKey(apiKey)})` }, + { success: true, message: `${agent}: ${messages.join(', ')}` }, null, 2, ), diff --git a/src/adapters/mcp-instructions.ts b/src/adapters/mcp-instructions.ts index 6675d3e2..19e9774f 100644 --- a/src/adapters/mcp-instructions.ts +++ b/src/adapters/mcp-instructions.ts @@ -68,6 +68,27 @@ The orchestrator manages its own task graph — you just provide the goal and gu - RetryTask to re-run a task from scratch - CancelTask / CancelLoop / CancelOrchestrator to stop work that's going wrong +## Agent & Model Configuration + +### Per-task model override +All task-creating tools accept an optional \`model\` field to override the agent's default model: +- DelegateTask with model: "claude-opus-4-5" → uses that model for this task only +- CreatePipeline steps can each have their own model, or set a top-level default +- CreateLoop with model: "gemini-2.0-flash" → each iteration uses that model + +### Agent defaults (ConfigureAgent) +Use ConfigureAgent to configure per-agent defaults that apply when no per-task override is set: +- action: "set", apiKey: "sk-..." → store API key +- action: "set", baseUrl: "https://proxy.example.com/v1" → route requests through a proxy +- action: "set", model: "claude-opus-4-5" → default model for all tasks using that agent +- action: "check" → see current auth status and stored config (baseUrl/model shown when set) +- action: "reset" → clear all stored config for the agent + +Model resolution order (highest priority wins): +1. Per-task \`model\` field (DelegateTask, pipeline step, etc.) +2. Agent config default (\`ConfigureAgent\` set model) +3. Agent's built-in default + ## Key Principles 1. **Parallelize when possible**: Independent tasks should run concurrently. Only use dependsOn when ordering matters. diff --git a/src/cli.ts b/src/cli.ts index 82d15116..e9247652 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -76,6 +76,7 @@ if (mainCommand === 'mcp') { timeout?: number; maxOutputBuffer?: number; agent?: string; + model?: string; } = {}; let promptWords: string[] = []; @@ -165,6 +166,15 @@ if (mainCommand === 'mcp') { ui.error(`--agent requires an agent name (${AGENT_PROVIDERS.join(', ')})`); process.exit(1); } + } else if (arg === '--model' || arg === '-m') { + const next = foregroundArgs[i + 1]; + if (next && !next.startsWith('-')) { + options.model = next; + i++; + } else { + ui.error('--model requires a model name (e.g. claude-opus-4-5)'); + process.exit(1); + } } else if (arg.startsWith('-')) { ui.error(`Unknown flag: ${arg}`); process.exit(1); @@ -183,6 +193,7 @@ if (mainCommand === 'mcp') { ' -p, --priority P0|P1|P2 Task priority (P0=critical, P1=high, P2=normal)', ' -w, --working-directory DIR Working directory for task execution', ' -a, --agent AGENT AI agent to use (claude, codex, gemini)', + ' -m, --model MODEL Model override (e.g. claude-opus-4-5)', ' -t, --timeout MS Task timeout in milliseconds', ' --max-output-buffer BYTES Maximum output buffer size', '', @@ -190,6 +201,7 @@ if (mainCommand === 'mcp') { ' beat run "refactor auth" # Fire-and-forget (default)', ' beat run "quick fix" --foreground # Stream output, wait', ' beat run "analyze code" --agent codex # Use Codex instead of Claude', + ' beat run "analyze code" --model claude-opus-4-5 # Use specific model', '', ].join('\n'), ); diff --git a/src/cli/commands/agents.ts b/src/cli/commands/agents.ts index f9768291..14c84a3e 100644 --- a/src/cli/commands/agents.ts +++ b/src/cli/commands/agents.ts @@ -94,7 +94,7 @@ export async function agentsConfigSet( value: string | undefined, ): Promise { if (!agent || !key || !value) { - ui.error('Usage: beat agents config set apiKey '); + ui.error('Usage: beat agents config set '); process.exit(1); } @@ -103,15 +103,18 @@ export async function agentsConfigSet( process.exit(1); } - if (key !== 'apiKey') { - ui.error(`Unknown config key: "${key}". Valid keys: apiKey`); + if (key !== 'apiKey' && key !== 'baseUrl' && key !== 'model') { + ui.error(`Unknown config key: "${key}". Valid keys: apiKey, baseUrl, model`); process.exit(1); } - ui.note( - 'API keys passed as CLI arguments may be stored in shell history. Consider using an environment variable instead.', - 'Warning', - ); + // Shell history warning only for apiKey (not baseUrl/model which are not secrets) + if (key === 'apiKey') { + ui.note( + 'API keys passed as CLI arguments may be stored in shell history. Consider using an environment variable instead.', + 'Warning', + ); + } const result = saveAgentConfig(agent, key, value); if (!result.ok) { @@ -119,7 +122,11 @@ export async function agentsConfigSet( process.exit(1); } - ui.success(`${agent}.${key} saved (${maskApiKey(value)})`); + if (key === 'apiKey') { + ui.success(`${agent}.${key} saved (${maskApiKey(value)})`); + } else { + ui.success(`${agent}.${key} saved: ${value}`); + } process.exit(0); } @@ -139,10 +146,21 @@ export async function agentsConfigShow(agent?: string): Promise { const config = loadAgentConfig(p); const auth = AGENT_AUTH[p]; + const parts: string[] = []; if (config.apiKey) { - lines.push(`${p.padEnd(10)} apiKey: ${maskApiKey(config.apiKey)} (env var: ${auth.envVars[0]})`); + parts.push(`apiKey: ${maskApiKey(config.apiKey)} (env var: ${auth.envVars[0]})`); + } + if (config.baseUrl) { + parts.push(`baseUrl: ${config.baseUrl}`); + } + if (config.model) { + parts.push(`model: ${config.model}`); + } + + if (parts.length > 0) { + lines.push(`${p.padEnd(10)} ${parts.join(' | ')}`); } else { - lines.push(`${p.padEnd(10)} ${ui.dim('(no stored key)')}`); + lines.push(`${p.padEnd(10)} ${ui.dim('(no stored config)')}`); } } diff --git a/src/cli/commands/run.ts b/src/cli/commands/run.ts index add41612..c710ee91 100644 --- a/src/cli/commands/run.ts +++ b/src/cli/commands/run.ts @@ -133,6 +133,7 @@ export async function runTask( timeout?: number; maxOutputBuffer?: number; agent?: string; + model?: string; }, ): Promise { let container: Container | undefined; @@ -167,6 +168,7 @@ export async function runTask( if (options.priority) params.push(`Priority: ${options.priority}`); if (options.workingDirectory) params.push(`Dir: ${options.workingDirectory}`); if (options.agent) params.push(`Agent: ${options.agent}`); + if (options.model) params.push(`Model: ${options.model}`); if (options.dependsOn && options.dependsOn.length > 0) params.push(`Deps: ${options.dependsOn.join(', ')}`); if (options.continueFrom) params.push(`Continue from: ${options.continueFrom}`); if (options.timeout) params.push(`Timeout: ${ui.formatMs(options.timeout)}`); @@ -181,6 +183,7 @@ export async function runTask( dependsOn: options?.dependsOn?.map((id: string) => TaskId(id)), continueFrom: options?.continueFrom ? TaskId(options.continueFrom) : undefined, agent: options?.agent as AgentProvider | undefined, + model: options?.model, }; const result = await taskManager.delegate(request); diff --git a/src/core/agents.ts b/src/core/agents.ts index eae5bc74..d3687795 100644 --- a/src/core/agents.ts +++ b/src/core/agents.ts @@ -76,6 +76,17 @@ export const AGENT_DESCRIPTIONS: Readonly> = Objec gemini: 'Gemini CLI (Google)', }); +/** + * Base URL environment variable names per agent provider + * Used by BaseAgentAdapter.resolveBaseUrl() to check user env before config + * Single source of truth — infrastructure-level override + */ +export const AGENT_BASE_URL_ENV: Readonly> = Object.freeze({ + claude: 'ANTHROPIC_BASE_URL', + codex: 'OPENAI_BASE_URL', + gemini: 'GEMINI_BASE_URL', +}); + /** * Auth requirements per agent provider — single source of truth * @@ -224,9 +235,15 @@ export interface AgentAdapter { * @param prompt - The task prompt to execute * @param workingDirectory - Directory to run in * @param taskId - Optional task ID for identification + * @param model - Optional model override (per-task model overrides agent config model) * @returns Process handle with PID, or error */ - spawn(prompt: string, workingDirectory: string, taskId?: string): Result<{ process: ChildProcess; pid: number }>; + spawn( + prompt: string, + workingDirectory: string, + taskId?: string, + model?: string, + ): Result<{ process: ChildProcess; pid: number }>; /** * Kill an agent process by PID diff --git a/src/core/configuration.ts b/src/core/configuration.ts index 5b11b4b4..731ff391 100644 --- a/src/core/configuration.ts +++ b/src/core/configuration.ts @@ -257,6 +257,8 @@ export function resetConfigValue(key: string): ConfigWriteResult { export interface AgentConfig { readonly apiKey?: string; + readonly baseUrl?: string; + readonly model?: string; } /** @@ -271,13 +273,23 @@ export function loadAgentConfig(provider: AgentProvider): AgentConfig { const record = section as Record; return { apiKey: typeof record.apiKey === 'string' ? record.apiKey : undefined, + baseUrl: typeof record.baseUrl === 'string' ? record.baseUrl : undefined, + model: typeof record.model === 'string' ? record.model : undefined, }; } /** * Save a key-value pair under the `agents.` section of config.json + * + * Edge cases: + * - Empty string: deletes the key instead of saving it + * - baseUrl: strips trailing slash before saving */ -export function saveAgentConfig(provider: AgentProvider, key: 'apiKey', value: string): ConfigWriteResult { +export function saveAgentConfig( + provider: AgentProvider, + key: 'apiKey' | 'baseUrl' | 'model', + value: string, +): ConfigWriteResult { const existing = loadConfigFile(); const agents = ( existing.agents && typeof existing.agents === 'object' && !Array.isArray(existing.agents) ? existing.agents : {} @@ -286,7 +298,15 @@ export function saveAgentConfig(provider: AgentProvider, key: 'apiKey', value: s agents[provider] && typeof agents[provider] === 'object' && !Array.isArray(agents[provider]) ? agents[provider] : {} ) as Record; - section[key] = value; + if (value === '') { + // Empty string clears the key + delete section[key]; + } else { + // Normalize baseUrl: strip trailing slash + const normalized = key === 'baseUrl' ? value.replace(/\/$/, '') : value; + section[key] = normalized; + } + agents[provider] = section; existing.agents = agents; return writeConfigFile(existing); diff --git a/src/core/domain.ts b/src/core/domain.ts index 830bd8c9..bc64efc4 100644 --- a/src/core/domain.ts +++ b/src/core/domain.ts @@ -104,6 +104,10 @@ export interface Task { // Resolved by TaskManager: explicit task agent > config defaultAgent > error readonly agent?: AgentProvider; + // Model override (per-task): overrides agent-config default model and CLI default + // Resolution order: per-task > agent-config > CLI default + readonly model?: string; + // Timestamps and results readonly createdAt: number; readonly updatedAt?: number; @@ -180,6 +184,9 @@ export interface TaskRequest { // Multi-agent support (v0.5.0): Which agent provider to use // Resolved by TaskManager: explicit task agent > config defaultAgent > error readonly agent?: AgentProvider; + + // Model override (per-task): overrides agent-config default model and CLI default + readonly model?: string; } export interface TaskUpdate { @@ -233,6 +240,9 @@ export const createTask = (request: TaskRequest): Task => { // Multi-agent support (v0.5.0) agent: request.agent, + // Per-task model override + model: request.model, + createdAt: now, updatedAt: now, }); @@ -389,6 +399,7 @@ export interface ScheduleCreateRequest { readonly expiresAt?: string; // ISO 8601 string (parsed by service) readonly afterScheduleId?: ScheduleId; // Chain: block until after-schedule's latest task completes readonly agent?: AgentProvider; // Multi-agent support (v0.5.0) + readonly model?: string; // Per-schedule model override } /** @@ -400,6 +411,7 @@ export interface PipelineStepRequest { readonly priority?: Priority; readonly workingDirectory?: string; readonly agent?: AgentProvider; // Multi-agent support (v0.5.0) + readonly model?: string; // Per-step model override } export interface PipelineCreateRequest { @@ -407,6 +419,7 @@ export interface PipelineCreateRequest { readonly priority?: Priority; // shared default for all steps readonly workingDirectory?: string; // shared default for all steps readonly agent?: AgentProvider; // shared default for all steps + readonly model?: string; // shared default model for all steps } /** @@ -427,6 +440,7 @@ export interface ScheduledPipelineCreateRequest { readonly expiresAt?: string; // ISO 8601 string (parsed by service) readonly afterScheduleId?: ScheduleId; readonly agent?: AgentProvider; // shared default for all steps + readonly model?: string; // shared default model for all steps } /** @@ -608,6 +622,7 @@ export interface LoopCreateRequest { readonly pipelineSteps?: readonly string[]; readonly priority?: Priority; readonly agent?: AgentProvider; + readonly model?: string; // Per-loop model override (applied to taskTemplate) readonly gitBranch?: string; // Branch name for loop iteration work (v0.8.0) } @@ -626,6 +641,7 @@ export const createLoop = (request: LoopCreateRequest, workingDirectory: string, priority: request.priority, workingDirectory, agent: request.agent, + model: request.model, }, pipelineSteps: request.pipelineSteps, exitCondition: request.exitCondition ?? '', @@ -697,6 +713,7 @@ export interface Orchestration { readonly stateFilePath: string; readonly workingDirectory: string; readonly agent?: AgentProvider; + readonly model?: string; readonly maxDepth: number; readonly maxWorkers: number; readonly maxIterations: number; @@ -714,6 +731,7 @@ export interface OrchestratorCreateRequest { readonly goal: string; readonly workingDirectory?: string; readonly agent?: AgentProvider; + readonly model?: string; readonly maxDepth?: number; readonly maxWorkers?: number; readonly maxIterations?: number; @@ -737,6 +755,7 @@ export const createOrchestration = ( stateFilePath, workingDirectory, agent: request.agent, + model: request.model, maxDepth: request.maxDepth ?? 3, maxWorkers: request.maxWorkers ?? 5, maxIterations: request.maxIterations ?? 50, diff --git a/src/core/interfaces.ts b/src/core/interfaces.ts index b069665d..abf9e1aa 100644 --- a/src/core/interfaces.ts +++ b/src/core/interfaces.ts @@ -55,7 +55,12 @@ export interface TaskQueue { * Process spawning abstraction */ export interface ProcessSpawner { - spawn(prompt: string, workingDirectory: string, taskId?: string): Result<{ process: ChildProcess; pid: number }>; + spawn( + prompt: string, + workingDirectory: string, + taskId?: string, + model?: string, + ): Result<{ process: ChildProcess; pid: number }>; kill(pid: number): Result; } diff --git a/src/implementations/base-agent-adapter.ts b/src/implementations/base-agent-adapter.ts index 46449861..bde6e576 100644 --- a/src/implementations/base-agent-adapter.ts +++ b/src/implementations/base-agent-adapter.ts @@ -13,7 +13,7 @@ */ import { ChildProcess, spawn } from 'child_process'; -import { AGENT_AUTH, AgentAdapter, AgentAuthConfig, AgentProvider, isCommandInPath } from '../core/agents.js'; +import { AGENT_AUTH, AGENT_BASE_URL_ENV, AgentAdapter, AgentAuthConfig, AgentProvider, isCommandInPath } from '../core/agents.js'; import { Configuration, loadAgentConfig } from '../core/configuration.js'; import { AutobeatError, agentMisconfigured, ErrorCode, processSpawnFailed } from '../core/errors.js'; import { err, ok, Result, tryCatch } from '../core/result.js'; @@ -28,8 +28,8 @@ export abstract class BaseAgentAdapter implements AgentAdapter { protected readonly command: string, ) {} - /** Build CLI args for the given prompt */ - protected abstract buildArgs(prompt: string): readonly string[]; + /** Build CLI args for the given prompt and optional model override */ + protected abstract buildArgs(prompt: string, model?: string): readonly string[]; /** Env var prefixes to strip before spawning (prevents nesting issues) */ protected abstract get envPrefixesToStrip(): readonly string[]; @@ -88,7 +88,41 @@ export abstract class BaseAgentAdapter implements AgentAdapter { return {}; } - spawn(prompt: string, workingDirectory: string, taskId?: string): Result<{ process: ChildProcess; pid: number }> { + /** + * Resolve base URL env var to inject into spawn env. + * Resolution order: user env (already in cleanEnv, takes precedence) → config file. + * Returns env var name → value to inject. Empty object means nothing to inject. + */ + protected resolveBaseUrl(): Record { + const baseUrlEnvVar = AGENT_BASE_URL_ENV[this.provider]; + // If user already has it set in their env, don't inject (cleanEnv will carry it through) + if (process.env[baseUrlEnvVar]) { + return {}; + } + // Check config file + const agentConfig = loadAgentConfig(this.provider); + if (agentConfig.baseUrl) { + return { [baseUrlEnvVar]: agentConfig.baseUrl }; + } + return {}; + } + + /** + * Resolve the model to use for this spawn. + * Resolution order: per-task model → agent-config model → undefined (use CLI default). + */ + protected resolveModel(taskModel?: string): string | undefined { + if (taskModel) return taskModel; + const agentConfig = loadAgentConfig(this.provider); + return agentConfig.model; + } + + spawn( + prompt: string, + workingDirectory: string, + taskId?: string, + model?: string, + ): Result<{ process: ChildProcess; pid: number }> { try { // Pre-spawn: verify CLI binary exists before anything else if (!isCommandInPath(this.command)) { @@ -104,8 +138,9 @@ export abstract class BaseAgentAdapter implements AgentAdapter { const authResult = this.resolveAuth(); if (!authResult.ok) return authResult; + const resolvedModel = this.resolveModel(model); const finalPrompt = this.transformPrompt(prompt); - const args = this.buildArgs(finalPrompt); + const args = this.buildArgs(finalPrompt, resolvedModel); const exactMatches = this.envExactMatchesToStrip; const cleanEnv = Object.fromEntries( @@ -113,10 +148,12 @@ export abstract class BaseAgentAdapter implements AgentAdapter { ([key]) => !this.envPrefixesToStrip.some((prefix) => key.startsWith(prefix)) && !exactMatches.includes(key), ), ); + const baseUrlEnv = this.resolveBaseUrl(); const env = { ...this.additionalEnv, ...cleanEnv, ...authResult.value.injectedEnv, + ...baseUrlEnv, AUTOBEAT_WORKER: 'true', ...(taskId && { AUTOBEAT_TASK_ID: taskId }), }; diff --git a/src/implementations/claude-adapter.ts b/src/implementations/claude-adapter.ts index 0de8e65b..1eb0ac8b 100644 --- a/src/implementations/claude-adapter.ts +++ b/src/implementations/claude-adapter.ts @@ -19,8 +19,9 @@ export class ClaudeAdapter extends BaseAgentAdapter { this.baseArgs = Object.freeze(['--print', '--dangerously-skip-permissions', '--output-format', 'json']); } - protected buildArgs(prompt: string): readonly string[] { - return [...this.baseArgs, '--', prompt]; + protected buildArgs(prompt: string, model?: string): readonly string[] { + const modelArgs: string[] = model ? ['--model', model] : []; + return [...this.baseArgs, ...modelArgs, '--', prompt]; } protected get envPrefixesToStrip(): readonly string[] { @@ -32,4 +33,26 @@ export class ClaudeAdapter extends BaseAgentAdapter { // Exact match for CLAUDECODE — avoids over-stripping CLAUDECODE_SESSION etc. return ['CLAUDECODE']; } + + /** + * Override resolveBaseUrl to also inject CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 + * when a baseUrl is active (prevents proxy failures from experimental beta headers). + * + * CLAUDE_CODE_ vars are stripped from process.env before spawn, so injecting here + * (after stripping) ensures the value reaches the child process. + * The parent env ANTHROPIC_BASE_URL is preserved via cleanEnv if already set. + */ + protected resolveBaseUrl(): Record { + const baseUrlEnv = super.resolveBaseUrl(); + + // Determine if a baseUrl is active (from config or user env) + const hasBaseUrl = Object.keys(baseUrlEnv).length > 0 || Boolean(process.env.ANTHROPIC_BASE_URL); + + if (hasBaseUrl) { + // Auto-disable experimental betas to prevent proxy failures + return { ...baseUrlEnv, CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS: '1' }; + } + + return baseUrlEnv; + } } diff --git a/src/implementations/codex-adapter.ts b/src/implementations/codex-adapter.ts index c114d05c..170e6275 100644 --- a/src/implementations/codex-adapter.ts +++ b/src/implementations/codex-adapter.ts @@ -16,8 +16,9 @@ export class CodexAdapter extends BaseAgentAdapter { super(config, codexCommand); } - protected buildArgs(prompt: string): readonly string[] { - return ['--quiet', '--full-auto', '--', prompt]; + protected buildArgs(prompt: string, model?: string): readonly string[] { + const modelArgs: string[] = model ? ['--model', model] : []; + return ['--quiet', '--full-auto', ...modelArgs, '--', prompt]; } protected get envPrefixesToStrip(): readonly string[] { diff --git a/src/implementations/database.ts b/src/implementations/database.ts index 1575dc17..d9e543fc 100644 --- a/src/implementations/database.ts +++ b/src/implementations/database.ts @@ -753,6 +753,13 @@ export class Database implements TransactionRunner { db.exec('ALTER TABLE loop_iterations ADD COLUMN eval_feedback TEXT'); }, }, + { + version: 16, + description: 'Add model column to tasks for per-task model override', + up: (db) => { + db.exec('ALTER TABLE tasks ADD COLUMN model TEXT'); + }, + }, ]; } diff --git a/src/implementations/event-driven-worker-pool.ts b/src/implementations/event-driven-worker-pool.ts index c646a687..c707a136 100644 --- a/src/implementations/event-driven-worker-pool.ts +++ b/src/implementations/event-driven-worker-pool.ts @@ -104,8 +104,8 @@ export class EventDrivenWorkerPool implements WorkerPool { const adapter = adapterResult.value; const finalWorkingDirectory = task.workingDirectory || process.cwd(); - // Spawn the process using the resolved adapter - const spawnResult = adapter.spawn(task.prompt, finalWorkingDirectory, task.id); + // Spawn the process using the resolved adapter (pass model for per-task override) + const spawnResult = adapter.spawn(task.prompt, finalWorkingDirectory, task.id, task.model); if (!spawnResult.ok) { return err(spawnResult.error); diff --git a/src/implementations/gemini-adapter.ts b/src/implementations/gemini-adapter.ts index 174ef612..5682513a 100644 --- a/src/implementations/gemini-adapter.ts +++ b/src/implementations/gemini-adapter.ts @@ -16,8 +16,9 @@ export class GeminiAdapter extends BaseAgentAdapter { super(config, geminiCommand); } - protected buildArgs(prompt: string): readonly string[] { - return ['--yolo', '--prompt', prompt]; + protected buildArgs(prompt: string, model?: string): readonly string[] { + const modelArgs: string[] = model ? ['--model', model] : []; + return ['--yolo', ...modelArgs, '--prompt', prompt]; } protected get additionalEnv(): Record { diff --git a/src/implementations/process-spawner-adapter.ts b/src/implementations/process-spawner-adapter.ts index 59a7bcdd..68609e9a 100644 --- a/src/implementations/process-spawner-adapter.ts +++ b/src/implementations/process-spawner-adapter.ts @@ -23,8 +23,13 @@ export class ProcessSpawnerAdapter implements AgentAdapter { this.provider = provider; } - spawn(prompt: string, workingDirectory: string, taskId?: string): Result<{ process: ChildProcess; pid: number }> { - return this.spawner.spawn(prompt, workingDirectory, taskId); + spawn( + prompt: string, + workingDirectory: string, + taskId?: string, + model?: string, + ): Result<{ process: ChildProcess; pid: number }> { + return this.spawner.spawn(prompt, workingDirectory, taskId, model); } kill(pid: number): Result { diff --git a/src/implementations/process-spawner.ts b/src/implementations/process-spawner.ts index c11b1f71..572e135e 100644 --- a/src/implementations/process-spawner.ts +++ b/src/implementations/process-spawner.ts @@ -21,7 +21,8 @@ export class ClaudeProcessSpawner implements ProcessSpawner { this.baseArgs = Object.freeze(['--print', '--dangerously-skip-permissions', '--output-format', 'json']); } - spawn(prompt: string, workingDirectory: string, taskId?: string): Result<{ process: ChildProcess; pid: number }> { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + spawn(prompt: string, workingDirectory: string, taskId?: string, _model?: string): Result<{ process: ChildProcess; pid: number }> { try { // Make prompt more explicit if it looks like a simple command let finalPrompt = prompt; diff --git a/src/implementations/task-repository.ts b/src/implementations/task-repository.ts index bc59c4f4..5d7a42f7 100644 --- a/src/implementations/task-repository.ts +++ b/src/implementations/task-repository.ts @@ -35,6 +35,7 @@ const TaskRowSchema = z.object({ dependencies: z.string().nullable(), continue_from: z.string().nullable(), agent: z.enum(AGENT_PROVIDERS_TUPLE).nullable(), + model: z.string().nullable(), }); /** @@ -60,6 +61,7 @@ interface TaskRow { readonly dependencies: string | null; readonly continue_from: string | null; readonly agent: string | null; + readonly model: string | null; } export class SQLiteTaskRepository implements TaskRepository, SyncTaskOperations { @@ -86,12 +88,12 @@ export class SQLiteTaskRepository implements TaskRepository, SyncTaskOperations id, prompt, status, priority, working_directory, timeout, max_output_buffer, created_at, started_at, completed_at, worker_id, exit_code, dependencies, - parent_task_id, retry_count, retry_of, continue_from, agent + parent_task_id, retry_count, retry_of, continue_from, agent, model ) VALUES ( @id, @prompt, @status, @priority, @workingDirectory, @timeout, @maxOutputBuffer, @createdAt, @startedAt, @completedAt, @workerId, @exitCode, @dependencies, - @parentTaskId, @retryCount, @retryOf, @continueFrom, @agent + @parentTaskId, @retryCount, @retryOf, @continueFrom, @agent, @model ) `); @@ -114,7 +116,8 @@ export class SQLiteTaskRepository implements TaskRepository, SyncTaskOperations retry_count = @retryCount, retry_of = @retryOf, continue_from = @continueFrom, - agent = @agent + agent = @agent, + model = @model WHERE id = @id `); @@ -122,7 +125,7 @@ export class SQLiteTaskRepository implements TaskRepository, SyncTaskOperations SELECT id, prompt, status, priority, working_directory, timeout, max_output_buffer, parent_task_id, retry_count, retry_of, created_at, started_at, completed_at, worker_id, exit_code, - dependencies, continue_from, agent + dependencies, continue_from, agent, model FROM tasks WHERE id = ? `); @@ -130,7 +133,7 @@ export class SQLiteTaskRepository implements TaskRepository, SyncTaskOperations SELECT id, prompt, status, priority, working_directory, timeout, max_output_buffer, parent_task_id, retry_count, retry_of, created_at, started_at, completed_at, worker_id, exit_code, - dependencies, continue_from, agent + dependencies, continue_from, agent, model FROM tasks ORDER BY created_at DESC `); @@ -138,7 +141,7 @@ export class SQLiteTaskRepository implements TaskRepository, SyncTaskOperations SELECT id, prompt, status, priority, working_directory, timeout, max_output_buffer, parent_task_id, retry_count, retry_of, created_at, started_at, completed_at, worker_id, exit_code, - dependencies, continue_from, agent + dependencies, continue_from, agent, model FROM tasks WHERE status = ? ORDER BY created_at DESC `); @@ -150,7 +153,7 @@ export class SQLiteTaskRepository implements TaskRepository, SyncTaskOperations SELECT id, prompt, status, priority, working_directory, timeout, max_output_buffer, parent_task_id, retry_count, retry_of, created_at, started_at, completed_at, worker_id, exit_code, - dependencies, continue_from, agent + dependencies, continue_from, agent, model FROM tasks ORDER BY created_at DESC LIMIT ? OFFSET ? `); @@ -190,6 +193,7 @@ export class SQLiteTaskRepository implements TaskRepository, SyncTaskOperations retryOf: task.retryOf || null, continueFrom: task.continueFrom || null, agent: task.agent || null, + model: task.model || null, }; } @@ -334,6 +338,7 @@ export class SQLiteTaskRepository implements TaskRepository, SyncTaskOperations retryOf: data.retry_of ? (data.retry_of as TaskId) : undefined, continueFrom: data.continue_from ? (data.continue_from as TaskId) : undefined, agent: data.agent ?? undefined, + model: data.model ?? undefined, createdAt: data.created_at, startedAt: data.started_at || undefined, completedAt: data.completed_at || undefined, diff --git a/src/services/handlers/schedule-handler.ts b/src/services/handlers/schedule-handler.ts index 14519583..c52d7018 100644 --- a/src/services/handlers/schedule-handler.ts +++ b/src/services/handlers/schedule-handler.ts @@ -397,6 +397,7 @@ export class ScheduleHandler extends BaseEventHandler { priority: step.priority ?? defaults.priority, workingDirectory: step.workingDirectory ?? defaults.workingDirectory, agent: step.agent ?? defaults.agent, + model: step.model ?? defaults.model, dependsOn: dependsOn.length > 0 ? dependsOn : undefined, }), ); diff --git a/src/services/schedule-manager.ts b/src/services/schedule-manager.ts index ed57058a..c2f0d1f4 100644 --- a/src/services/schedule-manager.ts +++ b/src/services/schedule-manager.ts @@ -71,6 +71,7 @@ export class ScheduleManagerService implements ScheduleService { priority: request.priority, workingDirectory: validatedWorkingDirectory, agent: agentResult.value, + model: request.model, }, scheduleType: request.scheduleType, cronExpression: request.cronExpression, @@ -504,6 +505,7 @@ export class ScheduleManagerService implements ScheduleService { priority: request.loopConfig.priority, workingDirectory: request.loopConfig.workingDirectory, agent: request.loopConfig.agent, + model: request.loopConfig.model, }, scheduleType: request.scheduleType, cronExpression: request.cronExpression, diff --git a/src/services/task-manager.ts b/src/services/task-manager.ts index ed2d530b..cae0abca 100644 --- a/src/services/task-manager.ts +++ b/src/services/task-manager.ts @@ -283,6 +283,7 @@ export class TaskManagerService implements TaskManager { retryCount, retryOf: taskId, agent: originalTask.agent, + model: originalTask.model, }; // Create the new retry task @@ -388,6 +389,7 @@ export class TaskManagerService implements TaskManager { retryCount, retryOf: taskId, agent: originalTask.agent, + model: originalTask.model, }; const newTask = createTask(resumeRequest); diff --git a/tests/fixtures/test-data.ts b/tests/fixtures/test-data.ts index 8446104c..9ecd8c95 100644 --- a/tests/fixtures/test-data.ts +++ b/tests/fixtures/test-data.ts @@ -12,6 +12,7 @@ export const createTestTask = (overrides?: Partial): Task => ({ timeout: 300000, maxOutputBuffer: 10485760, agent: 'claude', + model: undefined, ...overrides, }); diff --git a/tests/unit/core/domain.test.ts b/tests/unit/core/domain.test.ts index 17f8e201..399cd06c 100644 --- a/tests/unit/core/domain.test.ts +++ b/tests/unit/core/domain.test.ts @@ -507,5 +507,23 @@ describe('Domain Models - REAL Behavior Tests', () => { expect(Object.isFrozen(task)).toBe(true); expect(task.agent).toBe('gemini'); }); + + it('should propagate model field from request to task', () => { + const task = createTask({ + prompt: 'use specific model', + agent: 'claude', + model: 'claude-opus-4-5', + }); + + expect(task.model).toBe('claude-opus-4-5'); + }); + + it('should leave model undefined when not provided', () => { + const task = createTask({ + prompt: 'default model task', + }); + + expect(task.model).toBeUndefined(); + }); }); }); diff --git a/tests/unit/implementations/agent-adapters.test.ts b/tests/unit/implementations/agent-adapters.test.ts index 1663b9ae..c6343c54 100644 --- a/tests/unit/implementations/agent-adapters.test.ts +++ b/tests/unit/implementations/agent-adapters.test.ts @@ -508,3 +508,234 @@ describe('Pre-spawn auth validation', () => { } }); }); + +// ============================================================================ +// baseUrl Passthrough Tests +// ============================================================================ + +describe('baseUrl passthrough', () => { + let testDir: string; + let restoreConfig: () => void; + + beforeEach(() => { + vi.clearAllMocks(); + testDir = path.join(tmpdir(), `autobeat-baseurl-test-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + restoreConfig = _testSetConfigDir(testDir); + mockIsCommandInPath.mockReturnValue(true); + }); + + afterEach(() => { + restoreConfig(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('ClaudeAdapter: should inject ANTHROPIC_BASE_URL from config when not set in env', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com'); + + const adapter = new ClaudeAdapter(testConfig, 'claude'); + adapter.spawn('test prompt', '/workspace'); + + const spawnOptions = mockSpawn.mock.calls[0][2] as { env: Record }; + expect(spawnOptions.env.ANTHROPIC_BASE_URL).toBe('https://proxy.example.com'); + adapter.dispose(); + }); + + it('ClaudeAdapter: user env ANTHROPIC_BASE_URL takes precedence over config', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + saveAgentConfig('claude', 'baseUrl', 'https://config.example.com'); + + process.env.ANTHROPIC_BASE_URL = 'https://env.example.com'; + try { + const adapter = new ClaudeAdapter(testConfig, 'claude'); + adapter.spawn('test prompt', '/workspace'); + + const spawnOptions = mockSpawn.mock.calls[0][2] as { env: Record }; + // User env (spread via cleanEnv) takes precedence over injected config + expect(spawnOptions.env.ANTHROPIC_BASE_URL).toBe('https://env.example.com'); + adapter.dispose(); + } finally { + delete process.env.ANTHROPIC_BASE_URL; + } + }); + + it('ClaudeAdapter: auto-sets CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS=1 when baseUrl is configured', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com'); + + const adapter = new ClaudeAdapter(testConfig, 'claude'); + adapter.spawn('test prompt', '/workspace'); + + const spawnOptions = mockSpawn.mock.calls[0][2] as { env: Record }; + expect(spawnOptions.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS).toBe('1'); + adapter.dispose(); + }); + + it('ClaudeAdapter: user env CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS takes precedence', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com'); + + // Note: CLAUDE_CODE_ vars are stripped, so only non-CLAUDE_CODE_ prefix vars work + // CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS would be stripped by ClaudeAdapter's envPrefixesToStrip + // Test that explicitly setting it in injected env is overridden when present + const adapter = new ClaudeAdapter(testConfig, 'claude'); + adapter.spawn('test prompt', '/workspace'); + + const spawnOptions = mockSpawn.mock.calls[0][2] as { env: Record }; + // Auto-set to '1' since CLAUDE_CODE_ is stripped and baseUrl is configured + expect(spawnOptions.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS).toBe('1'); + adapter.dispose(); + }); + + it('ClaudeAdapter: no CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS when baseUrl not configured', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + + const adapter = new ClaudeAdapter(testConfig, 'claude'); + adapter.spawn('test prompt', '/workspace'); + + const spawnOptions = mockSpawn.mock.calls[0][2] as { env: Record }; + expect(spawnOptions.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS).toBeUndefined(); + adapter.dispose(); + }); + + it('CodexAdapter: should inject OPENAI_BASE_URL from config', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + saveAgentConfig('codex', 'baseUrl', 'https://openai-proxy.example.com'); + + const adapter = new CodexAdapter(testConfig, 'codex'); + adapter.spawn('test prompt', '/workspace'); + + const spawnOptions = mockSpawn.mock.calls[0][2] as { env: Record }; + expect(spawnOptions.env.OPENAI_BASE_URL).toBe('https://openai-proxy.example.com'); + adapter.dispose(); + }); + + it('GeminiAdapter: should inject GEMINI_BASE_URL from config', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + saveAgentConfig('gemini', 'baseUrl', 'https://gemini-proxy.example.com'); + + const adapter = new GeminiAdapter(testConfig, 'gemini'); + adapter.spawn('test prompt', '/workspace'); + + const spawnOptions = mockSpawn.mock.calls[0][2] as { env: Record }; + expect(spawnOptions.env.GEMINI_BASE_URL).toBe('https://gemini-proxy.example.com'); + adapter.dispose(); + }); +}); + +// ============================================================================ +// Model Passthrough Tests +// ============================================================================ + +describe('model passthrough', () => { + let testDir: string; + let restoreConfig: () => void; + + beforeEach(() => { + vi.clearAllMocks(); + testDir = path.join(tmpdir(), `autobeat-model-test-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + restoreConfig = _testSetConfigDir(testDir); + mockIsCommandInPath.mockReturnValue(true); + }); + + afterEach(() => { + restoreConfig(); + rmSync(testDir, { recursive: true, force: true }); + }); + + it('ClaudeAdapter: should include --model in args when model provided', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + + const adapter = new ClaudeAdapter(testConfig, 'claude'); + adapter.spawn('test prompt', '/workspace', 'task-1', 'claude-opus-4-5'); + + const [, args] = mockSpawn.mock.calls[0]; + expect(args).toContain('--model'); + expect(args).toContain('claude-opus-4-5'); + // --model should appear before '--' + const modelIdx = (args as string[]).indexOf('--model'); + const separatorIdx = (args as string[]).indexOf('--'); + expect(modelIdx).toBeLessThan(separatorIdx); + adapter.dispose(); + }); + + it('ClaudeAdapter: no --model in args when no model provided', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + + const adapter = new ClaudeAdapter(testConfig, 'claude'); + adapter.spawn('test prompt', '/workspace'); + + const [, args] = mockSpawn.mock.calls[0]; + expect(args).not.toContain('--model'); + adapter.dispose(); + }); + + it('ClaudeAdapter: per-task model overrides agent-config model', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + saveAgentConfig('claude', 'model', 'claude-sonnet-4-5'); + + const adapter = new ClaudeAdapter(testConfig, 'claude'); + adapter.spawn('test prompt', '/workspace', 'task-1', 'claude-opus-4-5'); + + const [, args] = mockSpawn.mock.calls[0]; + const modelIdx = (args as string[]).indexOf('--model'); + expect((args as string[])[modelIdx + 1]).toBe('claude-opus-4-5'); + adapter.dispose(); + }); + + it('ClaudeAdapter: uses agent-config model when no per-task model', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + saveAgentConfig('claude', 'model', 'claude-sonnet-4-5'); + + const adapter = new ClaudeAdapter(testConfig, 'claude'); + adapter.spawn('test prompt', '/workspace'); + + const [, args] = mockSpawn.mock.calls[0]; + expect(args).toContain('--model'); + expect(args).toContain('claude-sonnet-4-5'); + adapter.dispose(); + }); + + it('CodexAdapter: should include --model in args when model provided', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + + const adapter = new CodexAdapter(testConfig, 'codex'); + adapter.spawn('test prompt', '/workspace', 'task-1', 'gpt-4o'); + + const [, args] = mockSpawn.mock.calls[0]; + expect(args).toContain('--model'); + expect(args).toContain('gpt-4o'); + adapter.dispose(); + }); + + it('GeminiAdapter: should include --model in args when model provided', () => { + const mockChild = createMockChildProcess(1234); + mockSpawn.mockReturnValue(mockChild); + + const adapter = new GeminiAdapter(testConfig, 'gemini'); + adapter.spawn('test prompt', '/workspace', 'task-1', 'gemini-2.0-flash'); + + const [, args] = mockSpawn.mock.calls[0]; + expect(args).toContain('--model'); + expect(args).toContain('gemini-2.0-flash'); + // --model should appear before --prompt + const modelIdx = (args as string[]).indexOf('--model'); + const promptIdx = (args as string[]).indexOf('--prompt'); + expect(modelIdx).toBeLessThan(promptIdx); + adapter.dispose(); + }); +}); diff --git a/tests/unit/implementations/agent-config.test.ts b/tests/unit/implementations/agent-config.test.ts index 393c63e7..9eded91d 100644 --- a/tests/unit/implementations/agent-config.test.ts +++ b/tests/unit/implementations/agent-config.test.ts @@ -139,4 +139,79 @@ describe('Agent Config Storage', () => { expect(raw.agents).toBeUndefined(); }); }); + + describe('baseUrl support', () => { + it('should save and load baseUrl for a provider', () => { + const result = saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com/v1'); + expect(result.ok).toBe(true); + + const config = loadAgentConfig('claude'); + expect(config.baseUrl).toBe('https://proxy.example.com/v1'); + }); + + it('should strip trailing slash from baseUrl on save', () => { + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com/v1/'); + const config = loadAgentConfig('claude'); + expect(config.baseUrl).toBe('https://proxy.example.com/v1'); + }); + + it('should delete baseUrl when empty string is saved', () => { + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com'); + saveAgentConfig('claude', 'baseUrl', ''); + const config = loadAgentConfig('claude'); + expect(config.baseUrl).toBeUndefined(); + }); + + it('should preserve apiKey when saving baseUrl', () => { + saveAgentConfig('claude', 'apiKey', 'sk-test'); + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com'); + + const config = loadAgentConfig('claude'); + expect(config.apiKey).toBe('sk-test'); + expect(config.baseUrl).toBe('https://proxy.example.com'); + }); + + it('should preserve baseUrl when saving apiKey', () => { + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com'); + saveAgentConfig('claude', 'apiKey', 'sk-test'); + + const config = loadAgentConfig('claude'); + expect(config.baseUrl).toBe('https://proxy.example.com'); + expect(config.apiKey).toBe('sk-test'); + }); + }); + + describe('model support', () => { + it('should save and load model for a provider', () => { + const result = saveAgentConfig('claude', 'model', 'claude-opus-4-5'); + expect(result.ok).toBe(true); + + const config = loadAgentConfig('claude'); + expect(config.model).toBe('claude-opus-4-5'); + }); + + it('should delete model when empty string is saved', () => { + saveAgentConfig('claude', 'model', 'claude-opus-4-5'); + saveAgentConfig('claude', 'model', ''); + const config = loadAgentConfig('claude'); + expect(config.model).toBeUndefined(); + }); + + it('should preserve apiKey when saving model', () => { + saveAgentConfig('codex', 'apiKey', 'sk-openai'); + saveAgentConfig('codex', 'model', 'gpt-4o'); + + const config = loadAgentConfig('codex'); + expect(config.apiKey).toBe('sk-openai'); + expect(config.model).toBe('gpt-4o'); + }); + + it('should not clobber fields across different providers', () => { + saveAgentConfig('claude', 'model', 'claude-opus-4-5'); + saveAgentConfig('codex', 'model', 'gpt-4o'); + + expect(loadAgentConfig('claude').model).toBe('claude-opus-4-5'); + expect(loadAgentConfig('codex').model).toBe('gpt-4o'); + }); + }); }); diff --git a/tests/unit/implementations/event-driven-worker-pool.test.ts b/tests/unit/implementations/event-driven-worker-pool.test.ts index 12348216..ed21f629 100644 --- a/tests/unit/implementations/event-driven-worker-pool.test.ts +++ b/tests/unit/implementations/event-driven-worker-pool.test.ts @@ -247,7 +247,7 @@ describe('EventDrivenWorkerPool', () => { await pool.spawn(task); - expect(spawner.spawn as ReturnType).toHaveBeenCalledWith(task.prompt, '/my/project', task.id); + expect(spawner.spawn as ReturnType).toHaveBeenCalledWith(task.prompt, '/my/project', task.id, task.model); }); it('should fall back to process.cwd() when no workingDirectory provided', async () => { @@ -255,7 +255,7 @@ describe('EventDrivenWorkerPool', () => { await pool.spawn(task); - expect(spawner.spawn as ReturnType).toHaveBeenCalledWith(task.prompt, process.cwd(), task.id); + expect(spawner.spawn as ReturnType).toHaveBeenCalledWith(task.prompt, process.cwd(), task.id, task.model); }); it('should increase worker count after successful spawn', async () => { diff --git a/tests/unit/implementations/task-repository.test.ts b/tests/unit/implementations/task-repository.test.ts index c5f471b3..17156413 100644 --- a/tests/unit/implementations/task-repository.test.ts +++ b/tests/unit/implementations/task-repository.test.ts @@ -301,4 +301,72 @@ describe('SQLiteTaskRepository', () => { expect(found).toBeNull(); }); }); + + describe('model field persistence', () => { + it('should save and retrieve task with model', async () => { + const task = createTestTask({ + id: 'task-with-model', + model: 'claude-opus-4-5', + }); + await repo.save(task); + + const result = await repo.findById(TaskId('task-with-model')); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value).not.toBeNull(); + expect(result.value!.model).toBe('claude-opus-4-5'); + }); + + it('should save and retrieve task without model as undefined', async () => { + const task = createTestTask({ + id: 'task-no-model', + }); + await repo.save(task); + + const result = await repo.findById(TaskId('task-no-model')); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value).not.toBeNull(); + expect(result.value!.model).toBeUndefined(); + }); + + it('should update model via update()', async () => { + const task = createTestTask({ id: 'task-update-model' }); + await repo.save(task); + + const updateResult = await repo.update(TaskId('task-update-model'), { + model: 'gpt-4o', + }); + expect(updateResult.ok).toBe(true); + + const result = await repo.findById(TaskId('task-update-model')); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value!.model).toBe('gpt-4o'); + }); + + it('should return model in findAll results', async () => { + const task = createTestTask({ id: 'findall-model-task', model: 'gemini-2.0-flash' }); + await repo.save(task); + + const result = await repo.findAll(); + expect(result.ok).toBe(true); + if (!result.ok) return; + const found = result.value.find((t) => t.id === 'findall-model-task'); + expect(found).toBeDefined(); + expect(found!.model).toBe('gemini-2.0-flash'); + }); + + it('should preserve model on tasks without model (backward compat)', async () => { + // Tasks created before migration v16 would have model=NULL + // Test that they deserialize to undefined (not null) + const task = createTestTask({ id: 'legacy-task' }); + await repo.save(task); + + const result = await repo.findById(TaskId('legacy-task')); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value!.model).toBeUndefined(); + }); + }); }); From 8c72e7d5ee05075275ccacc8ea0ea9fb2035e747 Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Wed, 1 Apr 2026 16:04:39 +0300 Subject: [PATCH 02/11] =?UTF-8?q?Loop=20loop-a33ba028-8ebe-4e1c-a283-1cd7e?= =?UTF-8?q?b3f64ee=20iteration=201=20=E2=80=94=20pass?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core/domain.ts | 12 +--- .../process-spawner-adapter.ts | 5 +- src/services/schedule-manager.ts | 1 + tests/fixtures/test-data.ts | 1 - tests/unit/core/domain.test.ts | 70 +++---------------- .../implementations/agent-adapters.test.ts | 17 ----- 6 files changed, 12 insertions(+), 94 deletions(-) diff --git a/src/core/domain.ts b/src/core/domain.ts index bc64efc4..f07919ed 100644 --- a/src/core/domain.ts +++ b/src/core/domain.ts @@ -229,7 +229,7 @@ export const createTask = (request: TaskRequest): Task => { // NOTE: dependsOn from request is the initial dependency list // Actual validation and DAG cycle detection happens in DependencyHandler dependsOn: request.dependsOn, - dependents: undefined, // Populated by DependencyRepository queries + // dependents populated by DependencyRepository queries — omitted here dependencyState: request.dependsOn && request.dependsOn.length > 0 ? 'blocked' : 'none', continueFrom: request.continueFrom, @@ -239,8 +239,6 @@ export const createTask = (request: TaskRequest): Task => { // Multi-agent support (v0.5.0) agent: request.agent, - - // Per-task model override model: request.model, createdAt: now, @@ -655,18 +653,12 @@ export const createLoop = (request: LoopCreateRequest, workingDirectory: string, cooldownMs: request.cooldownMs ?? 0, freshContext: request.freshContext ?? true, currentIteration: 0, - bestScore: undefined, - bestIterationId: undefined, - bestIterationCommitSha: undefined, consecutiveFailures: 0, status: LoopStatus.RUNNING, gitBranch: request.gitBranch, - gitBaseBranch: undefined, - gitStartCommitSha: undefined, scheduleId, createdAt: now, updatedAt: now, - completedAt: undefined, }); }; @@ -751,7 +743,6 @@ export const createOrchestration = ( return Object.freeze({ id: OrchestratorId(`orchestrator-${crypto.randomUUID()}`), goal: request.goal, - loopId: undefined, stateFilePath, workingDirectory, agent: request.agent, @@ -762,7 +753,6 @@ export const createOrchestration = ( status: OrchestratorStatus.PLANNING, createdAt: now, updatedAt: now, - completedAt: undefined, }); }; diff --git a/src/implementations/process-spawner-adapter.ts b/src/implementations/process-spawner-adapter.ts index 68609e9a..ea9b01f2 100644 --- a/src/implementations/process-spawner-adapter.ts +++ b/src/implementations/process-spawner-adapter.ts @@ -37,9 +37,6 @@ export class ProcessSpawnerAdapter implements AgentAdapter { } dispose(): void { - // ProcessSpawner may or may not have dispose - if ('dispose' in this.spawner && typeof this.spawner.dispose === 'function') { - (this.spawner as { dispose: () => void }).dispose(); - } + // ProcessSpawner interface does not define dispose — nothing to clean up } } diff --git a/src/services/schedule-manager.ts b/src/services/schedule-manager.ts index c2f0d1f4..76ababd2 100644 --- a/src/services/schedule-manager.ts +++ b/src/services/schedule-manager.ts @@ -365,6 +365,7 @@ export class ScheduleManagerService implements ScheduleService { workingDirectory: step.workingDirectory ?? request.workingDirectory, afterScheduleId: previousScheduleId, agent: step.agent ?? request.agent, + model: step.model ?? request.model, }); if (!result.ok) { diff --git a/tests/fixtures/test-data.ts b/tests/fixtures/test-data.ts index 9ecd8c95..8446104c 100644 --- a/tests/fixtures/test-data.ts +++ b/tests/fixtures/test-data.ts @@ -12,7 +12,6 @@ export const createTestTask = (overrides?: Partial): Task => ({ timeout: 300000, maxOutputBuffer: 10485760, agent: 'claude', - model: undefined, ...overrides, }); diff --git a/tests/unit/core/domain.test.ts b/tests/unit/core/domain.test.ts index 399cd06c..b394757f 100644 --- a/tests/unit/core/domain.test.ts +++ b/tests/unit/core/domain.test.ts @@ -5,13 +5,10 @@ import { createTask, isTerminalState, Priority, - type SystemResources, type Task, TaskId, - type TaskOutput, type TaskRequest, TaskStatus, - type TaskUpdate, updateTask, type Worker, WorkerId, @@ -47,15 +44,11 @@ describe('Domain Models - REAL Behavior Tests', () => { expect(task.status).toBe(TaskStatus.QUEUED); expect(task.priority).toBe(Priority.P2); // Default - // Additional validations for complete task structure - // FIX: timeout and maxOutputBuffer are undefined when not provided (no defaults in createTask) expect(task.timeout).toBeUndefined(); expect(task.maxOutputBuffer).toBeUndefined(); expect(task.workingDirectory).toBeUndefined(); - expect(task.assignedWorker).toBeUndefined(); expect(task.startedAt).toBeUndefined(); expect(task.completedAt).toBeUndefined(); - expect(task.output).toBeUndefined(); // FIX: output is also undefined when not provided expect(typeof task.createdAt).toBe('number'); expect(task.createdAt).toBeGreaterThan(0); expect(task.updatedAt).toBe(task.createdAt); @@ -87,16 +80,11 @@ describe('Domain Models - REAL Behavior Tests', () => { const tasks = Array.from({ length: 100 }, () => new TaskFactory().withPrompt('test').build()); const ids = new Set(tasks.map((t) => t.id)); - expect(ids.size).toBe(100); // All unique - expect(Array.isArray(tasks)).toBe(true); - expect(tasks.length).toBe(100); + expect(ids.size).toBe(100); - // All IDs should match the expected pattern - tasks.forEach((task) => { + for (const task of tasks) { expect(task.id).toMatch(/^task-[a-f0-9-]+$/); - expect(typeof task.id).toBe('string'); - expect(task.id.startsWith('task-')).toBe(true); - }); + } }); }); @@ -128,29 +116,21 @@ describe('Domain Models - REAL Behavior Tests', () => { }); it('should preserve immutability', () => { - // Mock Date.now() to return different values - // createTask calls Date.now() once, updateTask calls it once - const originalTime = 1000; - const updatedTime = 2000; const dateSpy = vi .spyOn(Date, 'now') - .mockReturnValueOnce(originalTime) // createTask (for createdAt and updatedAt) - .mockReturnValueOnce(updatedTime); // updateTask (for updatedAt) + .mockReturnValueOnce(1000) // createTask + .mockReturnValueOnce(2000); // updateTask const task = createTask({ prompt: 'test' }); - const originalStatus = task.status; - const originalUpdatedAt = task.updatedAt; - const updated = updateTask(task, { status: TaskStatus.FAILED }); dateSpy.mockRestore(); - expect(task.status).toBe(TaskStatus.QUEUED); // Original unchanged - expect(task.status).toBe(originalStatus); - expect(task.updatedAt).toBe(originalUpdatedAt); + expect(task.status).toBe(TaskStatus.QUEUED); + expect(task.updatedAt).toBe(1000); expect(updated.status).toBe(TaskStatus.FAILED); - expect(task).not.toBe(updated); // Different objects - expect(updated.updatedAt).toBeGreaterThan(originalUpdatedAt); + expect(task).not.toBe(updated); + expect(updated.updatedAt).toBe(2000); }); it('should update worker assignment', () => { @@ -187,17 +167,11 @@ describe('Domain Models - REAL Behavior Tests', () => { expect(isTerminalState(TaskStatus.COMPLETED)).toBe(true); expect(isTerminalState(TaskStatus.FAILED)).toBe(true); expect(isTerminalState(TaskStatus.CANCELLED)).toBe(true); - // Verify these are the only terminal states - expect([TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED].every((s) => isTerminalState(s))).toBe( - true, - ); }); it('should identify non-terminal states', () => { expect(isTerminalState(TaskStatus.QUEUED)).toBe(false); expect(isTerminalState(TaskStatus.RUNNING)).toBe(false); - // Verify these states allow progression - expect([TaskStatus.QUEUED, TaskStatus.RUNNING].every((s) => !isTerminalState(s))).toBe(true); }); }); @@ -310,32 +284,6 @@ describe('Domain Models - REAL Behavior Tests', () => { expect(worker.taskId).toBe('task-456'); expect(worker.pid).toBe(12345); }); - - it('should have correct SystemResources structure', () => { - const resources: SystemResources = { - cpuUsage: 45.5, - availableMemory: 8000000000, - totalMemory: 16000000000, - loadAverage: [1.5, 1.2, 1.0], - workerCount: 3, - }; - - expect(resources.cpuUsage).toBe(45.5); - expect(resources.loadAverage).toHaveLength(3); - }); - - it('should have correct TaskOutput structure', () => { - const output: TaskOutput = { - taskId: TaskId('task-123'), - stdout: ['Line 1', 'Line 2'], - stderr: ['Error 1'], - totalSize: 1024, - }; - - expect(output.taskId).toBe('task-123'); - expect(output.stdout).toHaveLength(2); - expect(output.stderr).toHaveLength(1); - }); }); describe('Real-world scenarios', () => { diff --git a/tests/unit/implementations/agent-adapters.test.ts b/tests/unit/implementations/agent-adapters.test.ts index c6343c54..c3eecc7e 100644 --- a/tests/unit/implementations/agent-adapters.test.ts +++ b/tests/unit/implementations/agent-adapters.test.ts @@ -575,23 +575,6 @@ describe('baseUrl passthrough', () => { adapter.dispose(); }); - it('ClaudeAdapter: user env CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS takes precedence', () => { - const mockChild = createMockChildProcess(1234); - mockSpawn.mockReturnValue(mockChild); - saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com'); - - // Note: CLAUDE_CODE_ vars are stripped, so only non-CLAUDE_CODE_ prefix vars work - // CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS would be stripped by ClaudeAdapter's envPrefixesToStrip - // Test that explicitly setting it in injected env is overridden when present - const adapter = new ClaudeAdapter(testConfig, 'claude'); - adapter.spawn('test prompt', '/workspace'); - - const spawnOptions = mockSpawn.mock.calls[0][2] as { env: Record }; - // Auto-set to '1' since CLAUDE_CODE_ is stripped and baseUrl is configured - expect(spawnOptions.env.CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS).toBe('1'); - adapter.dispose(); - }); - it('ClaudeAdapter: no CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS when baseUrl not configured', () => { const mockChild = createMockChildProcess(1234); mockSpawn.mockReturnValue(mockChild); From 154309d6c0831b13dd1db7a5acc4d5c192321261 Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Wed, 1 Apr 2026 16:10:22 +0300 Subject: [PATCH 03/11] fix: address self-review issues - Add missing `model` field to CreateOrchestrator MCP tool (Zod schema, inputSchema, and handler) so orchestrations can use per-task model overrides - Thread shared `model` into taskTemplate for scheduled pipelines so the default model propagates to triggered pipeline tasks - Add --model/-m flag to `beat orchestrate` CLI command - Include model in OrchestratorStatus MCP response and CLI status output --- src/adapters/mcp-adapter.ts | 7 +++++++ src/cli/commands/orchestrate.ts | 11 +++++++++++ src/services/schedule-manager.ts | 1 + 3 files changed, 19 insertions(+) diff --git a/src/adapters/mcp-adapter.ts b/src/adapters/mcp-adapter.ts index bb8911a2..dce3b705 100644 --- a/src/adapters/mcp-adapter.ts +++ b/src/adapters/mcp-adapter.ts @@ -219,6 +219,7 @@ const CreateOrchestratorSchema = z.object({ goal: z.string().min(1).max(8000).describe('High-level goal for the orchestrator to achieve'), workingDirectory: z.string().optional().describe('Working directory for workers (absolute path)'), agent: z.enum(AGENT_PROVIDERS_TUPLE).optional().describe('AI agent for the orchestrator loop'), + model: z.string().min(1).max(200).optional().describe('Model override for the orchestrator (overrides agent-config default)'), maxDepth: z.number().min(1).max(10).optional().default(3).describe('Max delegation depth'), maxWorkers: z.number().min(1).max(20).optional().default(5).describe('Max concurrent workers'), maxIterations: z.number().min(1).max(200).optional().default(50).describe('Max orchestrator iterations'), @@ -1296,6 +1297,10 @@ export class MCPAdapter { enum: [...AGENT_PROVIDERS], description: 'AI agent for the orchestrator loop', }, + model: { + type: 'string', + description: 'Model override for the orchestrator (overrides agent-config default)', + }, maxDepth: { type: 'number', description: 'Max delegation depth (1-10, default: 3)', @@ -2746,6 +2751,7 @@ export class MCPAdapter { goal: data.goal, workingDirectory: data.workingDirectory, agent: data.agent as AgentProvider | undefined, + model: data.model, maxDepth: data.maxDepth, maxWorkers: data.maxWorkers, maxIterations: data.maxIterations, @@ -2807,6 +2813,7 @@ export class MCPAdapter { stateFilePath: orchestration.stateFilePath, workingDirectory: orchestration.workingDirectory, agent: orchestration.agent, + ...(orchestration.model && { model: orchestration.model }), maxDepth: orchestration.maxDepth, maxWorkers: orchestration.maxWorkers, maxIterations: orchestration.maxIterations, diff --git a/src/cli/commands/orchestrate.ts b/src/cli/commands/orchestrate.ts index a8f28dca..46849784 100644 --- a/src/cli/commands/orchestrate.ts +++ b/src/cli/commands/orchestrate.ts @@ -97,6 +97,7 @@ interface OrchestrateCreateParsed { readonly goal: string; readonly workingDirectory?: string; readonly agent?: AgentProvider; + readonly model?: string; readonly maxDepth?: number; readonly maxWorkers?: number; readonly maxIterations?: number; @@ -128,6 +129,7 @@ type OrchestrateParsed = export function parseOrchestrateCreateArgs(args: readonly string[]): Result { let workingDirectory: string | undefined; let agent: AgentProvider | undefined; + let model: string | undefined; let maxDepth: number | undefined; let maxWorkers: number | undefined; let maxIterations: number | undefined; @@ -150,6 +152,11 @@ export function parseOrchestrateCreateArgs(args: readonly string[]): Result { stateFilePath: o.stateFilePath, workingDirectory: o.workingDirectory, agent: o.agent, + ...(o.model && { model: o.model }), maxDepth: o.maxDepth, maxWorkers: o.maxWorkers, maxIterations: o.maxIterations, @@ -497,6 +507,7 @@ export async function handleOrchestrateCommand( ' -f, --foreground Block and wait for completion', ' -w, --working-directory DIR Working directory for workers', ' -a, --agent AGENT AI agent (claude, codex, gemini)', + ' -m, --model MODEL Model override (e.g. claude-opus-4-5)', ' --max-depth N Max delegation depth (1-10, default: 3)', ' --max-workers N Max concurrent workers (1-20, default: 5)', ' --max-iterations N Max orchestrator iterations (1-200, default: 50)', diff --git a/src/services/schedule-manager.ts b/src/services/schedule-manager.ts index 76ababd2..d38deaeb 100644 --- a/src/services/schedule-manager.ts +++ b/src/services/schedule-manager.ts @@ -299,6 +299,7 @@ export class ScheduleManagerService implements ScheduleService { priority: request.priority, workingDirectory: validatedWorkingDirectory, agent: agentResult.value, + model: request.model, }, pipelineSteps: normalizedSteps, scheduleType: request.scheduleType, From 337fa219a3f1d8c9273ce170e8fec8dd4f6ddc7c Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Wed, 1 Apr 2026 16:20:38 +0300 Subject: [PATCH 04/11] =?UTF-8?q?fix:=20shepherd=20review=20fixes=20?= =?UTF-8?q?=E2=80=94=20model=20threading,=20Claude=20baseUrl=20warning,=20?= =?UTF-8?q?docs=20(task-2026-04-01=5Fshepherd-fixes)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Thread orchestration model to inner loop createLoop() call so iterations respect the --model flag passed to CreateOrchestrator - Add Claude-specific warning in ConfigureAgent (set/check) and ListAgents when baseUrl is configured without an API key (login-based auth ignores baseUrl) - Document in ConfigureAgent tool description that clearing individual fields requires action=reset or the CLI (MCP set action requires non-empty values) - Tests: verify model is propagated to loop taskTemplate; verify Claude warning appears in set/check actions and ListAgents; verify no warning for other agents --- src/adapters/mcp-adapter.ts | 64 ++++-- src/services/orchestration-manager.ts | 1 + tests/unit/adapters/mcp-adapter.test.ts | 193 ++++++++++++++++++ .../services/orchestration-manager.test.ts | 32 +++ 4 files changed, 273 insertions(+), 17 deletions(-) diff --git a/src/adapters/mcp-adapter.ts b/src/adapters/mcp-adapter.ts index dce3b705..8fd4838a 100644 --- a/src/adapters/mcp-adapter.ts +++ b/src/adapters/mcp-adapter.ts @@ -1382,7 +1382,8 @@ export class MCPAdapter { }, { name: 'ConfigureAgent', - description: 'Check auth status, store API key/baseUrl/model, or reset stored config for an agent', + description: + 'Check auth status, store API key/baseUrl/model, or reset stored config for an agent. Note: to clear individual fields (baseUrl, model), use action=reset (clears all) or the CLI `beat agents config set ""`. The MCP set action requires non-empty values.', inputSchema: { type: 'object', properties: { @@ -2684,6 +2685,12 @@ export class MCPAdapter { const agentConfig = loadAgentConfig(provider); const authStatus = checkAgentAuth(provider, agentConfig.apiKey); + // Claude-specific: warn when baseUrl is configured without an API key + const claudeBaseUrlWarning = + provider === 'claude' && agentConfig.baseUrl && !agentConfig.apiKey + ? 'Warning: Claude requires an API key when using a custom baseUrl. The base URL will be ignored with login-based auth.' + : undefined; + return { provider, description: AGENT_DESCRIPTIONS[provider], @@ -2694,6 +2701,7 @@ export class MCPAdapter { ...(authStatus.hint && { hint: authStatus.hint }), ...(agentConfig.baseUrl && { baseUrl: agentConfig.baseUrl }), ...(agentConfig.model && { model: agentConfig.model }), + ...(claudeBaseUrlWarning && { warning: claudeBaseUrlWarning }), }; }); @@ -2948,21 +2956,25 @@ export class MCPAdapter { case 'check': { const agentConfig = loadAgentConfig(agent); const status = checkAgentAuth(agent, agentConfig.apiKey); + + // Claude-specific: warn when baseUrl is configured without an API key + const checkPayload: Record = { + success: true, + ...status, + ...(agentConfig.apiKey && { storedKey: maskApiKey(agentConfig.apiKey) }), + ...(agentConfig.baseUrl && { baseUrl: agentConfig.baseUrl }), + ...(agentConfig.model && { model: agentConfig.model }), + }; + if (agent === 'claude' && agentConfig.baseUrl && !agentConfig.apiKey) { + checkPayload.warning = + 'Warning: Claude requires an API key when using a custom baseUrl. The base URL will be ignored with login-based auth.'; + } + return { content: [ { type: 'text', - text: JSON.stringify( - { - success: true, - ...status, - ...(agentConfig.apiKey && { storedKey: maskApiKey(agentConfig.apiKey) }), - ...(agentConfig.baseUrl && { baseUrl: agentConfig.baseUrl }), - ...(agentConfig.model && { model: agentConfig.model }), - }, - null, - 2, - ), + text: JSON.stringify(checkPayload, null, 2), }, ], }; @@ -3020,15 +3032,33 @@ export class MCPAdapter { messages.push(model ? `model set to ${model}` : 'model cleared'); } + // Claude-specific: warn when baseUrl is configured without an API key + // (login-based auth does not work with a custom baseUrl) + const warningMessages: string[] = []; + if (agent === 'claude') { + const currentConfig = loadAgentConfig(agent); + const effectiveBaseUrl = baseUrl !== undefined ? baseUrl : currentConfig.baseUrl; + const effectiveApiKey = apiKey ?? currentConfig.apiKey; + if (effectiveBaseUrl && !effectiveApiKey) { + warningMessages.push( + 'Warning: Claude requires an API key when using a custom baseUrl. The base URL will be ignored with login-based auth.', + ); + } + } + + const responsePayload: Record = { + success: true, + message: `${agent}: ${messages.join(', ')}`, + }; + if (warningMessages.length > 0) { + responsePayload.warning = warningMessages.join(' '); + } + return { content: [ { type: 'text', - text: JSON.stringify( - { success: true, message: `${agent}: ${messages.join(', ')}` }, - null, - 2, - ), + text: JSON.stringify(responsePayload, null, 2), }, ], }; diff --git a/src/services/orchestration-manager.ts b/src/services/orchestration-manager.ts index 99a1fa85..1a6e2117 100644 --- a/src/services/orchestration-manager.ts +++ b/src/services/orchestration-manager.ts @@ -157,6 +157,7 @@ export class OrchestrationManagerService implements OrchestrationService { freshContext: true, workingDirectory: validatedWorkingDirectory, agent, + ...(orchestration.model !== undefined && { model: orchestration.model }), }); if (!loopResult.ok) { diff --git a/tests/unit/adapters/mcp-adapter.test.ts b/tests/unit/adapters/mcp-adapter.test.ts index d19167b4..09c8caab 100644 --- a/tests/unit/adapters/mcp-adapter.test.ts +++ b/tests/unit/adapters/mcp-adapter.test.ts @@ -11,8 +11,12 @@ * Quality: 3-5 assertions per test, AAA pattern, behavioral testing */ +import { mkdirSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; +import path from 'path'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { MCPAdapter } from '../../../src/adapters/mcp-adapter'; +import { _testSetConfigDir, saveAgentConfig } from '../../../src/core/configuration'; import type { Loop, LoopCreateRequest, @@ -2764,3 +2768,192 @@ describe('Orchestration tools via callTool()', () => { // NOTE: simulatePauseLoop and simulateResumeLoop helpers removed in favor of // adapter.callTool() which exercises full Zod validation + dispatch pipeline. + +// ============================================================================ +// ConfigureAgent Warning Tests — Claude baseUrl + missing apiKey +// ============================================================================ + +describe('ConfigureAgent — Claude baseUrl warning via callTool()', () => { + let testDir: string; + let restoreConfig: () => void; + + function makeConfigureAgentAdapter() { + return new MCPAdapter({ + taskManager: new MockTaskManager(), + logger: new MockLogger(), + scheduleService: stubScheduleService, + loopService: stubLoopService, + agentRegistry: undefined, + config: testConfig, + }); + } + + beforeEach(() => { + testDir = path.join(tmpdir(), `autobeat-configure-agent-warning-test-${Date.now()}`); + mkdirSync(testDir, { recursive: true }); + restoreConfig = _testSetConfigDir(testDir); + }); + + afterEach(() => { + restoreConfig(); + rmSync(testDir, { recursive: true, force: true }); + }); + + describe('set action', () => { + it('should include warning when setting baseUrl without an API key for Claude', async () => { + const adapter = makeConfigureAgentAdapter(); + + const result = await adapter.callTool('ConfigureAgent', { + agent: 'claude', + action: 'set', + baseUrl: 'https://proxy.example.com/v1', + }); + + expect(result.isError).toBeFalsy(); + const response = JSON.parse(result.content[0].text); + expect(response.success).toBe(true); + expect(response.warning).toContain('API key'); + expect(response.warning).toContain('baseUrl'); + }); + + it('should not include warning when setting baseUrl with an API key for Claude', async () => { + const adapter = makeConfigureAgentAdapter(); + + const result = await adapter.callTool('ConfigureAgent', { + agent: 'claude', + action: 'set', + baseUrl: 'https://proxy.example.com/v1', + apiKey: 'sk-test-key', + }); + + expect(result.isError).toBeFalsy(); + const response = JSON.parse(result.content[0].text); + expect(response.success).toBe(true); + expect(response.warning).toBeUndefined(); + }); + + it('should include warning when setting only apiKey=false and baseUrl already stored for Claude', async () => { + // Pre-condition: baseUrl already stored, no apiKey + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com/v1'); + + const adapter = makeConfigureAgentAdapter(); + + // Set a model update (no apiKey provided, baseUrl already present) + const result = await adapter.callTool('ConfigureAgent', { + agent: 'claude', + action: 'set', + model: 'claude-opus-4-5', + }); + + expect(result.isError).toBeFalsy(); + const response = JSON.parse(result.content[0].text); + expect(response.success).toBe(true); + expect(response.warning).toContain('API key'); + }); + + it('should not include warning for non-Claude agents with baseUrl', async () => { + const adapter = makeConfigureAgentAdapter(); + + const result = await adapter.callTool('ConfigureAgent', { + agent: 'codex', + action: 'set', + baseUrl: 'https://proxy.example.com/v1', + }); + + expect(result.isError).toBeFalsy(); + const response = JSON.parse(result.content[0].text); + expect(response.success).toBe(true); + expect(response.warning).toBeUndefined(); + }); + }); + + describe('check action', () => { + it('should include warning when baseUrl is configured without apiKey for Claude', async () => { + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com/v1'); + + const adapter = makeConfigureAgentAdapter(); + const result = await adapter.callTool('ConfigureAgent', { + agent: 'claude', + action: 'check', + }); + + expect(result.isError).toBeFalsy(); + const response = JSON.parse(result.content[0].text); + expect(response.success).toBe(true); + expect(response.warning).toContain('API key'); + expect(response.warning).toContain('baseUrl'); + }); + + it('should not include warning when Claude has both baseUrl and apiKey configured', async () => { + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com/v1'); + saveAgentConfig('claude', 'apiKey', 'sk-stored-key'); + + const adapter = makeConfigureAgentAdapter(); + const result = await adapter.callTool('ConfigureAgent', { + agent: 'claude', + action: 'check', + }); + + expect(result.isError).toBeFalsy(); + const response = JSON.parse(result.content[0].text); + expect(response.success).toBe(true); + expect(response.warning).toBeUndefined(); + }); + + it('should not include warning when Claude has no baseUrl configured', async () => { + const adapter = makeConfigureAgentAdapter(); + const result = await adapter.callTool('ConfigureAgent', { + agent: 'claude', + action: 'check', + }); + + expect(result.isError).toBeFalsy(); + const response = JSON.parse(result.content[0].text); + expect(response.success).toBe(true); + expect(response.warning).toBeUndefined(); + }); + }); + + describe('ListAgents warning for Claude baseUrl without apiKey', () => { + it('should include warning on Claude entry in ListAgents when baseUrl is set without apiKey', async () => { + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com/v1'); + + const adapter = makeConfigureAgentAdapter(); + const result = await adapter.callTool('ListAgents', {}); + + expect(result.isError).toBeFalsy(); + const response = JSON.parse(result.content[0].text); + expect(response.success).toBe(true); + + const claudeEntry = response.agents.find((a: { provider: string }) => a.provider === 'claude'); + expect(claudeEntry).toBeDefined(); + expect(claudeEntry.warning).toContain('API key'); + expect(claudeEntry.warning).toContain('baseUrl'); + }); + + it('should not include warning on Claude entry when apiKey is also configured', async () => { + saveAgentConfig('claude', 'baseUrl', 'https://proxy.example.com/v1'); + saveAgentConfig('claude', 'apiKey', 'sk-stored-key'); + + const adapter = makeConfigureAgentAdapter(); + const result = await adapter.callTool('ListAgents', {}); + + expect(result.isError).toBeFalsy(); + const response = JSON.parse(result.content[0].text); + const claudeEntry = response.agents.find((a: { provider: string }) => a.provider === 'claude'); + expect(claudeEntry.warning).toBeUndefined(); + }); + + it('should not include warning on non-Claude agents with baseUrl', async () => { + saveAgentConfig('codex', 'baseUrl', 'https://proxy.example.com/v1'); + + const adapter = makeConfigureAgentAdapter(); + const result = await adapter.callTool('ListAgents', {}); + + expect(result.isError).toBeFalsy(); + const response = JSON.parse(result.content[0].text); + const codexEntry = response.agents.find((a: { provider: string }) => a.provider === 'codex'); + expect(codexEntry.warning).toBeUndefined(); + }); + }); +}); diff --git a/tests/unit/services/orchestration-manager.test.ts b/tests/unit/services/orchestration-manager.test.ts index 5cb4df2f..1059ca0d 100644 --- a/tests/unit/services/orchestration-manager.test.ts +++ b/tests/unit/services/orchestration-manager.test.ts @@ -150,6 +150,38 @@ describe('OrchestrationManagerService - Unit Tests', () => { expect(result.value.maxWorkers).toBe(15); expect(result.value.maxIterations).toBe(100); }); + + it('should pass model to loop creation when model is specified', async () => { + const result = await service.createOrchestration({ + goal: 'Build auth with specific model', + model: 'claude-opus-4-5', + }); + + expect(result.ok).toBe(true); + if (!result.ok) return; + + // Verify the LoopCreated event contains the model + const loopEvents = eventBus.getEmittedEvents('LoopCreated'); + expect(loopEvents.length).toBe(1); + const createdLoop = (loopEvents[0] as { loop: { taskTemplate?: { model?: string } } }).loop; + // Model is stored on the loop's taskTemplate + expect(createdLoop?.taskTemplate?.model).toBe('claude-opus-4-5'); + }); + + it('should not set model on loop when model is not specified', async () => { + const result = await service.createOrchestration({ + goal: 'Build without model override', + }); + + expect(result.ok).toBe(true); + if (!result.ok) return; + + const loopEvents = eventBus.getEmittedEvents('LoopCreated'); + expect(loopEvents.length).toBe(1); + const createdLoop = (loopEvents[0] as { loop: { taskTemplate?: { model?: string } } }).loop; + // No model should be set when not requested + expect(createdLoop?.taskTemplate?.model).toBeUndefined(); + }); }); describe('getOrchestration()', () => { From 3e2447d0a730c908690b8850e9d4bd820bfb77a7 Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Fri, 3 Apr 2026 01:03:28 +0300 Subject: [PATCH 05/11] docs: clarify scheduled loop taskTemplate comment Replace misleading "placeholder" wording with explanation of why taskTemplate exists alongside loopConfig on scheduled loops. --- src/services/schedule-manager.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/services/schedule-manager.ts b/src/services/schedule-manager.ts index d38deaeb..20d8de1e 100644 --- a/src/services/schedule-manager.ts +++ b/src/services/schedule-manager.ts @@ -500,7 +500,8 @@ export class ScheduleManagerService implements ScheduleService { ); } - // Build the schedule with loopConfig and a placeholder taskTemplate + // Build the schedule with loopConfig (authoritative source) and a taskTemplate derived from it + // (Schedule type requires taskTemplate; used as fallback for workingDirectory in handleLoopTrigger) const schedule = createSchedule({ taskTemplate: { prompt: request.loopConfig.prompt ?? '', From 9906467d46700815adaa1974eb923205d46a884b Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Fri, 3 Apr 2026 01:45:40 +0300 Subject: [PATCH 06/11] =?UTF-8?q?Loop=20loop-f4f7bd10-e408-40b6-9bae-dd145?= =?UTF-8?q?48bf0b5=20iteration=201=20=E2=80=94=20pass?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/adapters/mcp-adapter.ts | 4 +- src/bootstrap.ts | 13 - src/implementations/process-spawner.ts | 143 ----- tests/setup.ts | 25 +- .../implementations/process-spawner.test.ts | 538 ------------------ 5 files changed, 4 insertions(+), 719 deletions(-) delete mode 100644 src/implementations/process-spawner.ts delete mode 100644 tests/unit/implementations/process-spawner.test.ts diff --git a/src/adapters/mcp-adapter.ts b/src/adapters/mcp-adapter.ts index 8fd4838a..de08ad5f 100644 --- a/src/adapters/mcp-adapter.ts +++ b/src/adapters/mcp-adapter.ts @@ -3018,7 +3018,7 @@ export class MCPAdapter { isError: true, }; } - messages.push(baseUrl ? `baseUrl set to ${baseUrl}` : 'baseUrl cleared'); + messages.push(`baseUrl set to ${baseUrl}`); } if (model !== undefined) { @@ -3029,7 +3029,7 @@ export class MCPAdapter { isError: true, }; } - messages.push(model ? `model set to ${model}` : 'model cleared'); + messages.push(`model set to ${model}`); } // Claude-specific: warn when baseUrl is configured without an API key diff --git a/src/bootstrap.ts b/src/bootstrap.ts index 29d4073e..f5c9c863 100644 --- a/src/bootstrap.ts +++ b/src/bootstrap.ts @@ -88,7 +88,6 @@ import { SQLiteLoopRepository } from './implementations/loop-repository.js'; import { SQLiteOrchestrationRepository } from './implementations/orchestration-repository.js'; import { BufferedOutputCapture } from './implementations/output-capture.js'; import { SQLiteOutputRepository } from './implementations/output-repository.js'; -import { ClaudeProcessSpawner } from './implementations/process-spawner.js'; import { ProcessSpawnerAdapter } from './implementations/process-spawner-adapter.js'; import { SystemResourceMonitor } from './implementations/resource-monitor.js'; import { SQLiteScheduleRepository } from './implementations/schedule-repository.js'; @@ -319,18 +318,6 @@ export async function bootstrap(options: BootstrapOptions = {}): Promise new PriorityTaskQueue()); - container.registerSingleton('processSpawner', () => { - // Use injected ProcessSpawner if provided (for testing) - if (options.processSpawner) { - logger.info('Using injected ProcessSpawner'); - return options.processSpawner; - } - - const configResult = container.get('config'); - if (!configResult.ok) throw new Error('Config required for ProcessSpawner'); - return new ClaudeProcessSpawner(configResult.value, 'claude'); - }); - // Register AgentRegistry for multi-agent support (v0.5.0) // ARCHITECTURE: If a custom ProcessSpawner is injected (tests), wrap it in a // compatibility adapter. Otherwise, register all 4 agent adapters. diff --git a/src/implementations/process-spawner.ts b/src/implementations/process-spawner.ts deleted file mode 100644 index 572e135e..00000000 --- a/src/implementations/process-spawner.ts +++ /dev/null @@ -1,143 +0,0 @@ -/** - * Process spawning implementation - * Handles Claude Code process creation - */ - -import { ChildProcess, spawn } from 'child_process'; -import { Configuration } from '../core/configuration.js'; -import { AutobeatError, ErrorCode, processSpawnFailed } from '../core/errors.js'; -import { ProcessSpawner } from '../core/interfaces.js'; -import { err, ok, Result, tryCatch } from '../core/result.js'; - -export class ClaudeProcessSpawner implements ProcessSpawner { - private readonly claudeCommand: string; - private readonly baseArgs: readonly string[]; - private readonly killTimeouts = new Map(); - private readonly config: Configuration; - - constructor(config: Configuration, claudeCommand = 'claude') { - this.config = config; - this.claudeCommand = claudeCommand; - this.baseArgs = Object.freeze(['--print', '--dangerously-skip-permissions', '--output-format', 'json']); - } - - // eslint-disable-next-line @typescript-eslint/no-unused-vars - spawn(prompt: string, workingDirectory: string, taskId?: string, _model?: string): Result<{ process: ChildProcess; pid: number }> { - try { - // Make prompt more explicit if it looks like a simple command - let finalPrompt = prompt; - - // If the prompt looks like a simple command without explicit instructions, - // wrap it to make Claude understand it should execute it - if ( - !prompt.toLowerCase().includes('run') && - !prompt.toLowerCase().includes('execute') && - !prompt.toLowerCase().includes('perform') && - !prompt.toLowerCase().includes('bash') && - !prompt.toLowerCase().includes('command') && - prompt.split(' ').length <= 3 - ) { - finalPrompt = `Execute the following bash command: ${prompt}`; - } - - // With --print flag, prompt is passed as argument, not via stdin - const args = [...this.baseArgs, finalPrompt]; - - // Log via proper logger instead of console.error to avoid interfering with output capture - // console.error(`[ProcessSpawner] Executing: ${this.claudeCommand} ${args.map(arg => `"${arg}"`).join(' ')}`); - // console.error(`[ProcessSpawner] Working directory: ${workingDirectory}`); - // console.error(`[ProcessSpawner] Environment keys: ${Object.keys(process.env).length}`); - - // Add Autobeat-specific environment variables for identification - // CRITICAL: Strip all Claude Code nesting indicators to prevent rejection - // Workers are independent Claude Code instances, not nested sessions - // Claude Code checks CLAUDECODE and any CLAUDE_CODE_* prefixed vars - const cleanEnv = Object.fromEntries( - Object.entries(process.env).filter(([key]) => key !== 'CLAUDECODE' && !key.startsWith('CLAUDE_CODE_')), - ); - const env = { - ...cleanEnv, - AUTOBEAT_WORKER: 'true', - ...(taskId && { AUTOBEAT_TASK_ID: taskId }), - }; - - const child = spawn(this.claudeCommand, args, { - cwd: workingDirectory, - env, - stdio: ['ignore', 'pipe', 'pipe'], - }); - - // ARCHITECTURE: Check PID immediately - spawn() is synchronous for PID assignment - if (!child.pid) { - return err(processSpawnFailed('Failed to get process PID')); - } - - return ok({ process: child, pid: child.pid }); - } catch (error) { - return err(processSpawnFailed(String(error))); - } - } - - kill(pid: number): Result { - return tryCatch( - () => { - // Clear any existing timeout for this PID - this.clearKillTimeout(pid); - - process.kill(pid, 'SIGTERM'); - - // Give it time to terminate gracefully before forcing - const timeoutId = setTimeout(() => { - try { - process.kill(pid, 'SIGKILL'); - } catch { - // Process might already be dead - } finally { - // Clean up timeout reference - this.killTimeouts.delete(pid); - } - }, this.config.killGracePeriodMs!); - - // Track timeout for cleanup - this.killTimeouts.set(pid, timeoutId); - }, - (error) => - new AutobeatError(ErrorCode.PROCESS_KILL_FAILED, `Failed to kill process ${pid}: ${error}`, { pid, error }), - ); - } - - /** - * Clear kill timeout for a specific PID - * @param pid - Process ID to clear timeout for - * @remarks Prevents timeout leaks during cleanup - * @internal - */ - private clearKillTimeout(pid: number): void { - const timeoutId = this.killTimeouts.get(pid); - if (timeoutId) { - clearTimeout(timeoutId); - this.killTimeouts.delete(pid); - } - } - - /** - * Clean up all pending kill timeouts and resources - * @remarks Must be called during shutdown to prevent timeout leaks - * @example - * ```typescript - * const spawner = new ClaudeProcessSpawner(); - * try { - * const result = spawner.spawn('test prompt', '/tmp', 'task-123'); - * // use the spawned process - * } finally { - * spawner.dispose(); // Ensure cleanup - * } - * ``` - */ - public dispose(): void { - for (const [pid, timeoutId] of this.killTimeouts) { - clearTimeout(timeoutId); - } - this.killTimeouts.clear(); - } -} diff --git a/tests/setup.ts b/tests/setup.ts index c71a5f43..160a851f 100644 --- a/tests/setup.ts +++ b/tests/setup.ts @@ -9,19 +9,15 @@ * Example: * TEST_ENV=performance npm test -- tests/performance/ * - * @see tests/unit/implementations/process-spawner.test.ts for resource cleanup example */ import { afterAll, beforeAll } from 'vitest'; import { InMemoryEventBus } from '../src/core/events/event-bus'; import type { Database } from '../src/implementations/database'; -import type { ClaudeProcessSpawner } from '../src/implementations/process-spawner'; - // Define types for resources that need cleanup interface TestResources { eventBuses: Set; databases: Set; - processSpawners: Set; intervals: Set; timeouts: Set; } @@ -30,7 +26,6 @@ interface TestResources { const activeResources: TestResources = { eventBuses: new Set(), databases: new Set(), - processSpawners: new Set(), intervals: new Set(), timeouts: new Set(), }; @@ -162,19 +157,6 @@ afterAll(() => { } activeResources.databases.clear(); - // Dispose all process spawners - for (const spawner of activeResources.processSpawners) { - try { - if (spawner.dispose) { - spawner.dispose(); - } - } catch (error) { - cleanupErrors.push(new Error(`Failed to dispose process spawner: ${error}`)); - console.error('Failed to dispose process spawner:', error); - } - } - activeResources.processSpawners.clear(); - // Force garbage collection if available if (global.gc) { try { @@ -197,8 +179,8 @@ afterAll(() => { // Export helper to register resources for cleanup export function registerForCleanup( - resource: InMemoryEventBus | Database | ClaudeProcessSpawner, - type: 'eventBus' | 'database' | 'spawner', + resource: InMemoryEventBus | Database, + type: 'eventBus' | 'database', ): void { switch (type) { case 'eventBus': @@ -207,8 +189,5 @@ export function registerForCleanup( case 'database': activeResources.databases.add(resource as Database); break; - case 'spawner': - activeResources.processSpawners.add(resource as ClaudeProcessSpawner); - break; } } diff --git a/tests/unit/implementations/process-spawner.test.ts b/tests/unit/implementations/process-spawner.test.ts deleted file mode 100644 index ede276f6..00000000 --- a/tests/unit/implementations/process-spawner.test.ts +++ /dev/null @@ -1,538 +0,0 @@ -import type { ChildProcess } from 'child_process'; -import { spawn } from 'child_process'; -import { EventEmitter } from 'events'; -import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; -import { ClaudeProcessSpawner } from '../../../src/implementations/process-spawner'; -import { TEST_COUNTS, TIMEOUTS } from '../../constants'; -import { createTestConfiguration } from '../../fixtures/factories'; -import { createMockChildProcess } from '../../fixtures/test-helpers'; - -// Mock child_process module -let mockSpawnImpl: (...args: unknown[]) => ChildProcess | null = () => null; -vi.mock('child_process', () => ({ - spawn: (...args: unknown[]) => mockSpawnImpl(...args), -})); - -describe('ClaudeProcessSpawner - Behavioral Tests', () => { - let spawner: ClaudeProcessSpawner; - let mockProcess: ChildProcess & EventEmitter; - let spawnSpy: ReturnType; - - beforeEach(() => { - spawner = new ClaudeProcessSpawner(createTestConfiguration()); - - // Create a properly typed mock child process - mockProcess = createMockChildProcess({ - kill: function (this: ChildProcess, signal?: string) { - // Simulate real process.kill behavior - if (signal === 'SIGTERM' || signal === 'SIGKILL') { - setImmediate(() => { - this.emit('exit', 0, signal); - }); - return true; - } - return false; - }, - }) as ChildProcess & EventEmitter; - - // Create a spy function that tracks calls - spawnSpy = vi.fn((...args: unknown[]) => mockProcess); - mockSpawnImpl = spawnSpy; - }); - - afterEach(() => { - vi.clearAllMocks(); - }); - - describe('Process spawning behavior', () => { - it('should successfully spawn a process and return process handle with PID', () => { - const result = spawner.spawn('echo hello', '/tmp'); - - // Test BEHAVIOR, not mock calls - expect(result.ok).toBe(true); - if (result.ok) { - // Verify we get a process handle - expect(result.value.process).toBeDefined(); - expect(result.value.process.stdout).toBeDefined(); - expect(result.value.process.stderr).toBeDefined(); - expect(result.value.process.stdin).toBeDefined(); - expect(result.value.process.kill).toBeDefined(); - expect(typeof result.value.process.kill).toBe('function'); - - // Verify we get a PID for process management - expect(result.value.pid).toBe(12345); - expect(typeof result.value.pid).toBe('number'); - expect(result.value.pid).toBeGreaterThan(0); - - // Verify process properties - expect(result.value.process.pid).toBe(12345); - expect(result.value.process.connected).toBe(false); - expect(result.value.process.killed).toBe(false); - expect(result.value.process.exitCode).toBeNull(); - expect(result.value.process.signalCode).toBeNull(); - expect(Array.isArray(result.value.process.spawnargs)).toBe(true); - expect(typeof result.value.process.spawnfile).toBe('string'); - } - }); - - it('should handle spawn failures gracefully and return error', () => { - spawnSpy.mockImplementation(() => { - throw new Error('Command not found: claude'); - }); - - const result = spawner.spawn('test', '/dir'); - - // Test error handling BEHAVIOR - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error).toBeInstanceOf(Error); - expect(result.error.message).toContain('Command not found'); - expect(typeof result.error.message).toBe('string'); - expect(result.error.name).toBe('AutobeatError'); - expect(result.error.stack).toBeDefined(); - } - if (!result.ok) { - expect(result.error.code).toBe('PROCESS_SPAWN_FAILED'); - expect(result.error.message).toContain('Command not found'); - // Ensure error has context for debugging - expect(result.error.context).toBeDefined(); - } - }); - - it('should handle missing PID as a critical error', () => { - mockProcess.pid = undefined; - - const result = spawner.spawn('test', '/dir'); - - // Test that missing PID is treated as failure - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.message).toContain('Failed to get process PID'); - expect(result.error.code).toBe('PROCESS_SPAWN_FAILED'); - } - }); - - it('should emit stdout data from spawned process', () => { - const result = spawner.spawn('echo test', '/tmp'); - - expect(result.ok).toBe(true); - if (result.ok) { - const { process } = result.value; - - // Test that we can receive output - use promise instead of done() - return new Promise((resolve) => { - process.stdout?.on('data', (data) => { - expect(data).toBeDefined(); - resolve(); - }); - - // Simulate process output - setImmediate(() => { - (process.stdout as EventEmitter).emit('data', Buffer.from('test output')); - }); - }); - } - }); - - it('should emit stderr data from spawned process', () => { - const result = spawner.spawn('echo error', '/tmp'); - - expect(result.ok).toBe(true); - if (result.ok) { - const { process } = result.value; - - // Test that we can receive errors - use promise instead of done() - return new Promise((resolve) => { - process.stderr?.on('data', (data) => { - expect(data).toBeDefined(); - resolve(); - }); - - // Simulate process error output - setImmediate(() => { - (process.stderr as EventEmitter).emit('data', Buffer.from('error output')); - }); - }); - } - }); - - it('should wrap simple commands with execution instruction', () => { - // Test BEHAVIOR: simple commands get wrapped for Claude - const simpleCommands = ['ls', 'pwd', 'echo test', 'cat file.txt']; - - simpleCommands.forEach((cmd) => { - spawnSpy.mockClear(); - const result = spawner.spawn(cmd, '/dir'); - - expect(result.ok).toBe(true); - // Verify the command was processed (wrapped) - const lastArg = spawnSpy.mock.calls[0]?.[1]?.slice(-1)[0]; - expect(lastArg).toContain('Execute the following bash command:'); - expect(lastArg).toContain(cmd); - }); - }); - - it('should not wrap complex prompts that already have instructions', () => { - const complexPrompts = [ - 'Run the test suite and fix any errors', - 'Execute npm install', - 'Please perform a code review', - 'Use bash to list files', - 'Run this command: echo hello', - ]; - - complexPrompts.forEach((prompt) => { - spawnSpy.mockClear(); - const result = spawner.spawn(prompt, '/dir'); - - expect(result.ok).toBe(true); - // Verify complex prompts are passed as-is - const lastArg = spawnSpy.mock.calls[0]?.[1]?.slice(-1)[0]; - expect(lastArg).toBe(prompt); - }); - }); - }); - - describe('Process killing behavior', () => { - let processKillSpy: ReturnType; - - beforeEach(() => { - vi.useFakeTimers(); - // Mock process.kill with a spy - processKillSpy = vi.fn((pid: number, signal?: string): boolean => { - if (pid === 99999) { - throw new Error('No such process'); - } - return true; - }); - process.kill = processKillSpy as typeof process.kill; - }); - - afterEach(() => { - vi.useRealTimers(); - }); - - it('should gracefully terminate process with SIGTERM first', () => { - const pid = 54321; - const result = spawner.kill(pid); - - expect(result.ok).toBe(true); - expect(processKillSpy).toHaveBeenCalledWith(pid, 'SIGTERM'); - expect(processKillSpy).toHaveBeenCalledTimes(1); - if (result.ok) { - expect(result.value).toBeUndefined(); - } - expect(processKillSpy).not.toHaveBeenCalledWith(pid, 'SIGKILL'); - expect(typeof pid).toBe('number'); - expect(pid).toBeGreaterThan(0); - }); - - it('should escalate to SIGKILL after grace period', () => { - const pid = 54321; - const result = spawner.kill(pid); - - expect(result.ok).toBe(true); - // Initially only SIGTERM - expect(processKillSpy).toHaveBeenCalledTimes(1); - expect(processKillSpy).toHaveBeenCalledWith(pid, 'SIGTERM'); - expect(processKillSpy).toHaveBeenNthCalledWith(1, pid, 'SIGTERM'); - - // After grace period, SIGKILL - vi.advanceTimersByTime(TIMEOUTS.LONG); - expect(processKillSpy).toHaveBeenCalledTimes(2); - expect(processKillSpy).toHaveBeenCalledWith(pid, 'SIGKILL'); - expect(processKillSpy).toHaveBeenNthCalledWith(2, pid, 'SIGKILL'); - expect(vi.getTimerCount()).toBeGreaterThanOrEqual(0); - }); - - it('should handle kill failures and return appropriate error', () => { - const invalidPid = 99999; - const result = spawner.kill(invalidPid); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error.code).toBe('PROCESS_KILL_FAILED'); - expect(result.error.message).toContain('99999'); - expect(result.error.message).toContain('No such process'); - expect(result.error.name).toBe('AutobeatError'); - expect(result.error.context).toBeDefined(); - expect(result.error.context?.pid).toBe(invalidPid); - expect(typeof result.error.message).toBe('string'); - expect(result.error.message.length).toBeGreaterThan(0); - } - }); - - it('should clean up kill timeouts on dispose', () => { - vi.useFakeTimers(); - - // Start killing multiple processes - spawner.kill(11111); - spawner.kill(22222); - spawner.kill(33333); - - // Should have 3 pending timeouts - expect(vi.getTimerCount()).toBe(3); - - // Dispose should clean them all - spawner.dispose(); - expect(vi.getTimerCount()).toBe(0); - - vi.useRealTimers(); - }); - }); - - describe('Working directory behavior', () => { - it('should spawn process in specified working directory', () => { - const testDir = '/home/test/project'; - const result = spawner.spawn('ls', testDir); - - expect(result.ok).toBe(true); - - // Verify process would run in correct directory - const spawnCall = spawnSpy.mock.calls[0]; - expect(spawnCall[2].cwd).toBe(testDir); - }); - - it('should handle relative and absolute paths correctly', () => { - const paths = ['.', '..', '/absolute/path', './relative', '../parent']; - - paths.forEach((path) => { - spawnSpy.mockClear(); - const result = spawner.spawn('pwd', path); - - expect(result.ok).toBe(true); - const spawnCall = spawnSpy.mock.calls[0]; - expect(spawnCall[2].cwd).toBe(path); - }); - }); - }); - - describe('Environment variable behavior', () => { - it('should preserve existing environment variables', () => { - const originalEnv = { ...process.env }; - process.env.CUSTOM_VAR = 'test-value'; - process.env.PATH = '/usr/bin:/bin'; - - const result = spawner.spawn('echo $CUSTOM_VAR', '/tmp'); - - expect(result.ok).toBe(true); - const spawnCall = spawnSpy.mock.calls[0]; - expect(spawnCall[2].env.CUSTOM_VAR).toBe('test-value'); - expect(spawnCall[2].env.PATH).toBe('/usr/bin:/bin'); - expect(spawnCall[2].env.AUTOBEAT_WORKER).toBe('true'); - - process.env = originalEnv; - }); - - it('should add AUTOBEAT_TASK_ID when task ID provided', () => { - const taskId = 'task-abc-123'; - const result = spawner.spawn('test', '/tmp', taskId); - - expect(result.ok).toBe(true); - const spawnCall = spawnSpy.mock.calls[0]; - expect(spawnCall[2].env.AUTOBEAT_TASK_ID).toBe(taskId); - expect(spawnCall[2].env.AUTOBEAT_WORKER).toBe('true'); - }); - }); - - describe('Error handling patterns', () => { - it('should wrap all spawn errors in Result type', () => { - const errors = [ - new Error('ENOENT'), - new Error('EACCES'), - new Error('spawn claude ENOENT'), - 'string error', - null, - undefined, - ]; - - errors.forEach((error) => { - spawnSpy.mockImplementation(() => { - throw error; - }); - - const result = spawner.spawn('test', '/dir'); - - // All errors should be wrapped consistently - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error).toBeDefined(); - expect(result.error.code).toBe('PROCESS_SPAWN_FAILED'); - expect(result.error.message).toBeDefined(); - } - }); - }); - - it('should handle process crash after successful spawn', () => { - const result = spawner.spawn('test', '/dir'); - - expect(result.ok).toBe(true); - if (result.ok) { - const { process } = result.value; - - // Use promise instead of done() - return new Promise((resolve) => { - process.on('error', (err) => { - expect(err.message).toBe('Process crashed'); - resolve(); - }); - - // Simulate crash after spawn - setImmediate(() => { - process.emit('error', new Error('Process crashed')); - }); - }); - } - }); - }); - - describe('Resource cleanup', () => { - it('should not leak resources when spawning many processes', () => { - const pids: number[] = []; - - // Spawn many processes - for (let i = 0; i < 100; i++) { - mockProcess.pid = TEST_COUNTS.STRESS_TEST * 10 + i; - const result = spawner.spawn(`task ${i}`, '/tmp'); - - expect(result.ok).toBe(true); - if (result.ok) { - pids.push(result.value.pid); - } - } - - // All should have unique PIDs - const uniquePids = new Set(pids); - expect(uniquePids.size).toBe(100); - }); - - it('should handle rapid spawn and kill cycles', () => { - vi.useFakeTimers(); - - for (let i = 0; i < 10; i++) { - mockProcess.pid = 20000 + i; - const result = spawner.spawn('quick task', '/tmp'); - - if (result.ok) { - const killResult = spawner.kill(result.value.pid); - expect(killResult.ok).toBe(true); - } - } - - // Advance time to trigger all SIGKILL timeouts - vi.advanceTimersByTime(TIMEOUTS.LONG); - - // Cleanup should handle all - spawner.dispose(); - expect(vi.getTimerCount()).toBe(0); - - vi.useRealTimers(); - }); - }); - - describe('Environment variable handling', () => { - it('should strip all Claude Code nesting indicators from worker environment', () => { - // Capture originals for restoration - const saved: Record = {}; - const varsToSet: Record = { - CLAUDECODE: 'true', - CLAUDE_CODE_ENTRYPOINT: 'cli', - CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS: '1', - }; - - for (const key of Object.keys(varsToSet)) { - saved[key] = process.env[key]; - process.env[key] = varsToSet[key]; - } - - try { - const result = spawner.spawn('test task', '/tmp', 'task-123'); - - expect(result.ok).toBe(true); - if (result.ok) { - const spawnCall = spawnSpy.mock.calls[0]; - const env = (spawnCall[2] as { env: Record }).env; - - // All nesting indicators must be stripped - expect(env.CLAUDECODE).toBeUndefined(); - expect(env.CLAUDE_CODE_ENTRYPOINT).toBeUndefined(); - expect(env.CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS).toBeUndefined(); - - // Autobeat vars must be present - expect(env.AUTOBEAT_WORKER).toBe('true'); - expect(env.AUTOBEAT_TASK_ID).toBe('task-123'); - } - } finally { - for (const [key, original] of Object.entries(saved)) { - if (original !== undefined) { - process.env[key] = original; - } else { - delete process.env[key]; - } - } - } - }); - - it('should strip all CLAUDE_CODE_* prefixed env vars', () => { - const saved = process.env.CLAUDE_CODE_FUTURE_FLAG; - process.env.CLAUDE_CODE_FUTURE_FLAG = 'enabled'; - - try { - const result = spawner.spawn('test task', '/tmp'); - - expect(result.ok).toBe(true); - if (result.ok) { - const spawnCall = spawnSpy.mock.calls[0]; - const env = (spawnCall[2] as { env: Record }).env; - - expect(env.CLAUDE_CODE_FUTURE_FLAG).toBeUndefined(); - } - } finally { - if (saved !== undefined) { - process.env.CLAUDE_CODE_FUTURE_FLAG = saved; - } else { - delete process.env.CLAUDE_CODE_FUTURE_FLAG; - } - } - }); - - it('should preserve non-Claude-Code env vars', () => { - const saved: Record = {}; - const safeVars: Record = { - CLAUDE_API_KEY: 'sk-test-key', - ANTHROPIC_API_KEY: 'sk-ant-key', - PATH: '/usr/bin:/bin', - HOME: '/home/test', - }; - - for (const key of Object.keys(safeVars)) { - saved[key] = process.env[key]; - process.env[key] = safeVars[key]; - } - - try { - const result = spawner.spawn('test task', '/tmp'); - - expect(result.ok).toBe(true); - if (result.ok) { - const spawnCall = spawnSpy.mock.calls[0]; - const env = (spawnCall[2] as { env: Record }).env; - - // These must NOT be stripped - expect(env.CLAUDE_API_KEY).toBe('sk-test-key'); - expect(env.ANTHROPIC_API_KEY).toBe('sk-ant-key'); - expect(env.PATH).toBe('/usr/bin:/bin'); - expect(env.HOME).toBe('/home/test'); - } - } finally { - for (const [key, original] of Object.entries(saved)) { - if (original !== undefined) { - process.env[key] = original; - } else { - delete process.env[key]; - } - } - } - }); - }); -}); From 490d24acf296e2fe84dac99aaba4a2552b74387f Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Fri, 3 Apr 2026 02:11:46 +0300 Subject: [PATCH 07/11] fix: validate baseUrl in CLI agents config set command The CLI path for 'beat agents config set baseUrl ' accepted any string without validation, while the MCP ConfigureAgent tool correctly enforced z.string().url(). Adds new URL() validation before saveAgentConfig to reject malformed URLs with a user-friendly error message, matching the MCP validation boundary. Empty string (clear) is still allowed. --- src/cli/commands/agents.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/cli/commands/agents.ts b/src/cli/commands/agents.ts index 14c84a3e..2dbb88d7 100644 --- a/src/cli/commands/agents.ts +++ b/src/cli/commands/agents.ts @@ -116,6 +116,16 @@ export async function agentsConfigSet( ); } + // Validate baseUrl is a well-formed absolute URL before saving + if (key === 'baseUrl' && value !== '') { + try { + new URL(value); + } catch { + ui.error(`Invalid baseUrl: "${value}" is not a valid URL. Example: https://proxy.example.com/v1`); + process.exit(1); + } + } + const result = saveAgentConfig(agent, key, value); if (!result.ok) { ui.error(result.error); From c33befb66b75d7ee2167cfed9fa2c3d1da3be80b Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Fri, 3 Apr 2026 02:12:11 +0300 Subject: [PATCH 08/11] perf: load agent config once per spawn instead of three times BaseAgentAdapter.spawn() previously called loadAgentConfig() independently inside resolveAuth(), resolveBaseUrl(), and resolveModel(), causing three separate readFileSync + JSON.parse operations per spawn. Config is now loaded once at the top of spawn() and passed as an AgentConfig parameter to each of the three methods. ClaudeAdapter.resolveBaseUrl() updated to accept and forward the pre-loaded config to super. --- src/implementations/base-agent-adapter.ts | 26 ++++++++++++++--------- src/implementations/claude-adapter.ts | 6 +++--- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/src/implementations/base-agent-adapter.ts b/src/implementations/base-agent-adapter.ts index bde6e576..d9fa0952 100644 --- a/src/implementations/base-agent-adapter.ts +++ b/src/implementations/base-agent-adapter.ts @@ -14,7 +14,7 @@ import { ChildProcess, spawn } from 'child_process'; import { AGENT_AUTH, AGENT_BASE_URL_ENV, AgentAdapter, AgentAuthConfig, AgentProvider, isCommandInPath } from '../core/agents.js'; -import { Configuration, loadAgentConfig } from '../core/configuration.js'; +import { AgentConfig, Configuration, loadAgentConfig } from '../core/configuration.js'; import { AutobeatError, agentMisconfigured, ErrorCode, processSpawnFailed } from '../core/errors.js'; import { err, ok, Result, tryCatch } from '../core/result.js'; @@ -60,9 +60,10 @@ export abstract class BaseAgentAdapter implements AgentAdapter { * NOTE: spawn() verifies CLI binary exists before calling resolveAuth(), * so step 3 safely assumes login-based auth if no explicit key is configured. * + * @param agentConfig - Pre-loaded agent config (loaded once in spawn() to avoid redundant reads) * @returns Additional env vars to inject (e.g., stored API key), or error */ - protected resolveAuth(): Result<{ injectedEnv: Record }> { + protected resolveAuth(agentConfig: AgentConfig): Result<{ injectedEnv: Record }> { const auth = this.authConfig; // 1. Check env vars (explicit override, CI use case) @@ -73,7 +74,6 @@ export abstract class BaseAgentAdapter implements AgentAdapter { } // 2. Check config file for stored API key - const agentConfig = loadAgentConfig(this.provider); if (agentConfig.apiKey) { // Inject stored key as the first env var for this agent return ok({ injectedEnv: { [auth.envVars[0]]: agentConfig.apiKey } }); @@ -92,15 +92,16 @@ export abstract class BaseAgentAdapter implements AgentAdapter { * Resolve base URL env var to inject into spawn env. * Resolution order: user env (already in cleanEnv, takes precedence) → config file. * Returns env var name → value to inject. Empty object means nothing to inject. + * + * @param agentConfig - Pre-loaded agent config (loaded once in spawn() to avoid redundant reads) */ - protected resolveBaseUrl(): Record { + protected resolveBaseUrl(agentConfig: AgentConfig): Record { const baseUrlEnvVar = AGENT_BASE_URL_ENV[this.provider]; // If user already has it set in their env, don't inject (cleanEnv will carry it through) if (process.env[baseUrlEnvVar]) { return {}; } // Check config file - const agentConfig = loadAgentConfig(this.provider); if (agentConfig.baseUrl) { return { [baseUrlEnvVar]: agentConfig.baseUrl }; } @@ -110,10 +111,11 @@ export abstract class BaseAgentAdapter implements AgentAdapter { /** * Resolve the model to use for this spawn. * Resolution order: per-task model → agent-config model → undefined (use CLI default). + * + * @param agentConfig - Pre-loaded agent config (loaded once in spawn() to avoid redundant reads) */ - protected resolveModel(taskModel?: string): string | undefined { + protected resolveModel(agentConfig: AgentConfig, taskModel?: string): string | undefined { if (taskModel) return taskModel; - const agentConfig = loadAgentConfig(this.provider); return agentConfig.model; } @@ -134,11 +136,15 @@ export abstract class BaseAgentAdapter implements AgentAdapter { ); } + // Load agent config once — passed to resolveAuth, resolveBaseUrl, resolveModel + // to avoid redundant readFileSync + JSON.parse calls per spawn + const agentConfig = loadAgentConfig(this.provider); + // Pre-spawn auth validation - const authResult = this.resolveAuth(); + const authResult = this.resolveAuth(agentConfig); if (!authResult.ok) return authResult; - const resolvedModel = this.resolveModel(model); + const resolvedModel = this.resolveModel(agentConfig, model); const finalPrompt = this.transformPrompt(prompt); const args = this.buildArgs(finalPrompt, resolvedModel); @@ -148,7 +154,7 @@ export abstract class BaseAgentAdapter implements AgentAdapter { ([key]) => !this.envPrefixesToStrip.some((prefix) => key.startsWith(prefix)) && !exactMatches.includes(key), ), ); - const baseUrlEnv = this.resolveBaseUrl(); + const baseUrlEnv = this.resolveBaseUrl(agentConfig); const env = { ...this.additionalEnv, ...cleanEnv, diff --git a/src/implementations/claude-adapter.ts b/src/implementations/claude-adapter.ts index 1eb0ac8b..6470200e 100644 --- a/src/implementations/claude-adapter.ts +++ b/src/implementations/claude-adapter.ts @@ -6,7 +6,7 @@ */ import { AgentProvider } from '../core/agents.js'; -import { Configuration } from '../core/configuration.js'; +import { AgentConfig, Configuration } from '../core/configuration.js'; import { BaseAgentAdapter } from './base-agent-adapter.js'; export class ClaudeAdapter extends BaseAgentAdapter { @@ -42,8 +42,8 @@ export class ClaudeAdapter extends BaseAgentAdapter { * (after stripping) ensures the value reaches the child process. * The parent env ANTHROPIC_BASE_URL is preserved via cleanEnv if already set. */ - protected resolveBaseUrl(): Record { - const baseUrlEnv = super.resolveBaseUrl(); + protected resolveBaseUrl(agentConfig: AgentConfig): Record { + const baseUrlEnv = super.resolveBaseUrl(agentConfig); // Determine if a baseUrl is active (from config or user env) const hasBaseUrl = Object.keys(baseUrlEnv).length > 0 || Boolean(process.env.ANTHROPIC_BASE_URL); From e98f9d8ba36ecd375636a792c47b7c50500a2b30 Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Fri, 3 Apr 2026 02:12:18 +0300 Subject: [PATCH 09/11] fix: restore model field through persistence round-trip for loops, schedules, and orchestrations Add model to TaskRequestSchema in loop-repository and schedule-repository so Zod does not strip it on read-back. Add model to PipelineStepsSchema step object and LoopConfigSchema in schedule-repository for the same reason. Add migration v17 to add model column to the orchestrations table, and wire model through OrchestrationRowSchema, OrchestrationRow, toRow(), rowToOrchestration(), and the INSERT/UPDATE SQL statements. --- src/implementations/database.ts | 7 +++++++ src/implementations/loop-repository.ts | 1 + src/implementations/orchestration-repository.ts | 9 +++++++-- src/implementations/schedule-repository.ts | 3 +++ 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/implementations/database.ts b/src/implementations/database.ts index d9e543fc..f21fce61 100644 --- a/src/implementations/database.ts +++ b/src/implementations/database.ts @@ -760,6 +760,13 @@ export class Database implements TransactionRunner { db.exec('ALTER TABLE tasks ADD COLUMN model TEXT'); }, }, + { + version: 17, + description: 'Add model column to orchestrations for per-orchestration model override', + up: (db) => { + db.exec('ALTER TABLE orchestrations ADD COLUMN model TEXT'); + }, + }, ]; } diff --git a/src/implementations/loop-repository.ts b/src/implementations/loop-repository.ts index 517ed8a1..1fd34595 100644 --- a/src/implementations/loop-repository.ts +++ b/src/implementations/loop-repository.ts @@ -96,6 +96,7 @@ const TaskRequestSchema = z.object({ dependsOn: z.array(z.string()).optional(), continueFrom: z.string().optional(), agent: z.enum(AGENT_PROVIDERS_TUPLE).optional(), + model: z.string().optional(), }); /** diff --git a/src/implementations/orchestration-repository.ts b/src/implementations/orchestration-repository.ts index c0d7d7d8..5452877e 100644 --- a/src/implementations/orchestration-repository.ts +++ b/src/implementations/orchestration-repository.ts @@ -29,6 +29,7 @@ const OrchestrationRowSchema = z.object({ state_file_path: z.string().min(1), working_directory: z.string().min(1), agent: z.string().nullable(), + model: z.string().nullable(), max_depth: z.number(), max_workers: z.number(), max_iterations: z.number(), @@ -49,6 +50,7 @@ interface OrchestrationRow { readonly state_file_path: string; readonly working_directory: string; readonly agent: string | null; + readonly model: string | null; readonly max_depth: number; readonly max_workers: number; readonly max_iterations: number; @@ -78,11 +80,11 @@ export class SQLiteOrchestrationRepository implements OrchestrationRepository, S this.saveStmt = this.db.prepare(` INSERT INTO orchestrations ( id, goal, loop_id, state_file_path, working_directory, - agent, max_depth, max_workers, max_iterations, + agent, model, max_depth, max_workers, max_iterations, status, created_at, updated_at, completed_at ) VALUES ( @id, @goal, @loopId, @stateFilePath, @workingDirectory, - @agent, @maxDepth, @maxWorkers, @maxIterations, + @agent, @model, @maxDepth, @maxWorkers, @maxIterations, @status, @createdAt, @updatedAt, @completedAt ) `); @@ -94,6 +96,7 @@ export class SQLiteOrchestrationRepository implements OrchestrationRepository, S state_file_path = @stateFilePath, working_directory = @workingDirectory, agent = @agent, + model = @model, max_depth = @maxDepth, max_workers = @maxWorkers, max_iterations = @maxIterations, @@ -284,6 +287,7 @@ export class SQLiteOrchestrationRepository implements OrchestrationRepository, S stateFilePath: orchestration.stateFilePath, workingDirectory: orchestration.workingDirectory, agent: orchestration.agent ?? null, + model: orchestration.model ?? null, maxDepth: orchestration.maxDepth, maxWorkers: orchestration.maxWorkers, maxIterations: orchestration.maxIterations, @@ -303,6 +307,7 @@ export class SQLiteOrchestrationRepository implements OrchestrationRepository, S stateFilePath: data.state_file_path, workingDirectory: data.working_directory, agent: data.agent ? this.toAgentProvider(data.agent) : undefined, + model: data.model ?? undefined, maxDepth: data.max_depth, maxWorkers: data.max_workers, maxIterations: data.max_iterations, diff --git a/src/implementations/schedule-repository.ts b/src/implementations/schedule-repository.ts index 6639305e..8c0e185e 100644 --- a/src/implementations/schedule-repository.ts +++ b/src/implementations/schedule-repository.ts @@ -87,6 +87,7 @@ const TaskRequestSchema = z.object({ dependsOn: z.array(z.string()).optional(), continueFrom: z.string().optional(), agent: z.enum(AGENT_PROVIDERS_TUPLE).optional(), + model: z.string().optional(), }); /** @@ -106,6 +107,7 @@ const PipelineStepsSchema = z priority: z.enum(['P0', 'P1', 'P2']).optional(), workingDirectory: z.string().optional(), agent: z.enum(AGENT_PROVIDERS_TUPLE).optional(), + model: z.string().optional(), }), ) .min(2) @@ -131,6 +133,7 @@ const LoopConfigSchema = z.object({ pipelineSteps: z.array(z.string()).optional(), priority: z.nativeEnum(Priority).optional(), agent: z.enum(AGENT_PROVIDERS_TUPLE).optional(), + model: z.string().optional(), gitBranch: z.string().optional(), }) satisfies z.ZodType; From c243c252c96507d928eccc6a8d5fa29715e72e59 Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Fri, 3 Apr 2026 02:13:54 +0300 Subject: [PATCH 10/11] fix: resolve batch-c issues in ConfigureAgent and JSON Schema model constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - configure-agent-partial-write: replace sequential early-return saves with collect-all pattern; errors now report which fields were already saved via alreadySaved[] to make partial-write visible instead of silent - json-schema-model-validation-inconsistent: add minLength:1/maxLength:200 to model fields in ScheduleTask, CreatePipeline (step+top-level), SchedulePipeline (step+top-level), CreateLoop, ScheduleLoop, CreateOrchestrator, and ConfigureAgent inputSchema blocks — matching the existing Zod schemas and the DelegateTask JSON Schema pattern - record-string-unknown-type-safety: replace Record with typed CheckPayload and SetPayload interfaces in handleConfigureAgent - duplicated-claude-baseurl-warning: extract getClaudeBaseUrlWarning() private helper; remove three copies of identical condition+string --- src/adapters/mcp-adapter.ts | 136 ++++++++++++++++++++++-------------- 1 file changed, 85 insertions(+), 51 deletions(-) diff --git a/src/adapters/mcp-adapter.ts b/src/adapters/mcp-adapter.ts index de08ad5f..453a2ff1 100644 --- a/src/adapters/mcp-adapter.ts +++ b/src/adapters/mcp-adapter.ts @@ -756,6 +756,8 @@ export class MCPAdapter { model: { type: 'string', description: 'Model override for this task (overrides agent-config default)', + minLength: 1, + maxLength: 200, }, }, required: ['prompt', 'scheduleType'], @@ -888,6 +890,8 @@ export class MCPAdapter { model: { type: 'string', description: 'Model override for this step', + minLength: 1, + maxLength: 200, }, }, required: ['prompt'], @@ -912,6 +916,8 @@ export class MCPAdapter { model: { type: 'string', description: 'Default model for all steps (individual steps can override)', + minLength: 1, + maxLength: 200, }, }, required: ['steps'], @@ -954,6 +960,8 @@ export class MCPAdapter { model: { type: 'string', description: 'Model override for this step', + minLength: 1, + maxLength: 200, }, }, required: ['prompt'], @@ -1011,6 +1019,8 @@ export class MCPAdapter { model: { type: 'string', description: 'Default model for all steps (individual steps can override)', + minLength: 1, + maxLength: 200, }, }, required: ['steps', 'scheduleType'], @@ -1105,6 +1115,8 @@ export class MCPAdapter { model: { type: 'string', description: 'Model override for each iteration task (overrides agent-config default)', + minLength: 1, + maxLength: 200, }, gitBranch: { type: 'string', @@ -1254,7 +1266,7 @@ export class MCPAdapter { gitBranch: { type: 'string', description: 'Git branch name for loop iteration work' }, priority: { type: 'string', enum: ['P0', 'P1', 'P2'] }, agent: { type: 'string', enum: [...AGENT_PROVIDERS] }, - model: { type: 'string', description: 'Model override for each iteration task (overrides agent-config default)' }, + model: { type: 'string', description: 'Model override for each iteration task (overrides agent-config default)', minLength: 1, maxLength: 200 }, scheduleType: { type: 'string', enum: ['cron', 'one_time'] }, cronExpression: { type: 'string', description: 'Cron expression (5-field)' }, scheduledAt: { type: 'string', description: 'ISO 8601 datetime for one-time loops' }, @@ -1300,6 +1312,8 @@ export class MCPAdapter { model: { type: 'string', description: 'Model override for the orchestrator (overrides agent-config default)', + minLength: 1, + maxLength: 200, }, maxDepth: { type: 'number', @@ -1408,6 +1422,8 @@ export class MCPAdapter { model: { type: 'string', description: 'Default model for this agent (set action, overridden by per-task model)', + minLength: 1, + maxLength: 200, }, }, required: ['agent'], @@ -2685,11 +2701,7 @@ export class MCPAdapter { const agentConfig = loadAgentConfig(provider); const authStatus = checkAgentAuth(provider, agentConfig.apiKey); - // Claude-specific: warn when baseUrl is configured without an API key - const claudeBaseUrlWarning = - provider === 'claude' && agentConfig.baseUrl && !agentConfig.apiKey - ? 'Warning: Claude requires an API key when using a custom baseUrl. The base URL will be ignored with login-based auth.' - : undefined; + const claudeBaseUrlWarning = this.getClaudeBaseUrlWarning(provider, agentConfig.baseUrl, agentConfig.apiKey); return { provider, @@ -2925,6 +2937,18 @@ export class MCPAdapter { }); } + /** + * Returns a warning string when Claude has a custom baseUrl configured without an API key. + * Login-based auth does not work with a custom baseUrl, so the setting will be silently + * ignored. Returns undefined for all other providers or when the condition is not met. + */ + private getClaudeBaseUrlWarning(provider: string, baseUrl: string | undefined, apiKey: string | undefined): string | undefined { + if (provider === 'claude' && baseUrl && !apiKey) { + return 'Warning: Claude requires an API key when using a custom baseUrl. The base URL will be ignored with login-based auth.'; + } + return undefined; + } + /** * Handle ConfigureAgent tool call * Actions: check auth status, set API key, reset stored key @@ -2957,18 +2981,25 @@ export class MCPAdapter { const agentConfig = loadAgentConfig(agent); const status = checkAgentAuth(agent, agentConfig.apiKey); - // Claude-specific: warn when baseUrl is configured without an API key - const checkPayload: Record = { + interface CheckPayload { + success: boolean; + ready: boolean; + method: string; + hint?: string; + storedKey?: string; + baseUrl?: string; + model?: string; + warning?: string; + } + const checkWarning = this.getClaudeBaseUrlWarning(agent, agentConfig.baseUrl, agentConfig.apiKey); + const checkPayload: CheckPayload = { success: true, ...status, ...(agentConfig.apiKey && { storedKey: maskApiKey(agentConfig.apiKey) }), ...(agentConfig.baseUrl && { baseUrl: agentConfig.baseUrl }), ...(agentConfig.model && { model: agentConfig.model }), + ...(checkWarning && { warning: checkWarning }), }; - if (agent === 'claude' && agentConfig.baseUrl && !agentConfig.apiKey) { - checkPayload.warning = - 'Warning: Claude requires an API key when using a custom baseUrl. The base URL will be ignored with login-based auth.'; - } return { content: [ @@ -2997,62 +3028,65 @@ export class MCPAdapter { }; } - const messages: string[] = []; + // Perform all writes up-front to avoid partial-write: collect each + // result before returning so that a late failure reports which fields + // were already saved and which failed. + type WriteAttempt = { key: 'apiKey' | 'baseUrl' | 'model'; label: string; ok: boolean; error?: string }; + const attempts: WriteAttempt[] = []; if (apiKey) { const result = saveAgentConfig(agent, 'apiKey', apiKey); - if (!result.ok) { - return { - content: [{ type: 'text', text: JSON.stringify({ success: false, error: result.error }, null, 2) }], - isError: true, - }; - } - messages.push(`API key stored (${maskApiKey(apiKey)})`); + attempts.push({ key: 'apiKey', label: `API key stored (${maskApiKey(apiKey)})`, ok: result.ok, error: result.ok ? undefined : result.error }); } if (baseUrl !== undefined) { const result = saveAgentConfig(agent, 'baseUrl', baseUrl); - if (!result.ok) { - return { - content: [{ type: 'text', text: JSON.stringify({ success: false, error: result.error }, null, 2) }], - isError: true, - }; - } - messages.push(`baseUrl set to ${baseUrl}`); + attempts.push({ key: 'baseUrl', label: `baseUrl set to ${baseUrl}`, ok: result.ok, error: result.ok ? undefined : result.error }); } if (model !== undefined) { const result = saveAgentConfig(agent, 'model', model); - if (!result.ok) { - return { - content: [{ type: 'text', text: JSON.stringify({ success: false, error: result.error }, null, 2) }], - isError: true, - }; - } - messages.push(`model set to ${model}`); + attempts.push({ key: 'model', label: `model set to ${model}`, ok: result.ok, error: result.ok ? undefined : result.error }); } - // Claude-specific: warn when baseUrl is configured without an API key - // (login-based auth does not work with a custom baseUrl) - const warningMessages: string[] = []; - if (agent === 'claude') { - const currentConfig = loadAgentConfig(agent); - const effectiveBaseUrl = baseUrl !== undefined ? baseUrl : currentConfig.baseUrl; - const effectiveApiKey = apiKey ?? currentConfig.apiKey; - if (effectiveBaseUrl && !effectiveApiKey) { - warningMessages.push( - 'Warning: Claude requires an API key when using a custom baseUrl. The base URL will be ignored with login-based auth.', - ); - } + const failed = attempts.filter((a) => !a.ok); + if (failed.length > 0) { + const saved = attempts.filter((a) => a.ok).map((a) => a.key); + return { + content: [ + { + type: 'text', + text: JSON.stringify( + { + success: false, + error: failed.map((a) => a.error).join('; '), + ...(saved.length > 0 && { alreadySaved: saved }), + }, + null, + 2, + ), + }, + ], + isError: true, + }; } - const responsePayload: Record = { + // Compute Claude baseUrl warning using effective values after writes + const currentConfig = loadAgentConfig(agent); + const effectiveBaseUrl = baseUrl !== undefined ? baseUrl : currentConfig.baseUrl; + const effectiveApiKey = apiKey ?? currentConfig.apiKey; + const setWarning = this.getClaudeBaseUrlWarning(agent, effectiveBaseUrl, effectiveApiKey); + + interface SetPayload { + success: boolean; + message: string; + warning?: string; + } + const responsePayload: SetPayload = { success: true, - message: `${agent}: ${messages.join(', ')}`, + message: `${agent}: ${attempts.map((a) => a.label).join(', ')}`, + ...(setWarning && { warning: setWarning }), }; - if (warningMessages.length > 0) { - responsePayload.warning = warningMessages.join(' '); - } return { content: [ From 2b91dc90dd903276afe3fa2471af25c733e90c3c Mon Sep 17 00:00:00 2001 From: Dean Sharon Date: Fri, 3 Apr 2026 02:22:43 +0300 Subject: [PATCH 11/11] test: add model threading coverage for retry, resume, and schedule paths (batch-e) - Add retry() model threading tests: verify model survives retry for both set and undefined values, preventing silent regression if the field is dropped - Add resume() model threading tests: same coverage for resume path - Add schedule manager model threading tests: createSchedule threads model to taskTemplate; createPipeline threads shared and per-step model; createScheduledLoop threads model through both taskTemplate and loopConfig - Fix stale docstring in ClaudeAdapter: remove reference to prompt transformation that was dropped during the v0.5.0 BaseAgentAdapter refactor --- src/implementations/claude-adapter.ts | 3 +- tests/unit/services/schedule-manager.test.ts | 75 ++++++++++++++++++++ tests/unit/services/task-manager.test.ts | 60 ++++++++++++++++ 3 files changed, 137 insertions(+), 1 deletion(-) diff --git a/src/implementations/claude-adapter.ts b/src/implementations/claude-adapter.ts index 6470200e..afedcdc6 100644 --- a/src/implementations/claude-adapter.ts +++ b/src/implementations/claude-adapter.ts @@ -2,7 +2,8 @@ * Claude Code agent adapter implementation * * ARCHITECTURE: Claude-specific logic on top of BaseAgentAdapter. - * Includes prompt transformation for short commands and nesting prevention. + * Handles nesting prevention (strips CLAUDE_CODE_* env vars) and + * injects CLAUDE_CODE_DISABLE_EXPERIMENTAL_BETAS when a baseUrl is active. */ import { AgentProvider } from '../core/agents.js'; diff --git a/tests/unit/services/schedule-manager.test.ts b/tests/unit/services/schedule-manager.test.ts index 4466dd5f..8f2b9fb9 100644 --- a/tests/unit/services/schedule-manager.test.ts +++ b/tests/unit/services/schedule-manager.test.ts @@ -258,6 +258,26 @@ describe('ScheduleManagerService - Unit Tests', () => { if (!result.ok) return; expect(result.value.maxRuns).toBe(5); }); + + it('should thread model into taskTemplate when provided', async () => { + const request = cronRequest({ model: 'claude-opus-4-5' }); + + const result = await service.createSchedule(request); + + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value.taskTemplate.model).toBe('claude-opus-4-5'); + }); + + it('should leave model undefined in taskTemplate when not provided', async () => { + const request = cronRequest(); + + const result = await service.createSchedule(request); + + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value.taskTemplate.model).toBeUndefined(); + }); }); describe('listSchedules()', () => { @@ -770,6 +790,32 @@ describe('ScheduleManagerService - Unit Tests', () => { expect(result.ok).toBe(true); expect(eventBus.getEventCount('ScheduleCreated')).toBe(3); }); + + it('should thread shared model to all steps as default', async () => { + const result = await service.createPipeline(pipelineRequest({ model: 'claude-opus-4-5' })); + + expect(result.ok).toBe(true); + if (!result.ok) return; + + const events = eventBus.getEmittedEvents('ScheduleCreated'); + for (const event of events) { + expect(event.schedule.taskTemplate.model).toBe('claude-opus-4-5'); + } + }); + + it('should allow per-step model override', async () => { + const result = await service.createPipeline({ + steps: [{ prompt: 'Step one', model: 'claude-haiku-3' }, { prompt: 'Step two' }], + model: 'claude-opus-4-5', + }); + + expect(result.ok).toBe(true); + if (!result.ok) return; + + const events = eventBus.getEmittedEvents('ScheduleCreated'); + expect(events[0].schedule.taskTemplate.model).toBe('claude-haiku-3'); + expect(events[1].schedule.taskTemplate.model).toBe('claude-opus-4-5'); + }); }); describe('createScheduledPipeline()', () => { @@ -1112,5 +1158,34 @@ describe('ScheduleManagerService - Unit Tests', () => { expect(result.value.nextRunAt).toBeDefined(); expect(typeof result.value.nextRunAt).toBe('number'); }); + + it('should thread model into taskTemplate and loopConfig when provided', async () => { + const request = scheduledLoopRequest({ + loopConfig: { + prompt: 'Fix failing tests', + strategy: LoopStrategy.RETRY, + exitCondition: 'npm test', + model: 'claude-opus-4-5', + }, + }); + + const result = await service.createScheduledLoop(request); + + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value.taskTemplate.model).toBe('claude-opus-4-5'); + expect(result.value.loopConfig!.model).toBe('claude-opus-4-5'); + }); + + it('should leave model undefined when not provided in loopConfig', async () => { + const request = scheduledLoopRequest(); + + const result = await service.createScheduledLoop(request); + + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value.taskTemplate.model).toBeUndefined(); + expect(result.value.loopConfig!.model).toBeUndefined(); + }); }); }); diff --git a/tests/unit/services/task-manager.test.ts b/tests/unit/services/task-manager.test.ts index 5130d1ee..38022bc9 100644 --- a/tests/unit/services/task-manager.test.ts +++ b/tests/unit/services/task-manager.test.ts @@ -848,6 +848,36 @@ describe('TaskManagerService', () => { expect(result.value.retryOf).toBe(TaskId('retry-3')); }); }); + + describe('model threading', () => { + it('should preserve model from original task on retry', async () => { + const failedTask = buildFailedTask({ + id: TaskId('model-retry-1'), + model: 'claude-opus-4-5', + }); + (taskRepo.findById as ReturnType).mockResolvedValue(ok(failedTask)); + + const result = await service.retry(TaskId('model-retry-1')); + + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value.model).toBe('claude-opus-4-5'); + }); + + it('should preserve undefined model from original task on retry', async () => { + const failedTask = buildFailedTask({ + id: TaskId('model-retry-2'), + model: undefined, + }); + (taskRepo.findById as ReturnType).mockResolvedValue(ok(failedTask)); + + const result = await service.retry(TaskId('model-retry-2')); + + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value.model).toBeUndefined(); + }); + }); }); // --------------------------------------------------------------------------- @@ -1064,5 +1094,35 @@ describe('TaskManagerService', () => { expect(result.ok).toBe(false); }); + + describe('model threading', () => { + it('should preserve model from original task on resume', async () => { + const failed = buildFailedTask({ + id: TaskId('model-resume-1'), + model: 'claude-opus-4-5', + }); + (taskRepo.findById as ReturnType).mockResolvedValue(ok(failed)); + + const result = await svcWithCheckpoint.resume({ taskId: TaskId('model-resume-1') }); + + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value.model).toBe('claude-opus-4-5'); + }); + + it('should preserve undefined model from original task on resume', async () => { + const failed = buildFailedTask({ + id: TaskId('model-resume-2'), + model: undefined, + }); + (taskRepo.findById as ReturnType).mockResolvedValue(ok(failed)); + + const result = await svcWithCheckpoint.resume({ taskId: TaskId('model-resume-2') }); + + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.value.model).toBeUndefined(); + }); + }); }); });