dean0x · dean0x · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026 · Mar 30, 2026
@@ -6,7 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 
 ## [Unreleased]
 
-Nothing yet.
+### Added
+- **Agent Eval Mode**: Loops can now use an AI agent to evaluate iteration results instead of a shell command. Pass `--eval-mode agent` (CLI) or `evalMode: 'agent'` (MCP) to have the agent review the iteration output and return a pass/fail verdict (retry strategy) or a numeric score (optimize strategy). Use `--eval-prompt` / `evalPrompt` to supply a custom evaluation prompt.
 
 ---
 

@@ -164,6 +164,8 @@ Quick reference for common operations:
 | Loop repository | `src/implementations/loop-repository.ts` |
 | Loop handler | `src/services/handlers/loop-handler.ts` |
 | Loop manager | `src/services/loop-manager.ts` |
+| Agent exit condition evaluator | `src/services/agent-exit-condition-evaluator.ts` |
+| Composite exit condition evaluator | `src/services/composite-exit-condition-evaluator.ts` |
 | Migrate command | `src/cli/commands/migrate.ts` |
 
 ## Documentation Structure

@@ -121,6 +121,14 @@ beat loop "Optimize the bundle size" \
   --max-iterations 10
 ```
 
+**Agent eval** — let an AI judge the result instead of a shell command:
+
+```bash
+beat loop "Fix the failing tests" --eval-mode agent --strategy retry
+beat loop "Optimize the algorithm" --eval-mode agent --strategy optimize --maximize \
+  --eval-prompt "Score the solution on correctness and efficiency (0-100)"
+```
+
 **Pipeline loops** — repeat a multi-step workflow:
 
 ```bash

@@ -2,7 +2,7 @@
 
 This document lists all features that are **currently implemented and working** in Autobeat.
 
-Last Updated: March 2026 (v1.0.0)
+Last Updated: March 2026
 
 ## ✅ Autonomous Orchestration (v1.0.0)
 
@@ -338,6 +338,7 @@ Last Updated: March 2026 (v1.0.0)
 ### Loop Strategies
 - **Retry**: Run a task until an exit condition passes — shell command returning exit code 0 ends the loop
 - **Optimize**: Run a task, score output with eval script, keep improvements — seek the best score across iterations (minimize or maximize direction)
+- **Agent Eval Mode**: Either strategy can delegate exit condition evaluation to an AI agent instead of a shell command. Pass `evalMode: 'agent'` (MCP) or `--eval-mode agent` (CLI). The agent reads iteration output and returns pass/fail (retry) or a numeric score (optimize). Use `evalPrompt` / `--eval-prompt` to customize the evaluation prompt.
 
 ### Single Task Loops
 - **Task Prompt**: Each iteration runs the same prompt (or enriched with checkpoint context if `freshContext` is false)
@@ -354,10 +355,14 @@ Last Updated: March 2026 (v1.0.0)
 - **Cooldown**: Delay between iterations in milliseconds (default: 0)
 - **Eval Timeout**: Timeout for exit condition evaluation (default: 60s, minimum: 1s, maximum: 300s for shell mode, 600s for agent mode)
 - **Fresh Context**: Each iteration gets a fresh agent context (default: true) or continues from previous checkpoint
+- **Eval Mode** (`evalMode`): `'shell'` (default) evaluates iteration results via a shell command exit code; `'agent'` delegates evaluation to an AI agent
+- **Eval Prompt** (`evalPrompt`): Optional custom instructions for the agent evaluator (agent mode only). When omitted, the agent uses a default review prompt.
 
 ### CLI Commands (v0.7.0+)
 - `beat loop <prompt> --until <cmd>`: Create a retry loop (run until shell command exits 0)
 - `beat loop <prompt> --eval <cmd> --minimize|--maximize`: Create an optimize loop (score-based)
+- `beat loop <prompt> --eval-mode agent --strategy retry`: Create a retry loop using agent evaluation
+- `beat loop <prompt> --eval-mode agent --strategy optimize --maximize [--eval-prompt "..."]`: Create an optimize loop with agent scoring
 - `beat loop --pipeline --step "..." --step "..." --until <cmd>`: Create a pipeline loop
 - `beat loop list [--status <status>]`: List loops with optional status filter
 - `beat loop status <loop-id> [--history]`: Get loop details and iteration history
@@ -373,6 +378,7 @@ Last Updated: March 2026 (v1.0.0)
 
 ### Database Schema
 - **Migration 10**: `loops` table for loop definitions and state, `loop_iterations` table for per-iteration execution records
+- **Migration 15**: `eval_mode` and `eval_prompt` columns on `loops` table, `eval_feedback` column on `loop_iterations` table
 
 ## ❌ NOT Implemented
 - **Distributed Processing**: Single-server only

@@ -17,6 +17,7 @@ import {
 } from '../core/agents.js';
 import { type Configuration, loadAgentConfig, resetAgentConfig, saveAgentConfig } from '../core/configuration.js';
 import {
+  EvalMode,
   LoopCreateRequest,
   LoopId,
   LoopStatus,
@@ -234,9 +235,31 @@ const ConfigureAgentSchema = z.object({
 const CreateLoopSchema = z.object({
   prompt: z.string().min(1).max(4000).optional().describe('Task prompt for each iteration'),
   strategy: z.enum(['retry', 'optimize']).describe('Loop strategy'),
-  exitCondition: z.string().min(1).max(4000).describe('Shell command to evaluate after each iteration'),
+  exitCondition: z
+    .string()
+    .min(1)
+    .max(4000)
+    .optional()
+    .describe('Shell command to evaluate after each iteration (required for shell eval mode)'),
+  evalMode: z
+    .nativeEnum(EvalMode)
+    .optional()
+    .default(EvalMode.SHELL)
+    .describe('Evaluation mode: shell command or agent review'),
+  evalPrompt: z
+    .string()
+    .min(1)
+    .max(8000)
+    .optional()
+    .describe('Custom prompt for agent evaluator (agent eval mode only)'),
   evalDirection: z.enum(['minimize', 'maximize']).optional().describe('Score direction for optimize strategy'),
-  evalTimeout: z.number().min(1000).optional().default(60000).describe('Eval script timeout in ms'),
+  evalTimeout: z
+    .number()
+    .min(1000)
+    .max(600000)
+    .optional()
+    .default(60000)
+    .describe('Eval timeout in ms (max: shell=300s, agent=600s)'),
   workingDirectory: z.string().optional().describe('Working directory for task and eval'),
   maxIterations: z.number().min(0).optional().default(10).describe('Max iterations (0 = unlimited)'),
   maxConsecutiveFailures: z.number().min(0).optional().default(3).describe('Max consecutive failures before stopping'),
@@ -287,9 +310,25 @@ const ScheduleLoopSchema = z.object({
   // Loop config fields
   prompt: z.string().min(1).max(4000).optional().describe('Task prompt for each iteration'),
   strategy: z.enum(['retry', 'optimize']).describe('Loop strategy'),
-  exitCondition: z.string().min(1).max(4000).describe('Shell command to evaluate after each iteration'),
+  exitCondition: z
+    .string()
+    .min(1)
+    .max(4000)
+    .optional()
+    .describe('Shell command to evaluate after each iteration (required for shell eval mode)'),
+  evalMode: z
+    .nativeEnum(EvalMode)
+    .optional()
+    .default(EvalMode.SHELL)
+    .describe('Evaluation mode: shell command or agent review'),
+  evalPrompt: z
+    .string()
+    .min(1)
+    .max(8000)
+    .optional()
+    .describe('Custom prompt for agent evaluator (agent eval mode only)'),
   evalDirection: z.enum(['minimize', 'maximize']).optional().describe('Score direction for optimize strategy'),
-  evalTimeout: z.number().min(1000).optional().describe('Eval script timeout in ms'),
+  evalTimeout: z.number().min(1000).max(600000).optional().describe('Eval timeout in ms (max: shell=300s, agent=600s)'),
   workingDirectory: z.string().optional().describe('Working directory for task and eval'),
   maxIterations: z.number().min(0).optional().describe('Max iterations (0 = unlimited)'),
   maxConsecutiveFailures: z.number().min(0).optional().describe('Max consecutive failures'),
@@ -952,15 +991,27 @@ export class MCPAdapter {
                     description:
                       'Shell command to evaluate after each iteration (exit code 0 = pass for retry, stdout = score for optimize)',
                   },
+                  evalMode: {
+                    type: 'string',
+                    enum: ['shell', 'agent'],
+                    description: 'Evaluation mode: shell command or agent review (default: shell)',
+                  },
+                  evalPrompt: {
+                    type: 'string',
+                    description: 'Custom prompt for agent evaluator (agent eval mode only)',
+                    minLength: 1,
+                    maxLength: 8000,
+                  },
                   evalDirection: {
                     type: 'string',
                     enum: ['minimize', 'maximize'],
                     description: 'Score direction for optimize strategy',
                   },
                   evalTimeout: {
                     type: 'number',
-                    description: 'Eval script timeout in ms (default: 60000)',
+                    description: 'Eval script timeout in ms (default: 60000, max: 600000)',
                     minimum: 1000,
+                    maximum: 600000,
                   },
                   workingDirectory: {
                     type: 'string',
@@ -1007,7 +1058,7 @@ export class MCPAdapter {
                     description: 'Git branch name for loop iteration work (v0.8.0)',
                   },
                 },
-                required: ['strategy', 'exitCondition'],
+                required: ['strategy'],
               },
             },
             {
@@ -1120,8 +1171,22 @@ export class MCPAdapter {
                   prompt: { type: 'string', description: 'Task prompt for each iteration' },
                   strategy: { type: 'string', enum: ['retry', 'optimize'], description: 'Loop strategy' },
                   exitCondition: { type: 'string', description: 'Shell command to evaluate after each iteration' },
+                  evalMode: {
+                    type: 'string',
+                    enum: ['shell', 'agent'],
+                    description: 'Evaluation mode: shell command or agent review (default: shell)',
+                  },
+                  evalPrompt: {
+                    type: 'string',
+                    description: 'Custom prompt for agent evaluator (agent eval mode only)',
+                  },
                   evalDirection: { type: 'string', enum: ['minimize', 'maximize'] },
-                  evalTimeout: { type: 'number', description: 'Eval script timeout in ms', minimum: 1000 },
+                  evalTimeout: {
+                    type: 'number',
+                    description: 'Eval script timeout in ms',
+                    minimum: 1000,
+                    maximum: 600000,
+                  },
                   workingDirectory: { type: 'string' },
                   maxIterations: { type: 'number', description: 'Max iterations (0 = unlimited)', minimum: 0 },
                   maxConsecutiveFailures: { type: 'number', minimum: 0 },
@@ -1144,7 +1209,7 @@ export class MCPAdapter {
                   maxRuns: { type: 'number', description: 'Maximum number of loop runs for cron schedules' },
                   expiresAt: { type: 'string', description: 'ISO 8601 expiration datetime' },
                 },
-                required: ['strategy', 'exitCondition', 'scheduleType'],
+                required: ['strategy', 'scheduleType'],
               },
             },
             // Agent tools (v0.5.0 Multi-Agent Support)
@@ -2145,6 +2210,8 @@ export class MCPAdapter {
       prompt: data.prompt,
       strategy: data.strategy === 'retry' ? LoopStrategy.RETRY : LoopStrategy.OPTIMIZE,
       exitCondition: data.exitCondition,
+      evalMode: data.evalMode,
+      evalPrompt: data.evalPrompt,
       evalDirection: toOptimizeDirection(data.evalDirection),
       evalTimeout: data.evalTimeout,
       workingDirectory: data.workingDirectory,
@@ -2226,6 +2293,8 @@ export class MCPAdapter {
             gitBranch: loop.gitBranch ?? null,
             gitBaseBranch: loop.gitBaseBranch ?? null,
             gitStartCommitSha: loop.gitStartCommitSha ?? null,
+            evalMode: loop.evalMode,
+            evalPrompt: loop.evalPrompt ?? null,
             scheduleId: loop.scheduleId ?? null,
             createdAt: new Date(loop.createdAt).toISOString(),
             updatedAt: new Date(loop.updatedAt).toISOString(),
@@ -2250,6 +2319,7 @@ export class MCPAdapter {
             score: iter.score ?? null,
             exitCode: iter.exitCode ?? null,
             errorMessage: iter.errorMessage ?? null,
+            evalFeedback: iter.evalFeedback ?? null,
             gitBranch: iter.gitBranch ?? null,
             gitCommitSha: iter.gitCommitSha ?? null,
             preIterationCommitSha: iter.preIterationCommitSha ?? null,
@@ -2471,6 +2541,8 @@ export class MCPAdapter {
       prompt: data.prompt,
       strategy: data.strategy === 'retry' ? LoopStrategy.RETRY : LoopStrategy.OPTIMIZE,
       exitCondition: data.exitCondition,
+      evalMode: data.evalMode,
+      evalPrompt: data.evalPrompt,
       evalDirection: toOptimizeDirection(data.evalDirection),
       evalTimeout: data.evalTimeout,
       workingDirectory: data.workingDirectory,