snarktank · Christoffer91 · Mar 14, 2026 · Mar 14, 2026 · Mar 14, 2026 · Mar 15, 2026
diff --git a/src/installer/agent-cron.ts b/src/installer/agent-cron.ts
@@ -6,6 +6,13 @@ import { getDb } from "../db.js";
 const DEFAULT_EVERY_MS = 300_000; // 5 minutes
 const DEFAULT_AGENT_TIMEOUT_SECONDS = 30 * 60; // 30 minutes
 
+function prefixThinkingDirective(thinking: string | undefined, body: string): string {
+  if (!thinking) return body;
+  return `/think ${thinking}
+
+${body}`;
+}
+
 function buildAgentPrompt(workflowId: string, agentId: string): string {
   const fullAgentId = `${workflowId}_${agentId}`;
   const cli = resolveAntfarmCli();
@@ -50,11 +57,11 @@ RULES:
 The workflow cannot advance until you report. Your session ending without reporting = broken pipeline.`;
 }
 
-export function buildWorkPrompt(workflowId: string, agentId: string): string {
+export function buildWorkPrompt(workflowId: string, agentId: string, thinking?: string): string {
   const fullAgentId = `${workflowId}_${agentId}`;
   const cli = resolveAntfarmCli();
 
-  return `You are an Antfarm workflow agent. Execute the pending work below.
+  const body = `You are an Antfarm workflow agent. Execute the pending work below.
 
 ⚠️ CRITICAL: You MUST call "step complete" or "step fail" before ending your session. If you don't, the workflow will be stuck forever. This is non-negotiable.
 
@@ -85,18 +92,26 @@ RULES:
 3. If you're unsure whether to complete or fail, call step fail with an explanation
 
 The workflow cannot advance until you report. Your session ending without reporting = broken pipeline.`;
+
+  return prefixThinkingDirective(thinking, body);
 }
 
 const DEFAULT_POLLING_TIMEOUT_SECONDS = 120;
 const DEFAULT_POLLING_MODEL = "default";
 
-export function buildPollingPrompt(workflowId: string, agentId: string, workModel?: string): string {
+export function buildPollingPrompt(
+  workflowId: string,
+  agentId: string,
+  workModel?: string,
+  workThinking?: string,
+  pollingThinking?: string,
+): string {
   const fullAgentId = `${workflowId}_${agentId}`;
   const cli = resolveAntfarmCli();
   const model = workModel ?? "default";
-  const workPrompt = buildWorkPrompt(workflowId, agentId);
+  const workPrompt = buildWorkPrompt(workflowId, agentId, workThinking);
 
-  return `Step 1 — Quick check for pending work (lightweight, no side effects):
+  const body = `Step 1 — Quick check for pending work (lightweight, no side effects):
 \`\`\`
 node ${cli} step peek "${fullAgentId}"
 \`\`\`
@@ -120,6 +135,8 @@ ${workPrompt}
 ---END WORK PROMPT---
 
 Reply with a short summary of what you spawned.`;
+
+  return prefixThinkingDirective(pollingThinking, body);
 }
 
 export async function setupAgentCrons(workflow: WorkflowSpec): Promise<void> {
@@ -129,6 +146,7 @@ export async function setupAgentCrons(workflow: WorkflowSpec): Promise<void> {
 
   // Resolve polling model: per-agent > workflow-level > default
   const workflowPollingModel = workflow.polling?.model ?? DEFAULT_POLLING_MODEL;
+  const workflowPollingThinking = workflow.polling?.thinking;
   const workflowPollingTimeout = workflow.polling?.timeoutSeconds ?? DEFAULT_POLLING_TIMEOUT_SECONDS;
 
   for (let i = 0; i < agents.length; i++) {
@@ -140,7 +158,7 @@ export async function setupAgentCrons(workflow: WorkflowSpec): Promise<void> {
     // Two-phase: Phase 1 uses cheap polling model + minimal prompt
     const pollingModel = agent.pollingModel ?? workflowPollingModel;
     const workModel = agent.model; // Phase 2 model (passed to sessions_spawn via prompt)
-    const prompt = buildPollingPrompt(workflow.id, agent.id, workModel);
+    const prompt = buildPollingPrompt(workflow.id, agent.id, workModel, agent.thinking, workflowPollingThinking);
     const timeoutSeconds = workflowPollingTimeout;
 
     const result = await createAgentCronJob({

diff --git a/src/installer/events.ts b/src/installer/events.ts
@@ -8,8 +8,8 @@ const EVENTS_FILE = path.join(EVENTS_DIR, "events.jsonl");
 const MAX_EVENTS_SIZE = 10 * 1024 * 1024; // 10MB
 
 export type EventType =
-  | "run.started" | "run.completed" | "run.failed"
-  | "step.pending" | "step.running" | "step.done" | "step.failed" | "step.timeout"
+  | "run.started" | "run.completed" | "run.failed" | "run.blocked"
+  | "step.pending" | "step.running" | "step.done" | "step.failed" | "step.timeout" | "step.blocked"
   | "story.started" | "story.done" | "story.verified" | "story.retry" | "story.failed"
   | "pipeline.advanced";
 

diff --git a/src/installer/install.ts b/src/installer/install.ts
@@ -72,39 +72,79 @@ const TIMEOUT_20_MIN = 1200;
 const TIMEOUT_30_MIN = 1800;
 
 const ROLE_POLICIES: Record<AgentRole, { profile?: string; alsoAllow?: string[]; deny: string[]; timeoutSeconds: number }> = {
+  // planning: read-only reasoning/planning — no exec, no web, no sessions, no memory
+  planning: {
+    profile: "coding",
+    deny: [
+      ...ALWAYS_DENY,
+      "group:runtime", "group:sessions", "group:memory",
+      "write", "edit", "apply_patch",
+      "image", "tts",
+      "group:ui",
+    ],
+    timeoutSeconds: TIMEOUT_20_MIN,
+  },
+
+  // coordination: read + sessions only — used by orchestrators that spawn subagents
+  coordination: {
+    profile: "coding",
+    deny: [
+      ...ALWAYS_DENY,
+      "group:runtime", "group:memory",
+      "write", "edit", "apply_patch",
+      "image", "tts",
+      "group:ui",
+    ],
+    timeoutSeconds: TIMEOUT_20_MIN,
+  },
+
+  // research: read + web only — no exec, no sessions, no memory, no writing
+  research: {
+    profile: "coding",
+    alsoAllow: ["web_search", "web_fetch"],
+    deny: [
+      ...ALWAYS_DENY,
+      "group:runtime", "group:sessions", "group:memory",
+      "write", "edit", "apply_patch",
+      "image", "tts",
+      "group:ui",
+    ],
+    timeoutSeconds: TIMEOUT_20_MIN,
+  },
+
   // analysis: read code, run git/grep, reason — no writing, no web, no browser
   analysis: {
     profile: "coding",
     deny: [
       ...ALWAYS_DENY,
-      "write", "edit", "apply_patch",  // no file modification
-      "image", "tts",                  // unnecessary
-      "group:ui",                      // no browser/canvas
+      "write", "edit", "apply_patch",
+      "image", "tts",
+      "group:ui",
     ],
-    timeoutSeconds: TIMEOUT_20_MIN,  // codebase exploration + reasoning
+    timeoutSeconds: TIMEOUT_20_MIN,
   },
 
   // coding: full read/write/exec — the workhorses (developer, fixer, setup)
   coding: {
     profile: "coding",
     deny: [
       ...ALWAYS_DENY,
-      "image", "tts",                  // unnecessary
-      "group:ui",                      // no browser/canvas
+      "image", "tts",
+      "group:ui",
     ],
-    timeoutSeconds: TIMEOUT_30_MIN,  // implements code + build + tests
+    timeoutSeconds: TIMEOUT_30_MIN,
   },
 
   // verification: read + exec but NO write — preserves independent verification integrity
   verification: {
     profile: "coding",
     deny: [
       ...ALWAYS_DENY,
-      "write", "edit", "apply_patch",  // cannot modify code it's verifying
-      "image", "tts",                  // unnecessary
-      "group:ui",                      // no browser/canvas
+      "write", "edit", "apply_patch",
+      "image", "tts",
+      "group:ui",
     ],
-    timeoutSeconds: TIMEOUT_20_MIN,  // code review + runs tests
+    timeoutSeconds: TIMEOUT_20_MIN,
   },
 
   // testing: read + exec + browser/web for E2E, NO write
@@ -113,22 +153,22 @@ const ROLE_POLICIES: Record<AgentRole, { profile?: string; alsoAllow?: string[];
     alsoAllow: ["browser", "web_search", "web_fetch"],
     deny: [
       ...ALWAYS_DENY,
-      "write", "edit", "apply_patch",  // testers don't write production code
-      "image", "tts",                  // unnecessary
+      "write", "edit", "apply_patch",
+      "image", "tts",
     ],
-    timeoutSeconds: TIMEOUT_30_MIN,  // full test suites + E2E
+    timeoutSeconds: TIMEOUT_30_MIN,
   },
 
   // pr: just needs read + exec (for `gh pr create`)
   pr: {
     profile: "coding",
     deny: [
       ...ALWAYS_DENY,
-      "write", "edit", "apply_patch",  // no file modification
-      "image", "tts",                  // unnecessary
-      "group:ui",                      // no browser/canvas
+      "write", "edit", "apply_patch",
+      "image", "tts",
+      "group:ui",
     ],
-    timeoutSeconds: TIMEOUT_20_MIN,  // quick task, no special-casing
+    timeoutSeconds: TIMEOUT_20_MIN,
   },
 
   // scanning: read + exec + web (CVE lookups), NO write
@@ -137,11 +177,11 @@ const ROLE_POLICIES: Record<AgentRole, { profile?: string; alsoAllow?: string[];
     alsoAllow: ["web_search", "web_fetch"],
     deny: [
       ...ALWAYS_DENY,
-      "write", "edit", "apply_patch",  // scanners don't modify code
-      "image", "tts",                  // unnecessary
-      "group:ui",                      // no browser/canvas
+      "write", "edit", "apply_patch",
+      "image", "tts",
+      "group:ui",
     ],
-    timeoutSeconds: TIMEOUT_20_MIN,  // security scanning + web lookups
+    timeoutSeconds: TIMEOUT_20_MIN,
   },
 };
 
@@ -161,9 +201,10 @@ const SUBAGENT_POLICY = { allowAgents: [] as string[] };
  */
 function inferRole(agentId: string): AgentRole {
   const id = agentId.toLowerCase();
-  if (id.includes("planner") || id.includes("prioritizer") || id.includes("reviewer")
-      || id.includes("investigator") || id.includes("triager")) return "analysis";
-  if (id.includes("verifier")) return "verification";
+  if (id.includes("planner") || id.includes("writer") || id.includes("prioritizer")
+      || id.includes("reviewer") || id.includes("investigator") || id.includes("triager")) return "planning";
+  if (id.includes("orchestrator")) return "coordination";
+  if (id.includes("scout") || id.includes("analyst") || id.includes("skeptic") || id.includes("verifier")) return "research";
   if (id.includes("tester")) return "testing";
   if (id.includes("scanner")) return "scanning";
   if (id === "pr" || id.includes("/pr")) return "pr";

diff --git a/src/installer/step-ops.contract.test.ts b/src/installer/step-ops.contract.test.ts
@@ -0,0 +1,113 @@
+import { afterEach, describe, it } from "node:test";
+import assert from "node:assert/strict";
+import crypto from "node:crypto";
+import { getDb } from "../db.js";
+import { completeStep, validateStepOutputContract } from "./step-ops.js";
+
+type TestStep = {
+  id?: string;
+  stepId: string;
+  status?: string;
+  expects?: string;
+  stepIndex: number;
+  maxRetries?: number;
+};
+
+const testRunIds: string[] = [];
+
+function createRunWithSteps(opts: { runId: string; workflowId?: string; runStatus?: string; steps: TestStep[] }) {
+  const db = getDb();
+  const now = new Date().toISOString();
+  db.prepare(
+    "INSERT INTO runs (id, workflow_id, task, status, context, created_at, updated_at) VALUES (?, ?, ?, ?, '{}', ?, ?)"
+  ).run(opts.runId, opts.workflowId ?? "test-workflow", "test task", opts.runStatus ?? "running", now, now);
+
+  for (const step of opts.steps) {
+    const id = step.id ?? crypto.randomUUID();
+    db.prepare(
+      "INSERT INTO steps (id, run_id, step_id, agent_id, step_index, input_template, expects, status, output, retry_count, max_retries, created_at, updated_at) VALUES (?, ?, ?, ?, ?, '', ?, ?, NULL, 0, ?, ?, ?)"
+    ).run(
+      id,
+      opts.runId,
+      step.stepId,
+      "test-agent",
+      step.stepIndex,
+      step.expects ?? "STATUS: done",
+      step.status ?? "pending",
+      step.maxRetries ?? 2,
+      now,
+      now,
+    );
+    step.id = id;
+  }
+}
+
+function cleanupRun(runId: string) {
+  const db = getDb();
+  db.prepare("DELETE FROM stories WHERE run_id = ?").run(runId);
+  db.prepare("DELETE FROM steps WHERE run_id = ?").run(runId);
+  db.prepare("DELETE FROM runs WHERE id = ?").run(runId);
+}
+
+afterEach(() => {
+  for (const runId of testRunIds) cleanupRun(runId);
+  testRunIds.length = 0;
+});
+
+describe("validateStepOutputContract", () => {
+  it("rejects missing STATUS", () => {
+    assert.throws(
+      () => validateStepOutputContract("FINAL_REPORT: hello", "FINAL_REPORT:"),
+      /Missing required STATUS field/,
+    );
+  });
+
+  it("rejects malformed JSON payloads", () => {
+    assert.throws(
+      () => validateStepOutputContract("STATUS: done\nVERIFIED_PACKET_JSON: {not valid}", "VERIFIED_PACKET_JSON:"),
+      /Malformed VERIFIED_PACKET_JSON/,
+    );
+  });
+
+  it("allows blocked output even when expects targets success fields", () => {
+    const validated = validateStepOutputContract("STATUS: blocked\nBLOCK_REASON: waiting on human", "VERIFIED_PACKET_JSON:");
+    assert.equal(validated.status, "blocked");
+  });
+});
+
+describe("completeStep contract enforcement", () => {
+  it("blocks the run when a step reports STATUS: blocked", () => {
+    const runId = crypto.randomUUID();
+    testRunIds.push(runId);
+    const step: TestStep = { stepId: "verify", stepIndex: 0, expects: "VERIFIED_PACKET_JSON:" };
+    createRunWithSteps({ runId, steps: [step] });
+
+    const result = completeStep(step.id!, "STATUS: blocked\nBLOCK_REASON: waiting on human review");
+    assert.deepEqual(result, { advanced: false, runCompleted: false });
+
+    const db = getDb();
+    const run = db.prepare("SELECT status FROM runs WHERE id = ?").get(runId) as { status: string };
+    const stepRow = db.prepare("SELECT status, output FROM steps WHERE id = ?").get(step.id!) as { status: string; output: string };
+    assert.equal(run.status, "blocked");
+    assert.equal(stepRow.status, "blocked");
+    assert.match(stepRow.output, /BLOCK_REASON: waiting on human review/);
+  });
+
+  it("fails closed when verifier output is missing required VERIFIED_PACKET_JSON", () => {
+    const runId = crypto.randomUUID();
+    testRunIds.push(runId);
+    const verifyStep: TestStep = { stepId: "verify", stepIndex: 0, expects: "VERIFIED_PACKET_JSON:" };
+    const writeStep: TestStep = { stepId: "write", stepIndex: 1, status: "waiting", expects: "FINAL_REPORT:" };
+    createRunWithSteps({ runId, steps: [verifyStep, writeStep] });
+
+    const result = completeStep(verifyStep.id!, "STATUS: done\nCONFIDENCE_SUMMARY: looks good");
+    assert.deepEqual(result, { advanced: false, runCompleted: false });
+
+    const db = getDb();
+    const verifyRow = db.prepare("SELECT status, retry_count FROM steps WHERE id = ?").get(verifyStep.id!) as { status: string; retry_count: number };
+    const writeRow = db.prepare("SELECT status FROM steps WHERE id = ?").get(writeStep.id!) as { status: string };
+    assert.equal(verifyRow.status, "pending");
+    assert.equal(verifyRow.retry_count, 1);
+    assert.equal(writeRow.status, "waiting");
+  });
+});