Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions src/installer/agent-cron.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ import { getDb } from "../db.js";
const DEFAULT_EVERY_MS = 300_000; // 5 minutes
const DEFAULT_AGENT_TIMEOUT_SECONDS = 30 * 60; // 30 minutes

function prefixThinkingDirective(thinking: string | undefined, body: string): string {
if (!thinking) return body;
return `/think ${thinking}

${body}`;
}

function buildAgentPrompt(workflowId: string, agentId: string): string {
const fullAgentId = `${workflowId}_${agentId}`;
const cli = resolveAntfarmCli();
Expand Down Expand Up @@ -50,11 +57,11 @@ RULES:
The workflow cannot advance until you report. Your session ending without reporting = broken pipeline.`;
}

export function buildWorkPrompt(workflowId: string, agentId: string): string {
export function buildWorkPrompt(workflowId: string, agentId: string, thinking?: string): string {
const fullAgentId = `${workflowId}_${agentId}`;
const cli = resolveAntfarmCli();

return `You are an Antfarm workflow agent. Execute the pending work below.
const body = `You are an Antfarm workflow agent. Execute the pending work below.

⚠️ CRITICAL: You MUST call "step complete" or "step fail" before ending your session. If you don't, the workflow will be stuck forever. This is non-negotiable.

Expand Down Expand Up @@ -85,18 +92,26 @@ RULES:
3. If you're unsure whether to complete or fail, call step fail with an explanation

The workflow cannot advance until you report. Your session ending without reporting = broken pipeline.`;

return prefixThinkingDirective(thinking, body);
}

const DEFAULT_POLLING_TIMEOUT_SECONDS = 120;
const DEFAULT_POLLING_MODEL = "default";

export function buildPollingPrompt(workflowId: string, agentId: string, workModel?: string): string {
export function buildPollingPrompt(
workflowId: string,
agentId: string,
workModel?: string,
workThinking?: string,
pollingThinking?: string,
): string {
const fullAgentId = `${workflowId}_${agentId}`;
const cli = resolveAntfarmCli();
const model = workModel ?? "default";
const workPrompt = buildWorkPrompt(workflowId, agentId);
const workPrompt = buildWorkPrompt(workflowId, agentId, workThinking);

return `Step 1 — Quick check for pending work (lightweight, no side effects):
const body = `Step 1 — Quick check for pending work (lightweight, no side effects):
\`\`\`
node ${cli} step peek "${fullAgentId}"
\`\`\`
Expand All @@ -120,6 +135,8 @@ ${workPrompt}
---END WORK PROMPT---

Reply with a short summary of what you spawned.`;

return prefixThinkingDirective(pollingThinking, body);
}

export async function setupAgentCrons(workflow: WorkflowSpec): Promise<void> {
Expand All @@ -129,6 +146,7 @@ export async function setupAgentCrons(workflow: WorkflowSpec): Promise<void> {

// Resolve polling model: per-agent > workflow-level > default
const workflowPollingModel = workflow.polling?.model ?? DEFAULT_POLLING_MODEL;
const workflowPollingThinking = workflow.polling?.thinking;
const workflowPollingTimeout = workflow.polling?.timeoutSeconds ?? DEFAULT_POLLING_TIMEOUT_SECONDS;

for (let i = 0; i < agents.length; i++) {
Expand All @@ -140,7 +158,7 @@ export async function setupAgentCrons(workflow: WorkflowSpec): Promise<void> {
// Two-phase: Phase 1 uses cheap polling model + minimal prompt
const pollingModel = agent.pollingModel ?? workflowPollingModel;
const workModel = agent.model; // Phase 2 model (passed to sessions_spawn via prompt)
const prompt = buildPollingPrompt(workflow.id, agent.id, workModel);
const prompt = buildPollingPrompt(workflow.id, agent.id, workModel, agent.thinking, workflowPollingThinking);
const timeoutSeconds = workflowPollingTimeout;

const result = await createAgentCronJob({
Expand Down
4 changes: 2 additions & 2 deletions src/installer/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ const EVENTS_FILE = path.join(EVENTS_DIR, "events.jsonl");
const MAX_EVENTS_SIZE = 10 * 1024 * 1024; // 10MB

export type EventType =
| "run.started" | "run.completed" | "run.failed"
| "step.pending" | "step.running" | "step.done" | "step.failed" | "step.timeout"
| "run.started" | "run.completed" | "run.failed" | "run.blocked"
| "step.pending" | "step.running" | "step.done" | "step.failed" | "step.timeout" | "step.blocked"
| "story.started" | "story.done" | "story.verified" | "story.retry" | "story.failed"
| "pipeline.advanced";

Expand Down
91 changes: 66 additions & 25 deletions src/installer/install.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,39 +72,79 @@ const TIMEOUT_20_MIN = 1200;
const TIMEOUT_30_MIN = 1800;

const ROLE_POLICIES: Record<AgentRole, { profile?: string; alsoAllow?: string[]; deny: string[]; timeoutSeconds: number }> = {
// planning: read-only reasoning/planning — no exec, no web, no sessions, no memory
planning: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"group:runtime", "group:sessions", "group:memory",
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN,
},

// coordination: read + sessions only — used by orchestrators that spawn subagents
coordination: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"group:runtime", "group:memory",
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN,
},

// research: read + web only — no exec, no sessions, no memory, no writing
research: {
profile: "coding",
alsoAllow: ["web_search", "web_fetch"],
deny: [
...ALWAYS_DENY,
"group:runtime", "group:sessions", "group:memory",
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN,
},

// analysis: read code, run git/grep, reason — no writing, no web, no browser
analysis: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"write", "edit", "apply_patch", // no file modification
"image", "tts", // unnecessary
"group:ui", // no browser/canvas
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN, // codebase exploration + reasoning
timeoutSeconds: TIMEOUT_20_MIN,
},

// coding: full read/write/exec — the workhorses (developer, fixer, setup)
coding: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"image", "tts", // unnecessary
"group:ui", // no browser/canvas
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_30_MIN, // implements code + build + tests
timeoutSeconds: TIMEOUT_30_MIN,
},

// verification: read + exec but NO write — preserves independent verification integrity
verification: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"write", "edit", "apply_patch", // cannot modify code it's verifying
"image", "tts", // unnecessary
"group:ui", // no browser/canvas
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN, // code review + runs tests
timeoutSeconds: TIMEOUT_20_MIN,
},

// testing: read + exec + browser/web for E2E, NO write
Expand All @@ -113,22 +153,22 @@ const ROLE_POLICIES: Record<AgentRole, { profile?: string; alsoAllow?: string[];
alsoAllow: ["browser", "web_search", "web_fetch"],
deny: [
...ALWAYS_DENY,
"write", "edit", "apply_patch", // testers don't write production code
"image", "tts", // unnecessary
"write", "edit", "apply_patch",
"image", "tts",
],
timeoutSeconds: TIMEOUT_30_MIN, // full test suites + E2E
timeoutSeconds: TIMEOUT_30_MIN,
},

// pr: just needs read + exec (for `gh pr create`)
pr: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"write", "edit", "apply_patch", // no file modification
"image", "tts", // unnecessary
"group:ui", // no browser/canvas
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN, // quick task, no special-casing
timeoutSeconds: TIMEOUT_20_MIN,
},

// scanning: read + exec + web (CVE lookups), NO write
Expand All @@ -137,11 +177,11 @@ const ROLE_POLICIES: Record<AgentRole, { profile?: string; alsoAllow?: string[];
alsoAllow: ["web_search", "web_fetch"],
deny: [
...ALWAYS_DENY,
"write", "edit", "apply_patch", // scanners don't modify code
"image", "tts", // unnecessary
"group:ui", // no browser/canvas
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN, // security scanning + web lookups
timeoutSeconds: TIMEOUT_20_MIN,
},
};

Expand All @@ -161,9 +201,10 @@ const SUBAGENT_POLICY = { allowAgents: [] as string[] };
*/
function inferRole(agentId: string): AgentRole {
const id = agentId.toLowerCase();
if (id.includes("planner") || id.includes("prioritizer") || id.includes("reviewer")
|| id.includes("investigator") || id.includes("triager")) return "analysis";
if (id.includes("verifier")) return "verification";
if (id.includes("planner") || id.includes("writer") || id.includes("prioritizer")
|| id.includes("reviewer") || id.includes("investigator") || id.includes("triager")) return "planning";
if (id.includes("orchestrator")) return "coordination";
if (id.includes("scout") || id.includes("analyst") || id.includes("skeptic") || id.includes("verifier")) return "research";
if (id.includes("tester")) return "testing";
if (id.includes("scanner")) return "scanning";
if (id === "pr" || id.includes("/pr")) return "pr";
Expand Down
113 changes: 113 additions & 0 deletions src/installer/step-ops.contract.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import { afterEach, describe, it } from "node:test";
import assert from "node:assert/strict";
import crypto from "node:crypto";
import { getDb } from "../db.js";
import { completeStep, validateStepOutputContract } from "./step-ops.js";

type TestStep = {
id?: string;
stepId: string;
status?: string;
expects?: string;
stepIndex: number;
maxRetries?: number;
};

const testRunIds: string[] = [];

function createRunWithSteps(opts: { runId: string; workflowId?: string; runStatus?: string; steps: TestStep[] }) {
const db = getDb();
const now = new Date().toISOString();
db.prepare(
"INSERT INTO runs (id, workflow_id, task, status, context, created_at, updated_at) VALUES (?, ?, ?, ?, '{}', ?, ?)"
).run(opts.runId, opts.workflowId ?? "test-workflow", "test task", opts.runStatus ?? "running", now, now);

for (const step of opts.steps) {
const id = step.id ?? crypto.randomUUID();
db.prepare(
"INSERT INTO steps (id, run_id, step_id, agent_id, step_index, input_template, expects, status, output, retry_count, max_retries, created_at, updated_at) VALUES (?, ?, ?, ?, ?, '', ?, ?, NULL, 0, ?, ?, ?)"
).run(
id,
opts.runId,
step.stepId,
"test-agent",
step.stepIndex,
step.expects ?? "STATUS: done",
step.status ?? "pending",
step.maxRetries ?? 2,
now,
now,
);
step.id = id;
}
}

function cleanupRun(runId: string) {
const db = getDb();
db.prepare("DELETE FROM stories WHERE run_id = ?").run(runId);
db.prepare("DELETE FROM steps WHERE run_id = ?").run(runId);
db.prepare("DELETE FROM runs WHERE id = ?").run(runId);
}

afterEach(() => {
for (const runId of testRunIds) cleanupRun(runId);
testRunIds.length = 0;
});

describe("validateStepOutputContract", () => {
it("rejects missing STATUS", () => {
assert.throws(
() => validateStepOutputContract("FINAL_REPORT: hello", "FINAL_REPORT:"),
/Missing required STATUS field/,
);
});

it("rejects malformed JSON payloads", () => {
assert.throws(
() => validateStepOutputContract("STATUS: done\nVERIFIED_PACKET_JSON: {not valid}", "VERIFIED_PACKET_JSON:"),
/Malformed VERIFIED_PACKET_JSON/,
);
});

it("allows blocked output even when expects targets success fields", () => {
const validated = validateStepOutputContract("STATUS: blocked\nBLOCK_REASON: waiting on human", "VERIFIED_PACKET_JSON:");
assert.equal(validated.status, "blocked");
});
});

describe("completeStep contract enforcement", () => {
it("blocks the run when a step reports STATUS: blocked", () => {
const runId = crypto.randomUUID();
testRunIds.push(runId);
const step: TestStep = { stepId: "verify", stepIndex: 0, expects: "VERIFIED_PACKET_JSON:" };
createRunWithSteps({ runId, steps: [step] });

const result = completeStep(step.id!, "STATUS: blocked\nBLOCK_REASON: waiting on human review");
assert.deepEqual(result, { advanced: false, runCompleted: false });

const db = getDb();
const run = db.prepare("SELECT status FROM runs WHERE id = ?").get(runId) as { status: string };
const stepRow = db.prepare("SELECT status, output FROM steps WHERE id = ?").get(step.id!) as { status: string; output: string };
assert.equal(run.status, "blocked");
assert.equal(stepRow.status, "blocked");
assert.match(stepRow.output, /BLOCK_REASON: waiting on human review/);
});

it("fails closed when verifier output is missing required VERIFIED_PACKET_JSON", () => {
const runId = crypto.randomUUID();
testRunIds.push(runId);
const verifyStep: TestStep = { stepId: "verify", stepIndex: 0, expects: "VERIFIED_PACKET_JSON:" };
const writeStep: TestStep = { stepId: "write", stepIndex: 1, status: "waiting", expects: "FINAL_REPORT:" };
createRunWithSteps({ runId, steps: [verifyStep, writeStep] });

const result = completeStep(verifyStep.id!, "STATUS: done\nCONFIDENCE_SUMMARY: looks good");
assert.deepEqual(result, { advanced: false, runCompleted: false });

const db = getDb();
const verifyRow = db.prepare("SELECT status, retry_count FROM steps WHERE id = ?").get(verifyStep.id!) as { status: string; retry_count: number };
const writeRow = db.prepare("SELECT status FROM steps WHERE id = ?").get(writeStep.id!) as { status: string };
assert.equal(verifyRow.status, "pending");
assert.equal(verifyRow.retry_count, 1);
assert.equal(writeRow.status, "waiting");
});
});
Loading