Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions src/installer/agent-cron.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ import { getDb } from "../db.js";
/** Default `every` interval: 5 minutes, expressed in milliseconds. */
const DEFAULT_EVERY_MS = 5 * 60 * 1_000;
/** Default agent timeout: 30 minutes, expressed in seconds. */
const DEFAULT_AGENT_TIMEOUT_SECONDS = 1_800;

/**
 * Prepends a `/think <level>` directive line to a prompt body.
 *
 * @param thinking - Requested thinking level. When it is undefined, empty, or
 *   whitespace-only, no directive is emitted.
 * @param body - Prompt text to prefix.
 * @returns `body` unchanged when no level is given; otherwise
 *   `/think <level>`, a blank line, then `body`.
 */
function prefixThinkingDirective(thinking: string | undefined, body: string): string {
  // Trim first so a blank or padded config value cannot produce a bare
  // "/think " line or push stray whitespace/newlines into the directive.
  const level = thinking?.trim();
  if (!level) return body;
  return `/think ${level}\n\n${body}`;
}

function buildAgentPrompt(workflowId: string, agentId: string): string {
const fullAgentId = `${workflowId}_${agentId}`;
const cli = resolveAntfarmCli();
Expand Down Expand Up @@ -50,11 +57,11 @@ RULES:
The workflow cannot advance until you report. Your session ending without reporting = broken pipeline.`;
}

export function buildWorkPrompt(workflowId: string, agentId: string): string {
export function buildWorkPrompt(workflowId: string, agentId: string, thinking?: string): string {
const fullAgentId = `${workflowId}_${agentId}`;
const cli = resolveAntfarmCli();

return `You are an Antfarm workflow agent. Execute the pending work below.
const body = `You are an Antfarm workflow agent. Execute the pending work below.

⚠️ CRITICAL: You MUST call "step complete" or "step fail" before ending your session. If you don't, the workflow will be stuck forever. This is non-negotiable.

Expand Down Expand Up @@ -85,18 +92,26 @@ RULES:
3. If you're unsure whether to complete or fail, call step fail with an explanation

The workflow cannot advance until you report. Your session ending without reporting = broken pipeline.`;

return prefixThinkingDirective(thinking, body);
}

/** Polling timeout (seconds) applied when the workflow does not configure one: 2 minutes. */
const DEFAULT_POLLING_TIMEOUT_SECONDS = 2 * 60;
/** Polling model identifier applied when the workflow does not configure one. */
const DEFAULT_POLLING_MODEL = "default";

export function buildPollingPrompt(workflowId: string, agentId: string, workModel?: string): string {
export function buildPollingPrompt(
workflowId: string,
agentId: string,
workModel?: string,
workThinking?: string,
pollingThinking?: string,
): string {
const fullAgentId = `${workflowId}_${agentId}`;
const cli = resolveAntfarmCli();
const model = workModel ?? "default";
const workPrompt = buildWorkPrompt(workflowId, agentId);
const workPrompt = buildWorkPrompt(workflowId, agentId, workThinking);

return `Step 1 — Quick check for pending work (lightweight, no side effects):
const body = `Step 1 — Quick check for pending work (lightweight, no side effects):
\`\`\`
node ${cli} step peek "${fullAgentId}"
\`\`\`
Expand All @@ -120,6 +135,8 @@ ${workPrompt}
---END WORK PROMPT---

Reply with a short summary of what you spawned.`;

return prefixThinkingDirective(pollingThinking, body);
}

export async function setupAgentCrons(workflow: WorkflowSpec): Promise<void> {
Expand All @@ -129,6 +146,7 @@ export async function setupAgentCrons(workflow: WorkflowSpec): Promise<void> {

// Resolve polling model: per-agent > workflow-level > default
const workflowPollingModel = workflow.polling?.model ?? DEFAULT_POLLING_MODEL;
const workflowPollingThinking = workflow.polling?.thinking;
const workflowPollingTimeout = workflow.polling?.timeoutSeconds ?? DEFAULT_POLLING_TIMEOUT_SECONDS;

for (let i = 0; i < agents.length; i++) {
Expand All @@ -140,7 +158,7 @@ export async function setupAgentCrons(workflow: WorkflowSpec): Promise<void> {
// Two-phase: Phase 1 uses cheap polling model + minimal prompt
const pollingModel = agent.pollingModel ?? workflowPollingModel;
const workModel = agent.model; // Phase 2 model (passed to sessions_spawn via prompt)
const prompt = buildPollingPrompt(workflow.id, agent.id, workModel);
const prompt = buildPollingPrompt(workflow.id, agent.id, workModel, agent.thinking, workflowPollingThinking);
const timeoutSeconds = workflowPollingTimeout;

const result = await createAgentCronJob({
Expand Down
91 changes: 66 additions & 25 deletions src/installer/install.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,39 +72,79 @@ const TIMEOUT_20_MIN = 1200;
/** Thirty minutes, expressed in seconds. */
const TIMEOUT_30_MIN = 30 * 60;

const ROLE_POLICIES: Record<AgentRole, { profile?: string; alsoAllow?: string[]; deny: string[]; timeoutSeconds: number }> = {
// planning: read-only reasoning/planning — no exec, no web, no sessions, no memory
planning: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"group:runtime", "group:sessions", "group:memory",
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN,
},

// coordination: read + sessions only — used by orchestrators that spawn subagents
coordination: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"group:runtime", "group:memory",
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN,
},

// research: read + web only — no exec, no sessions, no memory, no writing
research: {
profile: "coding",
alsoAllow: ["web_search", "web_fetch"],
deny: [
...ALWAYS_DENY,
"group:runtime", "group:sessions", "group:memory",
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN,
},

// analysis: read code, run git/grep, reason — no writing, no web, no browser
analysis: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"write", "edit", "apply_patch", // no file modification
"image", "tts", // unnecessary
"group:ui", // no browser/canvas
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN, // codebase exploration + reasoning
timeoutSeconds: TIMEOUT_20_MIN,
},

// coding: full read/write/exec — the workhorses (developer, fixer, setup)
coding: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"image", "tts", // unnecessary
"group:ui", // no browser/canvas
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_30_MIN, // implements code + build + tests
timeoutSeconds: TIMEOUT_30_MIN,
},

// verification: read + exec but NO write — preserves independent verification integrity
verification: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"write", "edit", "apply_patch", // cannot modify code it's verifying
"image", "tts", // unnecessary
"group:ui", // no browser/canvas
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN, // code review + runs tests
timeoutSeconds: TIMEOUT_20_MIN,
},

// testing: read + exec + browser/web for E2E, NO write
Expand All @@ -113,22 +153,22 @@ const ROLE_POLICIES: Record<AgentRole, { profile?: string; alsoAllow?: string[];
alsoAllow: ["browser", "web_search", "web_fetch"],
deny: [
...ALWAYS_DENY,
"write", "edit", "apply_patch", // testers don't write production code
"image", "tts", // unnecessary
"write", "edit", "apply_patch",
"image", "tts",
],
timeoutSeconds: TIMEOUT_30_MIN, // full test suites + E2E
timeoutSeconds: TIMEOUT_30_MIN,
},

// pr: just needs read + exec (for `gh pr create`)
pr: {
profile: "coding",
deny: [
...ALWAYS_DENY,
"write", "edit", "apply_patch", // no file modification
"image", "tts", // unnecessary
"group:ui", // no browser/canvas
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN, // quick task, no special-casing
timeoutSeconds: TIMEOUT_20_MIN,
},

// scanning: read + exec + web (CVE lookups), NO write
Expand All @@ -137,11 +177,11 @@ const ROLE_POLICIES: Record<AgentRole, { profile?: string; alsoAllow?: string[];
alsoAllow: ["web_search", "web_fetch"],
deny: [
...ALWAYS_DENY,
"write", "edit", "apply_patch", // scanners don't modify code
"image", "tts", // unnecessary
"group:ui", // no browser/canvas
"write", "edit", "apply_patch",
"image", "tts",
"group:ui",
],
timeoutSeconds: TIMEOUT_20_MIN, // security scanning + web lookups
timeoutSeconds: TIMEOUT_20_MIN,
},
};

Expand All @@ -161,9 +201,10 @@ const SUBAGENT_POLICY = { allowAgents: [] as string[] };
*/
function inferRole(agentId: string): AgentRole {
const id = agentId.toLowerCase();
if (id.includes("planner") || id.includes("prioritizer") || id.includes("reviewer")
|| id.includes("investigator") || id.includes("triager")) return "analysis";
if (id.includes("verifier")) return "verification";
if (id.includes("planner") || id.includes("writer") || id.includes("prioritizer")
|| id.includes("reviewer") || id.includes("investigator") || id.includes("triager")) return "planning";
if (id.includes("orchestrator")) return "coordination";
if (id.includes("scout") || id.includes("analyst") || id.includes("skeptic") || id.includes("verifier")) return "research";
if (id.includes("tester")) return "testing";
if (id.includes("scanner")) return "scanning";
if (id === "pr" || id.includes("/pr")) return "pr";
Expand Down
11 changes: 8 additions & 3 deletions src/installer/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,33 @@ export type WorkflowAgentFiles = {
/**
* Agent roles control tool access during install.
*
* - analysis: Read-only code exploration (planner, prioritizer, reviewer, investigator, triager)
* - planning: Read-only reasoning/planning (planner, writer, prioritizer, reviewer, investigator, triager)
* - coordination: Read + sessions only, no exec/write (orchestrator)
* - research: Read + web only, no exec/write/sessions (scout, analyst, skeptic, verifier)
* - analysis: Read-only code exploration (legacy/general)
* - coding: Full read/write/exec for implementation (developer, fixer, setup)
* - verification: Read + exec but NO write — independent verification integrity (verifier)
* - verification: Read + exec but NO write — independent verification integrity (legacy verifier role)
* - testing: Read + exec + browser/web for E2E testing, NO write (tester)
* - pr: Read + exec only — just runs `gh pr create` (pr)
* - scanning: Read + exec + web search for CVE lookups, NO write (scanner)
*/
export type AgentRole = "analysis" | "coding" | "verification" | "testing" | "pr" | "scanning";
export type AgentRole = "planning" | "coordination" | "research" | "analysis" | "coding" | "verification" | "testing" | "pr" | "scanning";

/**
 * Declaration of a single agent inside a workflow.
 */
export type WorkflowAgent = {
// Agent identifier; the cron prompt builders join it to the workflow id as `<workflowId>_<agentId>`.
id: string;
name?: string;
description?: string;
// Tool-access role (see AgentRole).
role?: AgentRole;
// Model for the work phase; the polling prompt builder receives it as `workModel`.
model?: string;
// Thinking level for the work phase; when set, the work prompt is prefixed with `/think <value>`.
thinking?: string;
// Per-agent polling model; takes precedence over the workflow-level polling model.
pollingModel?: string;
// NOTE(review): presumably a per-agent timeout override in seconds — consumer not visible here, confirm.
timeoutSeconds?: number;
workspace: WorkflowAgentFiles;
};

/**
 * Workflow-level settings for the polling phase.
 * NOTE(review): presumably the type of `WorkflowSpec.polling` — confirm against the spec type.
 */
export type PollingConfig = {
// Polling model used when an agent does not set its own `pollingModel`.
model?: string;
// Thinking level for the polling prompt; when set, the prompt is prefixed with `/think <value>`.
thinking?: string;
// Polling timeout in seconds.
timeoutSeconds?: number;
};

Expand Down
36 changes: 36 additions & 0 deletions src/installer/workflow-spec.deep-research.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { describe, it } from "node:test";
import assert from "node:assert/strict";
import path from "node:path";
import { fileURLToPath } from "node:url";
import { loadWorkflowSpec } from "./workflow-spec.js";

// Locate the deep-research workflow relative to this test file rather than the cwd.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Two directory levels above this file.
const repoRoot = path.join(__dirname, "..", "..");
const workflowDir = path.resolve(repoRoot, "workflows", "deep-research");

// Structural checks on the deep-research workflow spec: the preflight agent,
// the order of the leading steps, and the context placeholders in step inputs.
describe("deep-research workflow preflight", () => {
  it("loads with a preflight agent that uses analysis role", async () => {
    const { agents } = await loadWorkflowSpec(workflowDir);
    const preflightAgent = agents.find(({ id }) => id === "preflight");
    assert.ok(preflightAgent, "expected preflight agent to exist");
    assert.equal(preflightAgent?.role, "analysis");
  });

  it("runs preflight before plan and research", async () => {
    const { steps } = await loadWorkflowSpec(workflowDir);
    // Only the first three steps are pinned; later steps may vary.
    const leadingStepIds = steps.slice(0, 3).map(({ id }) => id);
    assert.deepEqual(leadingStepIds, ["preflight", "plan", "research"]);
  });

  it("threads local context fields into plan and research", async () => {
    const { steps } = await loadWorkflowSpec(workflowDir);

    // Placeholders each step's input template is required to reference.
    const requiredPlaceholders = [
      {
        stepId: "plan",
        placeholders: ["{{local_context_summary}}", "{{local_context_packet_json}}"],
      },
      {
        stepId: "research",
        placeholders: [
          "{{local_context_summary}}",
          "{{web_research_needed}}",
          "{{safe_shared_context}}",
        ],
      },
    ];

    for (const { stepId, placeholders } of requiredPlaceholders) {
      const step = steps.find(({ id }) => id === stepId);
      for (const placeholder of placeholders) {
        // A missing step yields undefined here, which fails the assertion too.
        assert.ok(step?.input.includes(placeholder));
      }
    }
  });
});
Loading