Skip to content

Commit ed24713

Browse files
authored
Merge branch 'main' into fix/git-log-delimiter-collision
2 parents 7850abd + 1db9d86 commit ed24713

19 files changed

Lines changed: 517 additions & 34 deletions

src/__tests__/authority-rules.test.ts

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -427,6 +427,57 @@ describe("Rate Limit Rules", () => {
427427
expect(decision.reasonCode).toBe("RATE_LIMIT_SPAWN");
428428
});
429429
});
430+
431+
// Suite: rate-limit rules when the DB is reported as unavailable.
// Each case builds a PolicyEngine from createRateLimitRules(), evaluates a single
// request for one tool with riskLevel "dangerous", and expects the decision to be
// action "deny" with reasonCode "DB_UNAVAILABLE".
describe("rate limit DB unavailable", () => {
432+
// Tool under test: update_genesis_prompt (category "self_mod").
it("denies when DB is not accessible (fail-closed)", () => {
433+
const rules = createRateLimitRules();
434+
const engine = new PolicyEngine(db, rules);
435+
436+
const tool = createMockTool({
437+
name: "update_genesis_prompt",
438+
riskLevel: "dangerous",
439+
category: "self_mod",
440+
});
441+
// Pass no DB to simulate DB unavailable
// NOTE(review): the engine above is still constructed with `db`; presumably the
// request produced by createRequest is what carries no DB handle — confirm
// against the createRequest helper, which is not visible in this hunk.
442+
const request = createRequest(tool, {}, "agent");
443+
444+
const decision = engine.evaluate(request);
445+
expect(decision.action).toBe("deny");
446+
expect(decision.reasonCode).toBe("DB_UNAVAILABLE");
447+
});
448+
449+
// Same setup and expectations as the first case, for edit_own_file (category "self_mod").
it("denies edit_own_file when DB is not accessible", () => {
450+
const rules = createRateLimitRules();
451+
const engine = new PolicyEngine(db, rules);
452+
453+
const tool = createMockTool({
454+
name: "edit_own_file",
455+
riskLevel: "dangerous",
456+
category: "self_mod",
457+
});
458+
const request = createRequest(tool, {}, "agent");
459+
460+
const decision = engine.evaluate(request);
461+
expect(decision.action).toBe("deny");
462+
expect(decision.reasonCode).toBe("DB_UNAVAILABLE");
463+
});
464+
465+
// Same setup and expectations, for spawn_child (category "replication").
it("denies spawn_child when DB is not accessible", () => {
466+
const rules = createRateLimitRules();
467+
const engine = new PolicyEngine(db, rules);
468+
469+
const tool = createMockTool({
470+
name: "spawn_child",
471+
riskLevel: "dangerous",
472+
category: "replication",
473+
});
474+
const request = createRequest(tool, {}, "agent");
475+
476+
const decision = engine.evaluate(request);
477+
expect(decision.action).toBe("deny");
478+
expect(decision.reasonCode).toBe("DB_UNAVAILABLE");
479+
});
480+
});
430481
});
431482

432483
// ─── Financial Phase 1 Rules Tests ──────────────────────────────

src/__tests__/low-compute.test.ts

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import { describe, it, expect, vi } from "vitest";
2+
import {
3+
canRunInference,
4+
getModelForTier,
5+
applyTierRestrictions,
6+
} from "../survival/low-compute.js";
7+
import type { SurvivalTier } from "../types.js";
8+
9+
// Suite: canRunInference(tier).
// Expected result per tier as asserted below: true for "high", "normal",
// "low_compute" and "critical"; false only for "dead".
describe("canRunInference", () => {
10+
it("allows inference for 'high' tier", () => {
11+
expect(canRunInference("high")).toBe(true);
12+
});
13+
14+
it("allows inference for 'normal' tier", () => {
15+
expect(canRunInference("normal")).toBe(true);
16+
});
17+
18+
it("allows inference for 'low_compute' tier", () => {
19+
expect(canRunInference("low_compute")).toBe(true);
20+
});
21+
22+
it("allows inference for 'critical' tier", () => {
23+
expect(canRunInference("critical")).toBe(true);
24+
});
25+
26+
// "dead" is the only tier in this suite for which inference is refused.
it("denies inference for 'dead' tier", () => {
27+
expect(canRunInference("dead")).toBe(false);
28+
});
29+
});
30+
31+
// Suite: getModelForTier(tier, defaultModel).
// "high" and "normal" must return the default model unchanged; "low_compute" and
// "critical" must return something other than the default; every tier must
// return a truthy value.
describe("getModelForTier", () => {
32+
// Default model string handed to getModelForTier in every case below.
const defaultModel = "gpt-5.2";
33+
34+
it("returns default model for 'high' tier", () => {
35+
expect(getModelForTier("high", defaultModel)).toBe(defaultModel);
36+
});
37+
38+
it("returns default model for 'normal' tier", () => {
39+
expect(getModelForTier("normal", defaultModel)).toBe(defaultModel);
40+
});
41+
42+
// Only inequality with the default is asserted; the test does not check which
// model is returned or that it is actually cheaper.
it("returns a cheaper model for 'low_compute' tier", () => {
43+
const model = getModelForTier("low_compute", defaultModel);
44+
expect(model).not.toBe(defaultModel);
45+
});
46+
47+
// Same inequality-only check as the 'low_compute' case.
it("returns a cheaper model for 'critical' tier", () => {
48+
const model = getModelForTier("critical", defaultModel);
49+
expect(model).not.toBe(defaultModel);
50+
});
51+
52+
// Sweeps all five tiers, including "dead", and requires a truthy result for each.
it("returns a value for every tier", () => {
53+
const tiers: SurvivalTier[] = ["high", "normal", "low_compute", "critical", "dead"];
54+
for (const tier of tiers) {
55+
const model = getModelForTier(tier, defaultModel);
56+
expect(model).toBeTruthy();
57+
}
58+
});
59+
});
60+
61+
// Suite: applyTierRestrictions(tier, inference, db).
// Asserts the boolean passed to inference.setLowComputeMode per tier: false for
// "high" and "normal", true for "low_compute", "critical" and "dead". The "high"
// case additionally checks that db.setKV is called with ("current_tier", "high").
describe("applyTierRestrictions", () => {
62+
// Returns a new pair of stubs on every call, so call records are not shared
// between tests. `inference` exposes only setLowComputeMode; `db` is an object of
// vi.fn() stubs plus an empty `raw` placeholder. Both are passed `as any` below.
function makeMocks() {
63+
return {
64+
inference: { setLowComputeMode: vi.fn() },
65+
db: {
66+
setKV: vi.fn(),
67+
getKV: vi.fn(),
68+
raw: {} as any,
69+
insertTurn: vi.fn(),
70+
updateTurn: vi.fn(),
71+
getTurnsBySession: vi.fn(),
72+
insertToolCall: vi.fn(),
73+
getToolCallsByTurn: vi.fn(),
74+
getChildById: vi.fn(),
75+
getChildren: vi.fn(),
76+
insertChild: vi.fn(),
77+
updateChild: vi.fn(),
78+
deleteChild: vi.fn(),
79+
close: vi.fn(),
80+
},
81+
};
82+
}
83+
84+
// The only case that also verifies the tier is written via db.setKV.
it("sets low compute mode off for 'high' tier", () => {
85+
const { inference, db } = makeMocks();
86+
applyTierRestrictions("high", inference as any, db as any);
87+
expect(inference.setLowComputeMode).toHaveBeenCalledWith(false);
88+
expect(db.setKV).toHaveBeenCalledWith("current_tier", "high");
89+
});
90+
91+
it("sets low compute mode off for 'normal' tier", () => {
92+
const { inference, db } = makeMocks();
93+
applyTierRestrictions("normal", inference as any, db as any);
94+
expect(inference.setLowComputeMode).toHaveBeenCalledWith(false);
95+
});
96+
97+
it("sets low compute mode on for 'low_compute' tier", () => {
98+
const { inference, db } = makeMocks();
99+
applyTierRestrictions("low_compute", inference as any, db as any);
100+
expect(inference.setLowComputeMode).toHaveBeenCalledWith(true);
101+
});
102+
103+
it("sets low compute mode on for 'critical' tier", () => {
104+
const { inference, db } = makeMocks();
105+
applyTierRestrictions("critical", inference as any, db as any);
106+
expect(inference.setLowComputeMode).toHaveBeenCalledWith(true);
107+
});
108+
109+
it("sets low compute mode on for 'dead' tier", () => {
110+
const { inference, db } = makeMocks();
111+
applyTierRestrictions("dead", inference as any, db as any);
112+
expect(inference.setLowComputeMode).toHaveBeenCalledWith(true);
113+
});
114+
});

src/__tests__/memory.test.ts

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,15 @@ describe("WorkingMemoryManager", () => {
166166
expect(wm.getBySession("s2")).toHaveLength(1);
167167
expect(wm.getBySession("s1")[0].content).toBe("S1 entry");
168168
});
169+
170+
// Guard case: wm.prune("s1", -5) must report 0 removed and leave both entries
// added for session "s1" in place.
it("prune with negative maxEntries does not delete entries", () => {
171+
wm.add({ sessionId: "s1", content: "Entry 1", contentType: "note" });
172+
wm.add({ sessionId: "s1", content: "Entry 2", contentType: "note" });
173+
174+
const removed = wm.prune("s1", -5);
175+
expect(removed).toBe(0);
176+
expect(wm.getBySession("s1")).toHaveLength(2);
177+
});
169178
});
170179

171180
// ─── Episodic Memory Tests ────────────────────────────────────
@@ -242,6 +251,30 @@ describe("EpisodicMemoryManager", () => {
242251
}
243252
expect(ep.getRecent("s1", 3)).toHaveLength(3);
244253
});
254+
255+
// Guard case: ep.prune(0) and ep.prune(-30) must each return 0, and the single
// event recorded for session "s1" must still be returned by getRecent afterwards.
it("prune with zero or negative retentionDays does not delete entries", () => {
256+
ep.record({ sessionId: "s1", eventType: "test", summary: "Recent event" });
257+
258+
expect(ep.prune(0)).toBe(0);
259+
expect(ep.prune(-30)).toBe(0);
260+
expect(ep.getRecent("s1")).toHaveLength(1);
261+
});
262+
263+
// Records three events and checks that ep.search treats '%' and '_' in the query
// as literal characters: each query must return exactly the one event whose
// summary contains that text.
it("should escape SQL LIKE wildcards in search queries", () => {
264+
ep.record({ sessionId: "s1", eventType: "test", summary: "100% complete" });
265+
ep.record({ sessionId: "s1", eventType: "test", summary: "file_name test" });
266+
ep.record({ sessionId: "s1", eventType: "test", summary: "unrelated entry" });
267+
268+
// '%' in query should match literally, not as a wildcard
269+
const pctResults = ep.search("100%");
270+
expect(pctResults).toHaveLength(1);
271+
expect(pctResults[0].summary).toBe("100% complete");
272+
273+
// '_' in query should match literally, not as single-char wildcard
274+
const underResults = ep.search("file_name");
275+
expect(underResults).toHaveLength(1);
276+
expect(underResults[0].summary).toBe("file_name test");
277+
});
245278
});
246279

247280
// ─── Semantic Memory Tests ────────────────────────────────────
@@ -385,6 +418,22 @@ describe("ProceduralMemoryManager", () => {
385418
pm.delete("temp_proc");
386419
expect(pm.get("temp_proc")).toBeUndefined();
387420
});
421+
422+
// Saves procedures whose names contain '%' and '_' and checks that pm.search
// matches those characters literally. "deploy.app" is saved only before the second
// query, as a name that an unescaped '_' wildcard would also match.
it("should escape SQL LIKE wildcards in search queries", () => {
423+
pm.save({ name: "deploy_100%", description: "Full deploy", steps: [] });
424+
pm.save({ name: "deploy_app", description: "Standard deploy", steps: [] });
425+
426+
// '%' should match literally — only "deploy_100%" matches, not both
427+
const pctResults = pm.search("100%");
428+
expect(pctResults).toHaveLength(1);
429+
expect(pctResults[0].name).toBe("deploy_100%");
430+
431+
// '_' should match literally — "deploy_app" should not match "deploy.app"
432+
pm.save({ name: "deploy.app", description: "Dot deploy", steps: [] });
433+
const underResults = pm.search("deploy_app");
434+
expect(underResults).toHaveLength(1);
435+
expect(underResults[0].name).toBe("deploy_app");
436+
});
388437
});
389438

390439
// ─── Relationship Memory Tests ────────────────────────────────
@@ -640,6 +689,26 @@ describe("MemoryIngestionPipeline", () => {
640689
expect(decision).toBeTruthy();
641690
expect(decision!.content).toContain("edit_own_file");
642691
});
692+
693+
// Ingests one turn whose input is "[Message from 0xDEADBEEF]: Hello there" and
// which carries three tool calls, then reads the relationship stored under
// "0xDEADBEEF". The record must exist and its interactionCount must be 0, i.e. it
// must not grow with the number of tool calls in the turn.
it("should record inbox sender interaction only once per turn, not per tool call", () => {
694+
// Simulate a turn from an agent message with multiple tool calls
695+
const turn = makeTurn({
696+
inputSource: "agent" as any,
697+
input: "[Message from 0xDEADBEEF]: Hello there",
698+
toolCalls: [
699+
makeToolCallResult({ id: "tc_1", name: "exec", result: "ok" }),
700+
makeToolCallResult({ id: "tc_2", name: "exec", result: "ok" }),
701+
makeToolCallResult({ id: "tc_3", name: "exec", result: "ok" }),
702+
],
703+
});
704+
pipeline.ingest("s1", turn, turn.toolCalls);
705+
706+
// A separate manager over the same `db` is used to read the relationship back.
const rm = new RelationshipMemoryManager(db);
707+
const rel = rm.get("0xDEADBEEF");
708+
expect(rel).toBeTruthy();
709+
// Should have interaction_count of 0 (new record) — NOT inflated by N tool calls
710+
expect(rel!.interactionCount).toBe(0);
711+
});
643712
});
644713

645714
// ─── Turn Classification Tests ────────────────────────────────

src/__tests__/replication.test.ts

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,16 @@
1010

1111
import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
1212
import { isValidWalletAddress, spawnChild } from "../replication/spawn.js";
13+
import { SandboxCleanup } from "../replication/cleanup.js";
14+
import { ChildLifecycle } from "../replication/lifecycle.js";
15+
import { pruneDeadChildren } from "../replication/lineage.js";
1316
import {
1417
MockConwayClient,
1518
createTestDb,
1619
createTestIdentity,
1720
} from "./mocks.js";
1821
import type { AutomatonDatabase, GenesisConfig } from "../types.js";
22+
import { MIGRATION_V7 } from "../state/schema.js";
1923

2024
// Mock fs for constitution propagation
2125
vi.mock("fs", async (importOriginal) => {
@@ -194,3 +198,110 @@ describe("spawnChild", () => {
194198
expect(deleteSpy).not.toHaveBeenCalled();
195199
});
196200
});
201+
202+
// ─── SandboxCleanup ──────────────────────────────────────────
203+
204+
// Suite: SandboxCleanup.cleanup(childId).
// Both cases walk a child through the same sequence of lifecycle transitions up to
// "stopped" and then run cleanup. The end state must be "cleaned_up" only when
// sandbox deletion succeeds; when it fails the child must remain "stopped".
describe("SandboxCleanup", () => {
205+
let conway: MockConwayClient;
206+
let db: AutomatonDatabase;
207+
let lifecycle: ChildLifecycle;
208+
209+
// Fresh mock client, test DB and lifecycle per test. MIGRATION_V7 is executed on
// db.raw before ChildLifecycle is constructed.
beforeEach(() => {
210+
conway = new MockConwayClient();
211+
db = createTestDb();
212+
// Apply lifecycle events migration
213+
db.raw.exec(MIGRATION_V7);
214+
lifecycle = new ChildLifecycle(db.raw);
215+
});
216+
217+
// Undo the vi.spyOn applied to conway.deleteSandbox in the failure case.
afterEach(() => {
218+
vi.restoreAllMocks();
219+
});
220+
221+
// Failure path: deleteSandbox rejects, cleanup must reject with the same message,
// and the recorded state must be unchanged.
it("does not transition to cleaned_up when sandbox deletion fails", async () => {
222+
// Create a child and transition to stopped
223+
lifecycle.initChild("child-1", "test-child", "sandbox-1", "test prompt");
224+
lifecycle.transition("child-1", "sandbox_created", "created");
225+
lifecycle.transition("child-1", "runtime_ready", "ready");
226+
lifecycle.transition("child-1", "wallet_verified", "verified");
227+
lifecycle.transition("child-1", "funded", "funded");
228+
lifecycle.transition("child-1", "starting", "starting");
229+
lifecycle.transition("child-1", "healthy", "healthy");
230+
lifecycle.transition("child-1", "stopped", "stopped");
231+
232+
// Make deleteSandbox fail
233+
vi.spyOn(conway, "deleteSandbox").mockRejectedValue(new Error("API unavailable"));
234+
235+
const cleanup = new SandboxCleanup(conway, lifecycle, db.raw);
236+
237+
await expect(cleanup.cleanup("child-1")).rejects.toThrow("API unavailable");
238+
239+
// Child should still be in "stopped" state, NOT "cleaned_up"
240+
const state = lifecycle.getCurrentState("child-1");
241+
expect(state).toBe("stopped");
242+
});
243+
244+
// Success path: deleteSandbox is not stubbed here, so the MockConwayClient's own
// implementation is used; the child must end in "cleaned_up".
it("transitions to cleaned_up when sandbox deletion succeeds", async () => {
245+
lifecycle.initChild("child-2", "test-child", "sandbox-2", "test prompt");
246+
lifecycle.transition("child-2", "sandbox_created", "created");
247+
lifecycle.transition("child-2", "runtime_ready", "ready");
248+
lifecycle.transition("child-2", "wallet_verified", "verified");
249+
lifecycle.transition("child-2", "funded", "funded");
250+
lifecycle.transition("child-2", "starting", "starting");
251+
lifecycle.transition("child-2", "healthy", "healthy");
252+
lifecycle.transition("child-2", "stopped", "stopped");
253+
254+
const cleanup = new SandboxCleanup(conway, lifecycle, db.raw);
255+
await cleanup.cleanup("child-2");
256+
257+
const state = lifecycle.getCurrentState("child-2");
258+
expect(state).toBe("cleaned_up");
259+
});
260+
});
261+
262+
// ─── pruneDeadChildren ──────────────────────────────────────
263+
264+
// Suite: pruneDeadChildren(db, cleanup, keepLast).
// Seeds the `children` table directly and passes a stub cleanup object, then
// checks how many rows are reported removed and which child ids were handed to
// cleanup.cleanup.
describe("pruneDeadChildren", () => {
265+
let db: AutomatonDatabase;
266+
let conway: MockConwayClient;
267+
268+
// NOTE(review): `conway` is created here but not referenced by the test visible
// in this hunk.
beforeEach(() => {
269+
db = createTestDb();
270+
db.raw.exec(MIGRATION_V7);
271+
conway = new MockConwayClient();
272+
});
273+
274+
afterEach(() => {
275+
vi.restoreAllMocks();
276+
});
277+
278+
// Inserts one row into `children` through db.raw. id, name, status and created_at
// are bound parameters; address is fixed to '0xabc', sandbox_id to 'sandbox-<id>'
// (interpolated into the SQL text), and genesis_prompt to 'prompt'.
function insertChild(id: string, name: string, status: string, createdAt: string): void {
279+
db.raw.prepare(
280+
`INSERT INTO children (id, name, address, sandbox_id, genesis_prompt, status, created_at)
281+
VALUES (?, ?, '0xabc', 'sandbox-${id}', 'prompt', ?, ?)`,
282+
).run(id, name, status, createdAt);
283+
}
284+
285+
// Seeds dead-0..dead-6 with created_at 2020-01-01 .. 2020-01-07 (one day apart),
// so dead-0 and dead-1 are the two oldest rows. The test does not check which five
// rows are kept.
it("attempts sandbox cleanup for children with dead status", async () => {
286+
// Insert 7 dead children (exceeds keepLast=5, so 2 should be pruned)
287+
for (let i = 0; i < 7; i++) {
288+
insertChild(`dead-${i}`, `child-${i}`, "dead", `2020-01-0${i + 1} 00:00:00`);
289+
}
290+
291+
// Create a mock cleanup that tracks calls
292+
const cleanupCalls: string[] = [];
293+
const mockCleanup = {
294+
cleanup: vi.fn(async (childId: string) => {
295+
cleanupCalls.push(childId);
296+
}),
297+
} as any;
298+
299+
const removed = await pruneDeadChildren(db, mockCleanup, 5);
300+
301+
// 2 oldest should be removed (dead-0 and dead-1)
302+
expect(removed).toBe(2);
303+
// cleanup.cleanup should have been called for "dead" children
304+
expect(cleanupCalls).toContain("dead-0");
305+
expect(cleanupCalls).toContain("dead-1");
306+
});
307+
});

0 commit comments

Comments
 (0)