Skip to content

Commit 7b1ead3

Browse files
authored
Merge pull request #370 from asynkron/codex/fix-virtual-agent-summary-output
Improve virtual agent summaries
2 parents 4240973 + b1cb208 commit 7b1ead3

File tree

3 files changed

+271
-28
lines changed

3 files changed

+271
-28
lines changed

packages/core/src/agent/__tests__/virtualCommandExecutor.test.ts

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { describe, expect, test, jest } from '@jest/globals';
33

44
import type { ResponsesClient } from '../../openai/responses.js';
55
import { createChatMessageEntry } from '../historyEntry.js';
6+
import { createObservationHistoryEntry } from '../historyMessageBuilder.js';
67
import type { PassExecutionBaseOptions } from '../loopSupport.js';
78
import type { ExecuteAgentPassOptions } from '../passExecutor/types.js';
89
import { createVirtualCommandExecutor } from '../virtualCommandExecutor.js';
@@ -82,6 +83,7 @@ describe('createVirtualCommandExecutor', () => {
8283

8384
expect(passExecutor).toHaveBeenCalledTimes(1);
8485
expect(outcome.result.exit_code).toBe(0);
86+
expect(outcome.result.stdout).toContain('Summary for "Virtual agent: research"');
8587
expect(outcome.result.stdout).toContain('virtual result summary');
8688
expect(outcome.executionDetails.type).toBe('VIRTUAL');
8789
for (const call of emitEvent.mock.calls) {
@@ -100,6 +102,51 @@ describe('createVirtualCommandExecutor', () => {
100102
}
101103
});
102104

105+
test('includes command observations when the assistant does not respond', async () => {
  const baseOptions = createBaseOptions();
  const emitEvent = jest.fn();
  const emitDebug = jest.fn();

  // Simulated pass: appends a single command observation to the history and
  // adds no assistant message, so the executor has only observations to report.
  const passExecutor = jest.fn(async (options: ExecuteAgentPassOptions) => {
    const observationEntry = createObservationHistoryEntry({
      observation: {
        observation_for_llm: {
          stdout: 'README.md explains the CLI usage.',
          stderr: '',
          truncated: false,
          exit_code: 0,
        },
        observation_metadata: { timestamp: '2024-01-01T00:00:00.000Z' },
      },
      pass: options.passIndex,
    });
    options.history.push(observationEntry);
    return false;
  });

  const executor = createVirtualCommandExecutor({
    systemPrompt: 'system prompt',
    baseOptions,
    passExecutor,
    createChatMessageEntryFn: createChatMessageEntry,
    emitEvent,
    emitDebug,
    createSubAgentLabel: () => 'SubAgent-observation',
  });

  const outcome = await executor({
    command: { shell: 'openagent', run: 'virtual-agent explore {}' },
    descriptor: { action: 'explore', argument: '{}' },
  });

  expect(outcome.result.exit_code).toBe(0);

  // The stdout must carry the heading, the fallback summary, and the raw command output.
  const expectedFragments = [
    'Summary for "Virtual agent: explore"',
    'No assistant summary was produced. Review command results below.',
    'Command Results:',
    'README.md explains the CLI usage.',
  ];
  for (const fragment of expectedFragments) {
    expect(outcome.result.stdout).toContain(fragment);
  }
});
149+
103150
test('limits the number of passes when configured via JSON argument', async () => {
104151
const baseOptions = createBaseOptions();
105152
const emitEvent = jest.fn();

packages/core/src/agent/context.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@
6767
- `passExecutor.ts` now consolidates approval, execution safety, and plan snapshot helpers so the main loop reads linearly while emitting consistent status updates.
6868
- Pass executor unit tests now rely on `passExecutor/__testUtils__/passExecutor.ts` helpers (stored outside `__tests__` so Jest does not collect them as suites), keeping the primary spec focused on behavior assertions instead of repeated mock wiring.
6969
- `passExecutor/commandRuntime.ts` emits the active plan step snapshot alongside each `command-result` event so downstream UIs can display the parent step metadata with command output, races command approval/execution against ESC triggers so human cancellations surface a `'stop'` outcome immediately instead of marching through the remaining plan steps, and short-circuits ESC-triggered waits while finishing command result processing in the background.
70-
- `commandExecution.ts` now understands `openagent` shell commands with the `virtual-agent` prefix and routes them to an injected virtual command executor, enabling recursive/knowledge tasks without leaving the plan runtime. When no executor is configured the runtime emits a structured virtual-command error so plans can recover gracefully, truncating oversized arguments and pointing hosts at the `virtualCommandExecutor` hook. The agent loop now wires a default in-process executor that spins a scoped pass sequence and reports the collected assistant messages as the command result, so virtual commands behave like sub-agents by default. The executor defaults to 10 passes when callers omit a limit and now honors higher requested limits without imposing an artificial ceiling.
70+
- `commandExecution.ts` now understands `openagent` shell commands with the `virtual-agent` prefix and routes them to an injected virtual command executor, enabling recursive/knowledge tasks without leaving the plan runtime. When no executor is configured the runtime emits a structured virtual-command error so plans can recover gracefully, truncating oversized arguments and pointing hosts at the `virtualCommandExecutor` hook. The agent loop now wires a default in-process executor that spins a scoped pass sequence and reports the collected assistant messages as the command result, so virtual commands behave like sub-agents by default. The executor defaults to 10 passes when callers omit a limit and now honors higher requested limits without imposing an artificial ceiling. Virtual command results now consolidate the final assistant message with the recorded command observations so hosts receive a readable summary and the underlying stdout/stderr payloads instead of a bare command log.
7171
- `passExecutor.ts` explicitly treats human command rejections as a successful pass result, and the pass executor suite now includes a regression test to ensure the loop continues after vetoes.
7272
- `passExecutor/planRuntime/` now hosts dedicated helpers (`stateMachine/`, `initialization.ts`, `finalization.ts`, `idleHandlers.ts`, `effects.ts`, `persistence.ts`, `persistenceCoordinator.ts`, `runtimeController.ts`, `observationRecorder.ts`, `reminderController.ts`) so `planRuntime.ts` delegates mutations, persistence, and reminder tracking to focused modules. The runtime methods now return discriminated-union results with explicit side-effect descriptors that callers commit via `applyPlanRuntimeEffects`, shrinking the core class dramatically.
7373
- Persistence/plan state bridging helpers now live in `passExecutor/planRuntime/persistenceEffects.ts`, so initialization, idle-handling, and finalization modules compose persistence warnings/snapshots without hand-rolled duplication.

packages/core/src/agent/virtualCommandExecutor.ts

Lines changed: 223 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,21 @@ import type { PlanHistory } from './passExecutor/types.js';
1212
import type { EmitEvent } from './passExecutor/types.js';
1313
import type { DebugRuntimeEventPayload, EmitRuntimeEventOptions } from './runtimeTypes.js';
1414

15+
/**
 * Normalized view of one command observation recovered from the virtual
 * agent's history, ready to be rendered into the command's stdout.
 */
interface ObservationSummary {
  /** Trimmed, non-empty `summary` from the observation envelope or its payload, else `null`. */
  readonly summary: string | null;
  /** Trimmed, non-empty `details` from the observation envelope or its payload, else `null`. */
  readonly details: string | null;
  /** Payload `stdout` when it is a string; otherwise the empty string. */
  readonly stdout: string;
  /** Payload `stderr` when it is a string; otherwise the empty string. */
  readonly stderr: string;
  /** Payload `exit_code` when it is a finite number; otherwise `null`. */
  readonly exitCode: number | null;
  /** `true` only when the payload's `truncated` flag is exactly `true`. */
  readonly truncated: boolean;
  /** Trimmed, non-empty payload `truncation_notice`, else `null`. */
  readonly truncationNotice: string | null;
}
24+
25+
/** Everything harvested from a virtual agent's history that feeds the command result. */
interface VirtualAgentFindings {
  /** Trimmed, non-empty contents of assistant-role history entries, in history order. */
  readonly assistantMessages: string[];
  /** Command observations parsed from non-assistant history entries, in history order. */
  readonly observations: ObservationSummary[];
}
29+
1530
interface VirtualAgentExecutorConfig {
1631
readonly systemPrompt: string;
1732
readonly baseOptions: PassExecutionBaseOptions;
@@ -104,6 +119,205 @@ const parseDescriptor = (descriptor: VirtualCommandDescriptor): ParsedVirtualDes
104119
} satisfies ParsedVirtualDescriptor;
105120
};
106121

122+
/**
 * Returns `value` with surrounding whitespace removed when it is a string
 * that still has content after trimming; returns `null` for any non-string
 * value and for empty or whitespace-only strings.
 */
const toTrimmedString = (value: unknown): string | null => {
  const text = typeof value === 'string' ? value.trim() : '';
  return text === '' ? null : text;
};
130+
131+
/**
 * Returns `value` unchanged when it is a finite number; returns `null` for
 * `NaN`, `Infinity`, `-Infinity`, and every non-number value.
 */
const toFiniteNumber = (value: unknown): number | null =>
  typeof value === 'number' && Number.isFinite(value) ? value : null;
138+
139+
/**
 * Decodes serialized history content into an {@link ObservationSummary}.
 *
 * @param raw - JSON text expected to hold an observation envelope.
 * @returns The normalized observation, or `null` when `raw` is not valid
 *   JSON, does not decode to an object whose trimmed `type` is
 *   `'observation'`, or has no object `payload`.
 */
const parseObservationContent = (raw: string): ObservationSummary | null => {
  let decoded: unknown;
  try {
    decoded = JSON.parse(raw);
  } catch {
    // Content that is not JSON is simply not an observation.
    return null;
  }

  if (typeof decoded !== 'object' || decoded === null) {
    return null;
  }

  const envelope = decoded as {
    type?: unknown;
    payload?: unknown;
    summary?: unknown;
    details?: unknown;
  };

  if (toTrimmedString(envelope.type) !== 'observation') {
    return null;
  }

  const rawPayload = envelope.payload;
  if (typeof rawPayload !== 'object' || rawPayload === null) {
    return null;
  }

  const fields = rawPayload as {
    stdout?: unknown;
    stderr?: unknown;
    exit_code?: unknown;
    truncated?: unknown;
    truncation_notice?: unknown;
    summary?: unknown;
    details?: unknown;
  };

  return {
    // Envelope-level text wins over the payload-level text of the same name.
    summary: toTrimmedString(envelope.summary) ?? toTrimmedString(fields.summary),
    details: toTrimmedString(envelope.details) ?? toTrimmedString(fields.details),
    // Streams are kept untrimmed here; non-string values collapse to ''.
    stdout: typeof fields.stdout === 'string' ? fields.stdout : '',
    stderr: typeof fields.stderr === 'string' ? fields.stderr : '',
    exitCode: toFiniteNumber(fields.exit_code),
    truncated: fields.truncated === true,
    truncationNotice: toTrimmedString(fields.truncation_notice),
  } satisfies ObservationSummary;
};
196+
197+
/**
 * Walks the virtual agent's history and gathers what should be reported back:
 * the trimmed, non-empty assistant messages and every entry that parses as a
 * command observation.
 *
 * Entries without an object `payload` are ignored. Assistant-role entries are
 * never inspected for observations. For all other entries, string content is
 * parsed directly and object content is serialized to JSON first so both
 * shapes go through `parseObservationContent`.
 *
 * @param history - History entries produced by the virtual agent's passes.
 * @returns Assistant messages and observations, each in history order.
 */
const collectFindings = (history: PlanHistory): VirtualAgentFindings => {
  const assistantMessages: string[] = [];
  const observations: ObservationSummary[] = [];

  for (const entry of history) {
    if (!entry || typeof entry !== 'object') {
      continue;
    }

    const payload = entry.payload as {
      role?: unknown;
      content?: unknown;
    } | undefined;

    if (!payload || typeof payload !== 'object') {
      continue;
    }

    if (toTrimmedString(payload.role) === 'assistant') {
      const content = toTrimmedString(payload.content);
      if (content) {
        assistantMessages.push(content);
      }
      continue;
    }

    const contentValue = payload.content;
    let serialized: string | null = null;

    if (typeof contentValue === 'string') {
      serialized = contentValue;
    } else if (contentValue && typeof contentValue === 'object') {
      try {
        // JSON.stringify yields undefined when a toJSON hook returns undefined.
        serialized = JSON.stringify(contentValue) ?? null;
      } catch {
        // Circular references or BigInt values make JSON.stringify throw.
        // Such content cannot be an observation, so skip the entry instead
        // of aborting the whole summary.
        serialized = null;
      }
    }

    if (serialized === null) {
      continue;
    }

    const observation = parseObservationContent(serialized);
    if (observation) {
      observations.push(observation);
    }
  }

  return { assistantMessages, observations } satisfies VirtualAgentFindings;
};
247+
248+
/**
 * Prefixes every `\n`-separated line of `text` (including empty lines) with
 * the indentation used for nested output in the formatted command results.
 */
const indentBlock = (text: string): string => {
  const indented: string[] = [];
  for (const row of text.split('\n')) {
    indented.push(` ${row}`);
  }
  return indented.join('\n');
};
253+
254+
/**
 * Renders one observation as a multi-line text block: a heading, then the
 * exit code, stdout, stderr, truncation notice and details, each emitted only
 * when present.
 *
 * @param observation - The observation to render.
 * @param index - Zero-based position of the observation in the list.
 * @param total - Number of observations being rendered; the heading is
 *   numbered (`1. `, `2. `, …) only when there is more than one.
 * @returns The rendered lines joined with `\n`.
 */
const formatObservation = (
  observation: ObservationSummary,
  index: number,
  total: number,
): string => {
  const { summary, exitCode, stdout, stderr, truncated, truncationNotice, details } = observation;

  const numbering = total > 1 ? `${index + 1}. ` : '';
  const output: string[] = [`${numbering}${summary ?? 'Command result'}`];

  if (typeof exitCode === 'number') {
    output.push(` Exit code: ${exitCode}`);
  }

  // Both streams are rendered the same way: a label line followed by the
  // trimmed, indented text, skipped entirely when the stream is blank.
  const streams: Array<[string, string]> = [
    [' Stdout:', stdout.trim()],
    [' Stderr:', stderr.trim()],
  ];
  for (const [label, text] of streams) {
    if (text.length > 0) {
      output.push(label, indentBlock(text));
    }
  }

  // A truncated observation always gets a notice (falling back to a generic
  // one); a non-truncated observation only shows a notice it actually carries.
  const notice = truncated ? truncationNotice ?? 'Output truncated.' : truncationNotice || null;
  if (notice !== null) {
    output.push(` Notice: ${notice}`);
  }

  if (details) {
    output.push(` Details: ${details}`);
  }

  return output.join('\n');
};
293+
294+
/**
 * Assembles the stdout text reported for a virtual command.
 *
 * The first section is a `Summary for "<taskLabel>":` heading followed by the
 * last assistant message, or a fixed fallback sentence when the assistant
 * produced none. When observations exist, a `Command Results:` section with
 * one formatted entry per observation follows after a blank line.
 *
 * @param taskLabel - Label quoted in the summary heading.
 * @param findings - Assistant messages and observations gathered from history.
 * @returns The sections joined by blank lines, trimmed.
 */
const buildStdoutFromFindings = (
  taskLabel: string,
  findings: VirtualAgentFindings,
): string => {
  const { assistantMessages, observations } = findings;

  // Only the final assistant message is treated as the summary.
  const closingMessage =
    assistantMessages.length > 0
      ? assistantMessages[assistantMessages.length - 1]
      : 'No assistant summary was produced. Review command results below.';

  const sections: string[] = [[`Summary for "${taskLabel}":`, closingMessage].join('\n')];

  if (observations.length > 0) {
    const rendered = observations.map((observation, position) =>
      formatObservation(observation, position, observations.length),
    );
    sections.push(['Command Results:', ...rendered].join('\n'));
  }

  return sections.join('\n\n').trim();
};
320+
107321
const buildInitialHistory = (
108322
config: VirtualAgentExecutorConfig,
109323
parsed: ParsedVirtualDescriptor,
@@ -206,42 +420,23 @@ const cloneBaseOptions = (
206420
return cloned;
207421
};
208422

209-
const collectAssistantMessages = (history: PlanHistory): string[] => {
210-
const outputs: string[] = [];
211-
for (const entry of history) {
212-
if (!entry || typeof entry !== 'object') {
213-
continue;
214-
}
215-
const payload = (entry as { payload?: unknown }).payload;
216-
if (!payload || typeof payload !== 'object') {
217-
continue;
218-
}
219-
const role = (payload as { role?: unknown }).role;
220-
if (role !== 'assistant') {
221-
continue;
222-
}
223-
const content = (payload as { content?: unknown }).content;
224-
if (typeof content === 'string' && content.trim()) {
225-
outputs.push(content.trim());
226-
}
227-
}
228-
return outputs;
229-
};
230-
231423
const buildResult = (
232424
command: VirtualCommandExecutionContext['command'],
233425
descriptor: VirtualCommandDescriptor,
234426
history: PlanHistory,
235427
passesExecuted: number,
236428
maxPasses: number,
429+
taskLabel: string,
237430
failure: string | null,
238431
runtimeMs: number,
239432
): CommandExecutionResult => {
240-
const assistantOutputs = collectAssistantMessages(history);
241-
const stdout = assistantOutputs.length > 0 ? assistantOutputs.join('\n\n---\n\n') : '';
242-
const success = !failure && stdout.length > 0;
243-
244-
const normalizedFailure = success ? null : failure ?? 'Virtual agent did not produce a response.';
433+
const findings = collectFindings(history);
434+
const hasResults = findings.assistantMessages.length > 0 || findings.observations.length > 0;
435+
const normalizedFailure = !failure && hasResults
436+
? null
437+
: failure ?? 'Virtual agent did not produce a response.';
438+
const stdout = normalizedFailure ? '' : buildStdoutFromFindings(taskLabel, findings);
439+
const success = normalizedFailure === null && stdout.length > 0;
245440

246441
const result = {
247442
stdout: success ? stdout : '',
@@ -338,6 +533,7 @@ export const createVirtualCommandExecutor = (
338533
history,
339534
passesExecuted,
340535
parsed.maxPasses,
536+
parsed.summary,
341537
failure,
342538
runtimeMs,
343539
);

0 commit comments

Comments
 (0)