Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/loose-adults-glow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@upstash/box": minor
---

Add `toolCallId` and a first-class tool-result event to streaming chunks (#122)
115 changes: 110 additions & 5 deletions packages/sdk/src/__tests__/box-agent-run.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ describe("box.agent.run", () => {

it("calls onToolUse callback", async () => {
const { box, fetchMock } = await createTestBox();
const tools: Array<{ name: string; input: Record<string, unknown> }> = [];
const tools: Array<{ toolCallId?: string; name: string; input: Record<string, unknown> }> = [];

fetchMock.mockResolvedValueOnce(
mockSSEResponse([
{ event: "run_start", data: { run_id: "r1" } },
{ event: "tool", data: { name: "Read", input: { path: "/test" } } },
{ event: "tool", data: { id: "tool-1", name: "Read", input: { path: "/test" } } },
{ event: "done", data: {} },
]),
);
Expand All @@ -45,9 +45,30 @@ describe("box.agent.run", () => {
});

expect(tools).toHaveLength(1);
expect(tools[0]!.toolCallId).toBe("tool-1");
expect(tools[0]!.name).toBe("Read");
});

// Verifies that box.agent.run hands a streamed "tool_result" SSE event to the
// onToolResult callback, with the event's toolCallId and output passed through.
it("calls onToolResult callback", async () => {
const { box, fetchMock } = await createTestBox();
// Collects every object the SDK passes to onToolResult.
const results: Array<{ toolCallId?: string; output: unknown }> = [];

// Mocked SSE stream: run start, one tool result, then completion.
fetchMock.mockResolvedValueOnce(
mockSSEResponse([
{ event: "run_start", data: { run_id: "r1" } },
{ event: "tool_result", data: { toolCallId: "tool-1", output: { ok: true } } },
{ event: "done", data: {} },
]),
);

await box.agent.run({
prompt: "test",
onToolResult: (result) => results.push(result),
});

// Exactly one invocation, carrying the id and the structured (non-string) output unchanged.
expect(results).toEqual([{ toolCallId: "tool-1", output: { ok: true } }]);
});

it("parses structured output with responseSchema", async () => {
const { box, fetchMock } = await createTestBox();

Expand Down Expand Up @@ -437,12 +458,12 @@ describe("box.agent.stream", () => {

it("yields tool-call chunks and calls onToolUse", async () => {
const { box, fetchMock } = await createTestBox();
const tools: Array<{ name: string; input: Record<string, unknown> }> = [];
const tools: Array<{ toolCallId?: string; name: string; input: Record<string, unknown> }> = [];

fetchMock.mockResolvedValueOnce(
mockSSEResponse([
{ event: "run_start", data: { run_id: "r1" } },
{ event: "tool", data: { name: "Write", input: { path: "/x" } } },
{ event: "tool", data: { id: "tool-2", name: "Write", input: { path: "/x" } } },
{ event: "text", data: { text: "done" } },
{ event: "done", data: {} },
]),
Expand All @@ -458,15 +479,97 @@ describe("box.agent.stream", () => {
}

expect(tools).toHaveLength(1);
expect(tools[0]!.toolCallId).toBe("tool-2");
expect(tools[0]!.name).toBe("Write");
const toolChunks = chunks.filter((c) => c.type === "tool-call");
expect(toolChunks).toHaveLength(1);
expect(toolChunks[0]).toEqual({
type: "tool-call",
toolCallId: "tool-2",
toolName: "Write",
input: { path: "/x" },
});
const textChunks = chunks.filter(
(c): c is Extract<Chunk, { type: "text-delta" }> => c.type === "text-delta",
);
expect(textChunks.map((c) => c.text)).toEqual(["done"]);
});

// Verifies that box.agent.stream turns a "tool_result" SSE event into both an
// onToolResult callback invocation and a "tool-result" chunk in the iterated stream.
it("yields tool-result chunks and calls onToolResult", async () => {
const { box, fetchMock } = await createTestBox();
// Collects every object the SDK passes to onToolResult.
const results: Array<{ toolCallId?: string; output: unknown }> = [];

// The event carries its identifier under the snake_case `tool_use_id` key;
// the expectations below check that it surfaces as `toolCallId`.
fetchMock.mockResolvedValueOnce(
mockSSEResponse([
{ event: "run_start", data: { run_id: "r1" } },
{ event: "tool_result", data: { tool_use_id: "tool-3", output: "ok" } },
{ event: "done", data: {} },
]),
);

const run = await box.agent.stream({
prompt: "test",
onToolResult: (result) => results.push(result),
});
// Drain the stream so every chunk (and every callback) has been produced.
const chunks: Chunk[] = [];
for await (const chunk of run) {
chunks.push(chunk);
}

// Callback and chunk must agree on the normalized id and the output.
expect(results).toEqual([{ toolCallId: "tool-3", output: "ok" }]);
expect(chunks).toContainEqual({
type: "tool-result",
toolCallId: "tool-3",
output: "ok",
});
});

// Verifies identifier precedence when an event payload carries more than one
// candidate id: a dedicated tool-call key must win over the generic `id` field.
it("prefers explicit tool call identifiers over generic ids", async () => {
const { box, fetchMock } = await createTestBox();

fetchMock.mockResolvedValueOnce(
mockSSEResponse([
{ event: "run_start", data: { run_id: "r1" } },
{
// Both `id` and `tool_use_id` are present; `tool_use_id` is expected to be used.
event: "tool",
data: {
id: "event-id",
tool_use_id: "tool-use-id",
name: "Read",
input: { path: "/x" },
},
},
{
// Both `id` and `toolCallId` are present; `toolCallId` is expected to be used.
event: "tool_result",
data: {
id: "result-event-id",
toolCallId: "tool-call-id",
output: "ok",
},
},
{ event: "done", data: {} },
]),
);

const run = await box.agent.stream({ prompt: "test" });
// Drain the stream so all chunks are available for inspection.
const chunks: Chunk[] = [];
for await (const chunk of run) {
chunks.push(chunk);
}

// The tool-call chunk reports "tool-use-id", not the generic "event-id".
expect(chunks).toContainEqual({
type: "tool-call",
toolCallId: "tool-use-id",
toolName: "Read",
input: { path: "/x" },
});
// The tool-result chunk reports "tool-call-id", not the generic "result-event-id".
expect(chunks).toContainEqual({
type: "tool-result",
toolCallId: "tool-call-id",
output: "ok",
});
});

it("yields all chunk types in order", async () => {
const { box, fetchMock } = await createTestBox();

Expand All @@ -475,7 +578,8 @@ describe("box.agent.stream", () => {
{ event: "run_start", data: { run_id: "r1" } },
{ event: "text", data: { text: "Hello " } },
{ event: "thinking", data: { text: "trace" } },
{ event: "tool", data: { name: "Write", input: { path: "/x" } } },
{ event: "tool", data: { toolCallId: "tool-4", name: "Write", input: { path: "/x" } } },
{ event: "tool_result", data: { tool_use_id: "tool-4", output: "done" } },
{
event: "done",
data: { output: "Hello world", input_tokens: 7, output_tokens: 9, session_id: "s1" },
Expand All @@ -495,6 +599,7 @@ describe("box.agent.stream", () => {
"text-delta",
"reasoning",
"tool-call",
"tool-result",
"finish",
"stats",
]);
Expand Down
40 changes: 38 additions & 2 deletions packages/sdk/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -943,7 +943,22 @@ export class Box<TProvider = unknown> {
break;
}
case "tool": {
options.onToolUse?.({ name: parsed.name, input: parsed.input });
const toolCallId =
parsed.tool_call_id ?? parsed.tool_use_id ?? parsed.toolCallId ?? parsed.id;
options.onToolUse?.({
toolCallId,
name: parsed.name ?? "",
input: parsed.input ?? {},
});
Comment thread
buggyhunter marked this conversation as resolved.
Outdated
break;
}
case "tool_result": {
const toolCallId =
parsed.tool_call_id ?? parsed.tool_use_id ?? parsed.toolCallId ?? parsed.id;
options.onToolResult?.({
toolCallId,
output: parsed.output,
});
break;
}
case "done": {
Expand Down Expand Up @@ -1095,12 +1110,33 @@ export class Box<TProvider = unknown> {
return null;
}
case "tool": {
const toolCallId =
parsed.tool_call_id ?? parsed.tool_use_id ?? parsed.toolCallId ?? parsed.id;
const chunk: Chunk = {
type: "tool-call",
toolCallId,
toolName: parsed.name ?? "",
input: parsed.input ?? {},
};
Comment thread
buggyhunter marked this conversation as resolved.
Outdated
options.onToolUse?.({ name: parsed.name ?? "", input: parsed.input ?? {} });
options.onToolUse?.({
toolCallId,
name: parsed.name ?? "",
input: parsed.input ?? {},
});
return chunk;
}
case "tool_result": {
const toolCallId =
parsed.tool_call_id ?? parsed.tool_use_id ?? parsed.toolCallId ?? parsed.id;
const chunk: Chunk = {
type: "tool-result",
toolCallId,
output: parsed.output,
};
options.onToolResult?.({
toolCallId,
output: parsed.output,
});
return chunk;
}
case "done": {
Expand Down
11 changes: 8 additions & 3 deletions packages/sdk/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,8 @@ export type Chunk =
| { type: "start"; runId: string }
| { type: "text-delta"; text: string }
| { type: "reasoning"; text: string }
| { type: "tool-call"; toolName: string; input: Record<string, unknown> }
| { type: "tool-call"; toolCallId?: string; toolName: string; input: Record<string, unknown> }
| { type: "tool-result"; toolCallId?: string; output: unknown }
| {
type: "finish";
output: string;
Expand Down Expand Up @@ -474,7 +475,9 @@ export interface StreamOptions<TProvider = unknown> {
/** Timeout in milliseconds — aborts if exceeded */
timeout?: number;
/** Tool use callback — called when the agent invokes a tool (Read, Write, Bash, etc.) */
onToolUse?: (tool: { name: string; input: Record<string, unknown> }) => void;
onToolUse?: (tool: { toolCallId?: string; name: string; input: Record<string, unknown> }) => void;
/** Tool result callback — called when a tool invocation completes */
onToolResult?: (result: { toolCallId?: string; output: unknown }) => void;
}

/**
Expand All @@ -494,7 +497,9 @@ export interface RunOptions<T = undefined, TProvider = unknown> {
/** Retries with exponential backoff on transient failures */
maxRetries?: number;
/** Tool use callback — called when the agent invokes a tool (Read, Write, Bash, etc.) */
onToolUse?: (tool: { name: string; input: Record<string, unknown> }) => void;
onToolUse?: (tool: { toolCallId?: string; name: string; input: Record<string, unknown> }) => void;
/** Tool result callback — called when a tool invocation completes */
onToolResult?: (result: { toolCallId?: string; output: unknown }) => void;
/** Webhook — fire-and-forget, POST to URL on completion */
webhook?: WebhookConfig;
}
Expand Down
Loading