Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/loose-adults-glow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@upstash/box": minor
---

Add `toolCallId` and a first-class tool-result event to streaming chunks (#122).
115 changes: 110 additions & 5 deletions packages/sdk/src/__tests__/box-agent-run.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@ describe("box.agent.run", () => {

it("calls onToolUse callback", async () => {
const { box, fetchMock } = await createTestBox();
const tools: Array<{ name: string; input: Record<string, unknown> }> = [];
const tools: Array<{ toolCallId?: string; name: string; input: Record<string, unknown> }> = [];

fetchMock.mockResolvedValueOnce(
mockSSEResponse([
{ event: "run_start", data: { run_id: "r1" } },
{ event: "tool", data: { name: "Read", input: { path: "/test" } } },
{ event: "tool", data: { id: "tool-1", name: "Read", input: { path: "/test" } } },
{ event: "done", data: {} },
]),
);
Expand All @@ -45,9 +45,30 @@ describe("box.agent.run", () => {
});

expect(tools).toHaveLength(1);
expect(tools[0]!.toolCallId).toBe("tool-1");
expect(tools[0]!.name).toBe("Read");
});

// Verifies that `box.agent.run` forwards a `tool_result` SSE event to the
// `onToolResult` callback with its identifier and output intact.
it("calls onToolResult callback", async () => {
const { box, fetchMock } = await createTestBox();
// Collects every payload handed to `onToolResult` during the run.
const results: Array<{ toolCallId?: string; output: unknown }> = [];

// Mocked SSE stream: a single tool_result event that already uses the
// camelCase `toolCallId` key, framed by run_start and done.
fetchMock.mockResolvedValueOnce(
mockSSEResponse([
{ event: "run_start", data: { run_id: "r1" } },
{ event: "tool_result", data: { toolCallId: "tool-1", output: { ok: true } } },
{ event: "done", data: {} },
]),
);

await box.agent.run({
prompt: "test",
onToolResult: (result) => results.push(result),
});

// Exactly one result is expected, carrying the same id and the object output.
expect(results).toEqual([{ toolCallId: "tool-1", output: { ok: true } }]);
});

it("parses structured output with responseSchema", async () => {
const { box, fetchMock } = await createTestBox();

Expand Down Expand Up @@ -437,12 +458,12 @@ describe("box.agent.stream", () => {

it("yields tool-call chunks and calls onToolUse", async () => {
const { box, fetchMock } = await createTestBox();
const tools: Array<{ name: string; input: Record<string, unknown> }> = [];
const tools: Array<{ toolCallId?: string; name: string; input: Record<string, unknown> }> = [];

fetchMock.mockResolvedValueOnce(
mockSSEResponse([
{ event: "run_start", data: { run_id: "r1" } },
{ event: "tool", data: { name: "Write", input: { path: "/x" } } },
{ event: "tool", data: { id: "tool-2", name: "Write", input: { path: "/x" } } },
{ event: "text", data: { text: "done" } },
{ event: "done", data: {} },
]),
Expand All @@ -458,15 +479,97 @@ describe("box.agent.stream", () => {
}

expect(tools).toHaveLength(1);
expect(tools[0]!.toolCallId).toBe("tool-2");
expect(tools[0]!.name).toBe("Write");
const toolChunks = chunks.filter((c) => c.type === "tool-call");
expect(toolChunks).toHaveLength(1);
expect(toolChunks[0]).toEqual({
type: "tool-call",
toolCallId: "tool-2",
toolName: "Write",
input: { path: "/x" },
});
const textChunks = chunks.filter(
(c): c is Extract<Chunk, { type: "text-delta" }> => c.type === "text-delta",
);
expect(textChunks.map((c) => c.text)).toEqual(["done"]);
});

// Verifies that `box.agent.stream` both yields a `tool-result` chunk and invokes
// `onToolResult` for a `tool_result` SSE event.
it("yields tool-result chunks and calls onToolResult", async () => {
const { box, fetchMock } = await createTestBox();
// Collects every payload handed to `onToolResult` while the stream is consumed.
const results: Array<{ toolCallId?: string; output: unknown }> = [];

// The event carries its identifier under the snake_case `tool_use_id` key;
// the assertions below expect it to surface as `toolCallId`.
fetchMock.mockResolvedValueOnce(
mockSSEResponse([
{ event: "run_start", data: { run_id: "r1" } },
{ event: "tool_result", data: { tool_use_id: "tool-3", output: "ok" } },
{ event: "done", data: {} },
]),
);

const run = await box.agent.stream({
prompt: "test",
onToolResult: (result) => results.push(result),
});
// Drain the stream, keeping every chunk for inspection.
const chunks: Chunk[] = [];
for await (const chunk of run) {
chunks.push(chunk);
}

// Callback payload and yielded chunk must agree on id and output.
expect(results).toEqual([{ toolCallId: "tool-3", output: "ok" }]);
expect(chunks).toContainEqual({
type: "tool-result",
toolCallId: "tool-3",
output: "ok",
});
});

// Verifies identifier precedence: when an event carries both a generic `id` and
// a tool-specific key, the tool-specific key is the one exposed as `toolCallId`.
it("prefers explicit tool call identifiers over generic ids", async () => {
const { box, fetchMock } = await createTestBox();

fetchMock.mockResolvedValueOnce(
mockSSEResponse([
{ event: "run_start", data: { run_id: "r1" } },
// Tool event with both `id` and `tool_use_id`; `tool_use_id` is expected to win.
{
event: "tool",
data: {
id: "event-id",
tool_use_id: "tool-use-id",
name: "Read",
input: { path: "/x" },
},
},
// Result event with both `id` and `toolCallId`; `toolCallId` is expected to win.
{
event: "tool_result",
data: {
id: "result-event-id",
toolCallId: "tool-call-id",
output: "ok",
},
},
{ event: "done", data: {} },
]),
);

const run = await box.agent.stream({ prompt: "test" });
// Drain the stream, keeping every chunk for inspection.
const chunks: Chunk[] = [];
for await (const chunk of run) {
chunks.push(chunk);
}

// Neither chunk should fall back to the generic `id` value.
expect(chunks).toContainEqual({
type: "tool-call",
toolCallId: "tool-use-id",
toolName: "Read",
input: { path: "/x" },
});
expect(chunks).toContainEqual({
type: "tool-result",
toolCallId: "tool-call-id",
output: "ok",
});
});

it("yields all chunk types in order", async () => {
const { box, fetchMock } = await createTestBox();

Expand All @@ -475,7 +578,8 @@ describe("box.agent.stream", () => {
{ event: "run_start", data: { run_id: "r1" } },
{ event: "text", data: { text: "Hello " } },
{ event: "thinking", data: { text: "trace" } },
{ event: "tool", data: { name: "Write", input: { path: "/x" } } },
{ event: "tool", data: { toolCallId: "tool-4", name: "Write", input: { path: "/x" } } },
{ event: "tool_result", data: { tool_use_id: "tool-4", output: "done" } },
{
event: "done",
data: { output: "Hello world", input_tokens: 7, output_tokens: 9, session_id: "s1" },
Expand All @@ -495,6 +599,7 @@ describe("box.agent.stream", () => {
"text-delta",
"reasoning",
"tool-call",
"tool-result",
"finish",
"stats",
]);
Expand Down
44 changes: 42 additions & 2 deletions packages/sdk/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,14 @@ function toBackendAgentOptions(
return mapped;
}

/**
 * Extracts the tool-call identifier from a parsed event payload.
 *
 * Candidate keys are probed in priority order: `tool_call_id`, `tool_use_id`,
 * `toolCallId`, and finally the generic `id`. The first key whose value is a
 * string wins; non-string values are skipped rather than coerced.
 *
 * @param parsed - Parsed event payload to inspect.
 * @returns The first string-valued identifier, or `undefined` when none of the
 *   candidate keys holds a string.
 */
function resolveToolCallId(parsed: Record<string, unknown>): string | undefined {
  // Order matters: tool-specific keys take precedence over the generic `id`.
  const candidateKeys = ["tool_call_id", "tool_use_id", "toolCallId", "id"];
  for (const key of candidateKeys) {
    const candidate = parsed[key];
    if (typeof candidate === "string") {
      return candidate;
    }
  }
  return undefined;
}

/**
* Error thrown by the Box SDK
*/
Expand Down Expand Up @@ -943,7 +951,20 @@ export class Box<TProvider = unknown> {
break;
}
case "tool": {
options.onToolUse?.({ name: parsed.name, input: parsed.input });
const toolCallId = resolveToolCallId(parsed);
options.onToolUse?.({
toolCallId,
name: parsed.name ?? "",
input: parsed.input ?? {},
});
break;
}
case "tool_result": {
const toolCallId = resolveToolCallId(parsed);
options.onToolResult?.({
toolCallId,
output: parsed.output,
});
break;
}
case "done": {
Expand Down Expand Up @@ -1095,12 +1116,31 @@ export class Box<TProvider = unknown> {
return null;
}
case "tool": {
const toolCallId = resolveToolCallId(parsed);
const chunk: Chunk = {
type: "tool-call",
toolCallId,
toolName: parsed.name ?? "",
input: parsed.input ?? {},
};
options.onToolUse?.({ name: parsed.name ?? "", input: parsed.input ?? {} });
options.onToolUse?.({
toolCallId,
name: parsed.name ?? "",
input: parsed.input ?? {},
});
return chunk;
}
case "tool_result": {
const toolCallId = resolveToolCallId(parsed);
const chunk: Chunk = {
type: "tool-result",
toolCallId,
output: parsed.output,
};
options.onToolResult?.({
toolCallId,
output: parsed.output,
});
return chunk;
}
case "done": {
Expand Down
11 changes: 8 additions & 3 deletions packages/sdk/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -428,7 +428,8 @@ export type Chunk =
| { type: "start"; runId: string }
| { type: "text-delta"; text: string }
| { type: "reasoning"; text: string }
| { type: "tool-call"; toolName: string; input: Record<string, unknown> }
| { type: "tool-call"; toolCallId?: string; toolName: string; input: Record<string, unknown> }
| { type: "tool-result"; toolCallId?: string; output: unknown }
| {
type: "finish";
output: string;
Expand Down Expand Up @@ -474,7 +475,9 @@ export interface StreamOptions<TProvider = unknown> {
/** Timeout in milliseconds — aborts if exceeded */
timeout?: number;
/** Tool use callback — called when the agent invokes a tool (Read, Write, Bash, etc.) */
onToolUse?: (tool: { name: string; input: Record<string, unknown> }) => void;
onToolUse?: (tool: { toolCallId?: string; name: string; input: Record<string, unknown> }) => void;
/** Tool result callback — called when a tool invocation completes */
onToolResult?: (result: { toolCallId?: string; output: unknown }) => void;
}

/**
Expand All @@ -494,7 +497,9 @@ export interface RunOptions<T = undefined, TProvider = unknown> {
/** Retries with exponential backoff on transient failures */
maxRetries?: number;
/** Tool use callback — called when the agent invokes a tool (Read, Write, Bash, etc.) */
onToolUse?: (tool: { name: string; input: Record<string, unknown> }) => void;
onToolUse?: (tool: { toolCallId?: string; name: string; input: Record<string, unknown> }) => void;
/** Tool result callback — called when a tool invocation completes */
onToolResult?: (result: { toolCallId?: string; output: unknown }) => void;
/** Webhook — fire-and-forget, POST to URL on completion */
webhook?: WebhookConfig;
}
Expand Down
Loading