Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions .changeset/native-feedback-helper.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
---
"@voltagent/core": minor
---

feat: add eval feedback helper for onResult callbacks and VoltOps feedback client support

Example usage:

```ts
import { Agent, buildScorer } from "@voltagent/core";
import { openai } from "@ai-sdk/openai";

// Example scorer: tags an interaction as "billing" or "general" based on
// whether the input (or, failing that, the output) mentions billing.
const taskTypeScorer = buildScorer({
  id: "task-type",
  label: "Task Type",
})
  .score(async ({ payload }) => {
    const source = payload.input ?? payload.output ?? "";
    const isBilling = String(source).toLowerCase().includes("billing");
    const label = isBilling ? "billing" : "general";
    return {
      score: isBilling ? 1 : 0.5,
      metadata: { label },
    };
  })
  .build();

const agent = new Agent({
  name: "support",
  model: openai("gpt-4o-mini"),
  eval: {
    scorers: {
      taskType: {
        scorer: taskTypeScorer,
        // Store each scorer result as model-sourced feedback.
        onResult: async ({ result, feedback }) => {
          const { metadata, score, scorerId } = result;
          await feedback.save({
            key: "task_type",
            value: metadata?.label ?? null,
            score: score ?? null,
            feedbackSourceType: "model",
            feedbackSource: { type: "model", metadata: { scorerId } },
          });
        },
      },
    },
  },
});
```

LLM judge example:

```ts
import { Agent, buildScorer } from "@voltagent/core";
import { openai } from "@ai-sdk/openai";
import { z } from "zod";

// Model used by the judge agent in this example.
const judgeModel = openai("gpt-4o-mini");
// Structured output requested from the judge: a score between 0 and 1,
// a label, and an optional reason.
const judgeSchema = z.object({
score: z.number().min(0).max(1),
label: z.string(),
reason: z.string().optional(),
});

// Build the judge agent once at module scope so every evaluation reuses the
// same instance instead of constructing a new Agent inside the scorer callback.
const satisfactionJudgeAgent = new Agent({
  name: "satisfaction-judge",
  model: judgeModel,
  instructions: "Return JSON with score and label.",
});

// LLM-as-judge scorer: asks the judge agent for a structured verdict and
// exposes its label and reason through the result metadata.
const satisfactionJudge = buildScorer({
  id: "satisfaction-judge",
  label: "Satisfaction Judge",
})
  .score(async ({ payload }) => {
    // The continuation lines stay flush-left because they are part of the prompt text.
    const prompt = `Score user satisfaction (0-1) and label it.
User: ${payload.input}
Assistant: ${payload.output}`;
    const response = await satisfactionJudgeAgent.generateObject(prompt, judgeSchema);
    const { score, label, reason } = response.object;
    return {
      score,
      metadata: {
        label,
        // `reason` is optional in the schema; normalize a missing value to null.
        reason: reason ?? null,
      },
    };
  })
  .build();

const agent = new Agent({
  name: "support",
  model: openai("gpt-4o-mini"),
  eval: {
    scorers: {
      satisfaction: {
        scorer: satisfactionJudge,
        // Store the judge's verdict as model-sourced feedback.
        onResult: async ({ result, feedback }) => {
          // Only forward `reason` as the comment when it is actually a string.
          const reason = result.metadata?.reason;
          await feedback.save({
            key: "satisfaction",
            value: result.metadata?.label ?? null,
            score: result.score ?? null,
            comment: typeof reason === "string" ? reason : null,
            feedbackSourceType: "model",
          });
        },
      },
    },
  },
});
```
85 changes: 76 additions & 9 deletions examples/with-live-evals/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ const observability = new VoltAgentObservability();

const judgeModel = openai("gpt-4o-mini");
const moderationModel = openai("gpt-4o-mini");
// Judge agent created once at module scope; the helpfulness scorer calls
// generateObject on this shared instance.
const helpfulnessJudgeAgent = new Agent({
name: "helpfulness-judge",
model: judgeModel,
instructions: "You evaluate helpfulness of responses",
});

const keywordMatchScorer = buildScorer({
id: "keyword-match",
Expand Down Expand Up @@ -62,6 +67,14 @@ const keywordMatchScorer = buildScorer({
})
.build();

// Minimal demo scorer: always reports a score of 1.
const customScorer = buildScorer({ id: "response-length" })
  .score(() => ({ score: 1 }))
  .build();

const HELPFULNESS_SCHEMA = z.object({
score: z.number().min(0).max(1).describe("Score from 0 to 1 for helpfulness"),
reason: z.string().describe("Explanation of the score"),
Expand Down Expand Up @@ -118,13 +131,7 @@ Assistant Response: ${context.payload.output}

Provide a score from 0 to 1 and explain your reasoning.`;

const agent = new Agent({
name: "helpfulness-judge",
model: judgeModel,
instructions: "You evaluate helpfulness of responses",
});

const response = await agent.generateObject(prompt, HELPFULNESS_SCHEMA);
const response = await helpfulnessJudgeAgent.generateObject(prompt, HELPFULNESS_SCHEMA);

const rawResults = context.results.raw;
rawResults.helpfulnessJudge = response.object;
Expand Down Expand Up @@ -243,6 +250,20 @@ const supportAgent = new Agent({
criteria:
"Reward answers that are specific to VoltAgent features and actionable guidance.",
},
// Store the scorer result as model-sourced feedback under the "helpfulness" key.
onResult: async ({ result, feedback }) => {
  const { score, scorerId } = result;
  const reason = result.metadata?.reason;
  await feedback.save({
    key: "helpfulness",
    score: score ?? null,
    // Forward the reason only when it is a string.
    comment: typeof reason === "string" ? reason : null,
    feedbackSourceType: "model",
    feedbackSource: { type: "model", metadata: { scorerId } },
  });
},
},
levenshtein: {
scorer: scorers.levenshtein,
Expand Down Expand Up @@ -275,15 +296,61 @@ const supportAgent = new Agent({
},
});

// Demo agent configured with a single live-eval scorer (customScorer) and no
// onResult callback.
// NOTE(review): sampling { type: "ratio", rate: 1 } presumably scores every
// interaction — confirm against the sampling policy documentation.
const singleEvalAgent = new Agent({
name: "single-eval-demo",
instructions: "You are a helpful assistant that answers questions about VoltAgent.",
model: openai("gpt-4o-mini"),
eval: {
sampling: { type: "ratio", rate: 1 },
scorers: {
responseLength: {
scorer: customScorer,
},
},
},
});

// Demo agent whose helpfulness scorer stores its verdict through the
// feedback helper passed to onResult.
const scorerFeedbackAgent = new Agent({
  name: "scorer-feedback-demo",
  instructions: "You are a helpful assistant that answers questions about VoltAgent.",
  model: openai("gpt-4o-mini"),
  eval: {
    sampling: { type: "ratio", rate: 1 },
    scorers: {
      "scorer-feedback": {
        scorer: helpfulnessJudgeScorer,
        onResult: async ({ result, feedback }) => {
          const { score, scorerId } = result;
          const reason = result.metadata?.reason;
          await feedback.save({
            key: "helpfulness",
            score: score ?? null,
            // Forward the reason only when it is a string.
            comment: typeof reason === "string" ? reason : null,
            feedbackSourceType: "model",
            feedbackSource: { type: "model", metadata: { scorerId } },
          });
        },
      },
    },
  },
});

new VoltAgent({
agents: { support: supportAgent },
agents: {
support: supportAgent,
singleEval: singleEvalAgent,
scorerFeedback: scorerFeedbackAgent,
},
server: honoServer(),
observability,
});

(async () => {
const question = "How can I enable live eval scorers in VoltAgent?";
const result = await supportAgent.generateText(question);
const result = await singleEvalAgent.generateText(question);

console.log("Question:\n", question, "\n");
console.log("Agent response:\n", result.text, "\n");
Expand Down
10 changes: 10 additions & 0 deletions packages/core/src/agent/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2686,6 +2686,16 @@ export class Agent {
logger: this.logger,
evalConfig: this.evalConfig,
getObservability: () => this.getObservability(),
// Resolve the VoltOps client for eval feedback: prefer the agent's own client,
// otherwise the globally registered one. It is returned only when it exposes
// hasValidKeys() and that check passes.
getVoltOpsClient: () => {
  const candidate = this.voltOpsClient || AgentRegistry.getInstance().getGlobalVoltOpsClient();
  if (candidate && typeof candidate.hasValidKeys === "function" && candidate.hasValidKeys()) {
    return candidate;
  }
  return undefined;
},
};
}

Expand Down
72 changes: 71 additions & 1 deletion packages/core/src/agent/eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,16 @@ import {
} from "../eval/runtime";
import type { VoltAgentObservability } from "../observability";
import { randomUUID } from "../utils/id";
import type { VoltOpsClient } from "../voltops/client";
import type {
AgentEvalConfig,
AgentEvalContext,
AgentEvalFeedbackHelper,
AgentEvalFeedbackSaveInput,
AgentEvalOperationType,
AgentEvalPayload,
AgentEvalResult,
AgentEvalResultCallbackArgs,
AgentEvalScorerConfig,
OperationContext,
} from "./types";
Expand Down Expand Up @@ -254,6 +258,7 @@ export interface AgentEvalHost {
readonly logger: Logger;
readonly evalConfig?: AgentEvalConfig;
getObservability(): VoltAgentObservability;
getVoltOpsClient?: () => VoltOpsClient | undefined;
}

export interface EnqueueEvalScoringArgs {
Expand Down Expand Up @@ -1092,11 +1097,76 @@ async function invokeEvalResultCallback(
}

try {
await config.onResult(result);
const feedback = createEvalFeedbackHelper(host, result);
const payload: AgentEvalResultCallbackArgs = {
...result,
result,
feedback,
};
await config.onResult(payload);
} catch (error) {
host.logger.warn(`[Agent:${host.name}] Eval scorer onResult callback failed`, {
error: error instanceof Error ? error.message : error,
scorerId: result.scorerId,
});
}
}

/**
 * Builds the `feedback` helper handed to a scorer's `onResult` callback.
 *
 * `save` validates the input, resolves a VoltOps client from `host`, and
 * forwards the feedback to `client.createFeedback`. It rejects with an Error
 * when the key is blank or when no trace id can be determined.
 *
 * @param host - Eval host used to resolve the VoltOps client and to log.
 * @param result - Eval result the feedback belongs to; its payload supplies
 *   the default trace id.
 * @returns Helper whose `save` resolves to the created feedback, or `null`
 *   when no usable VoltOps client is available.
 */
function createEvalFeedbackHelper(
  host: AgentEvalHost,
  result: AgentEvalResult,
): AgentEvalFeedbackHelper {
  // Treat non-strings and whitespace-only strings as "not provided".
  const toTrimmedString = (value: unknown): string =>
    typeof value === "string" ? value.trim() : "";

  return {
    save: async (input: AgentEvalFeedbackSaveInput) => {
      const key = toTrimmedString(input.key);
      if (!key) {
        throw new Error("feedback key is required");
      }

      // A blank override must not mask the trace id carried by the eval
      // payload, so fall back whenever the explicit value is empty.
      const traceId =
        toTrimmedString(input.traceId) || toTrimmedString(result.payload.traceId);
      if (!traceId) {
        throw new Error("feedback traceId is required");
      }

      const client = resolveEvalFeedbackClient(host);
      if (!client) {
        // Best-effort: a missing client skips the save instead of failing the callback.
        host.logger.debug("Eval feedback save skipped: VoltOps client unavailable", {
          scorerId: result.scorerId,
          traceId,
        });
        return null;
      }

      return await client.createFeedback({
        traceId,
        key,
        id: input.id,
        score: input.score,
        value: input.value,
        correction: input.correction,
        comment: input.comment,
        feedbackConfig: input.feedbackConfig,
        feedbackSource: input.feedbackSource,
        feedbackSourceType: input.feedbackSourceType,
        createdAt: input.createdAt,
      });
    },
  };
}

/**
 * Looks up the VoltOps client to use for eval feedback.
 *
 * @param host - Eval host that may expose `getVoltOpsClient`.
 * @returns The client, or `undefined` when the host has no accessor, the
 *   accessor yields nothing, or the client reports invalid keys.
 */
function resolveEvalFeedbackClient(host: AgentEvalHost): VoltOpsClient | undefined {
  const candidate = host.getVoltOpsClient?.();
  if (!candidate) {
    return undefined;
  }

  // Clients without a hasValidKeys method are accepted as-is.
  const canCheckKeys = typeof candidate.hasValidKeys === "function";
  return canCheckKeys && !candidate.hasValidKeys() ? undefined : candidate;
}
17 changes: 16 additions & 1 deletion packages/core/src/agent/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ import type {
DynamicValueOptions,
PromptContent,
PromptHelper,
VoltOpsFeedback,
VoltOpsFeedbackConfig,
VoltOpsFeedbackCreateInput,
VoltOpsFeedbackExpiresIn,
} from "../voltops/types";
import type { ContextInput } from "./agent";
Expand Down Expand Up @@ -560,6 +562,19 @@ export interface AgentEvalResult {
rawPayload: AgentEvalPayload;
}

/**
 * Input accepted by the eval feedback helper's `save`.
 *
 * Same as `VoltOpsFeedbackCreateInput`, except `traceId` is optional; when it
 * is omitted, the helper in `agent/eval.ts` uses the eval payload's trace id.
 */
export type AgentEvalFeedbackSaveInput = Omit<VoltOpsFeedbackCreateInput, "traceId"> & {
traceId?: string;
};

/**
 * Helper passed to scorer `onResult` callbacks for persisting feedback.
 *
 * `save` resolves to the created feedback, or `null` when nothing was sent
 * (the implementation in `agent/eval.ts` returns `null` if no VoltOps client
 * is available).
 */
export type AgentEvalFeedbackHelper = {
save: (input: AgentEvalFeedbackSaveInput) => Promise<VoltOpsFeedback | null>;
};

/**
 * Argument passed to a scorer's `onResult` callback: the eval result's own
 * fields at the top level, plus the same result under `result` and the
 * `feedback` helper.
 */
export type AgentEvalResultCallbackArgs = AgentEvalResult & {
result: AgentEvalResult;
feedback: AgentEvalFeedbackHelper;
};

export interface AgentEvalScorerConfig {
scorer: AgentEvalScorerReference;
params?:
Expand All @@ -569,7 +584,7 @@ export interface AgentEvalScorerConfig {
) => AgentEvalParams | undefined | Promise<AgentEvalParams | undefined>);
sampling?: AgentEvalSamplingPolicy;
id?: string;
onResult?: (result: AgentEvalResult) => void | Promise<void>;
onResult?: (result: AgentEvalResultCallbackArgs) => void | Promise<void>;
buildPayload?: (
context: AgentEvalContext,
) => Record<string, unknown> | Promise<Record<string, unknown>>;
Expand Down
Loading