From 6b757abd024e4daf08633043a29209d978af75d7 Mon Sep 17 00:00:00 2001 From: Omer Aplak Date: Thu, 15 Jan 2026 16:33:08 -0800 Subject: [PATCH] feat: add eval feedback helper for onResult callbacks and VoltOps feedback --- .changeset/native-feedback-helper.md | 107 ++++++++++++++++++++++++++ examples/with-live-evals/src/index.ts | 85 +++++++++++++++++--- packages/core/src/agent/agent.ts | 10 +++ packages/core/src/agent/eval.ts | 72 ++++++++++++++++- packages/core/src/agent/types.ts | 17 +++- packages/core/src/index.ts | 3 + packages/core/src/voltops/client.ts | 39 ++++++++++ packages/core/src/voltops/index.ts | 2 + packages/core/src/voltops/types.ts | 34 ++++++++ 9 files changed, 358 insertions(+), 11 deletions(-) create mode 100644 .changeset/native-feedback-helper.md diff --git a/.changeset/native-feedback-helper.md b/.changeset/native-feedback-helper.md new file mode 100644 index 000000000..80f538435 --- /dev/null +++ b/.changeset/native-feedback-helper.md @@ -0,0 +1,107 @@ +--- +"@voltagent/core": minor +--- + +feat: add eval feedback helper for onResult callbacks and VoltOps feedback client support + +Example usage: + +```ts +import { Agent, buildScorer } from "@voltagent/core"; +import { openai } from "@ai-sdk/openai"; + +const taskTypeScorer = buildScorer({ + id: "task-type", + label: "Task Type", +}) + .score(async ({ payload }) => { + const text = String(payload.input ?? payload.output ?? ""); + const label = text.toLowerCase().includes("billing") ? "billing" : "general"; + return { + score: label === "billing" ? 1 : 0.5, + metadata: { label }, + }; + }) + .build(); + +const agent = new Agent({ + name: "support", + model: openai("gpt-4o-mini"), + eval: { + scorers: { + taskType: { + scorer: taskTypeScorer, + onResult: async ({ result, feedback }) => { + await feedback.save({ + key: "task_type", + value: result.metadata?.label ?? null, + score: result.score ?? null, + feedbackSourceType: "model", + feedbackSource: { type: "model", metadata: { scorerId: result.scorerId } }, + }); + }, + }, + }, + }, +}); +``` + +LLM judge example: + +```ts +import { Agent, buildScorer } from "@voltagent/core"; +import { openai } from "@ai-sdk/openai"; +import { z } from "zod"; + +const judgeModel = openai("gpt-4o-mini"); +const judgeSchema = z.object({ + score: z.number().min(0).max(1), + label: z.string(), + reason: z.string().optional(), +}); + +const satisfactionJudge = buildScorer({ + id: "satisfaction-judge", + label: "Satisfaction Judge", +}) + .score(async ({ payload }) => { + const prompt = `Score user satisfaction (0-1) and label it. +User: ${payload.input} +Assistant: ${payload.output}`; + const judge = new Agent({ + name: "satisfaction-judge", + model: judgeModel, + instructions: "Return JSON with score and label.", + }); + const response = await judge.generateObject(prompt, judgeSchema); + return { + score: response.object.score, + metadata: { + label: response.object.label, + reason: response.object.reason ?? null, + }, + }; + }) + .build(); + +const agent = new Agent({ + name: "support", + model: openai("gpt-4o-mini"), + eval: { + scorers: { + satisfaction: { + scorer: satisfactionJudge, + onResult: async ({ result, feedback }) => { + await feedback.save({ + key: "satisfaction", + value: result.metadata?.label ?? null, + score: result.score ?? null, + comment: result.metadata?.reason ?? null, + feedbackSourceType: "model", + }); + }, + }, + }, + }, +}); +``` diff --git a/examples/with-live-evals/src/index.ts b/examples/with-live-evals/src/index.ts index ad745dbb1..e496b379d 100644 --- a/examples/with-live-evals/src/index.ts +++ b/examples/with-live-evals/src/index.ts @@ -21,6 +21,11 @@ const observability = new VoltAgentObservability(); const judgeModel = openai("gpt-4o-mini"); const moderationModel = openai("gpt-4o-mini"); +const helpfulnessJudgeAgent = new Agent({ + name: "helpfulness-judge", + model: judgeModel, + instructions: "You evaluate helpfulness of responses", +}); const keywordMatchScorer = buildScorer({ id: "keyword-match", @@ -62,6 +67,14 @@ const keywordMatchScorer = buildScorer({ }) .build(); +const customScorer = buildScorer({ + id: "response-length", +}) + .score(() => { + return { score: 1 }; + }) + .build(); + const HELPFULNESS_SCHEMA = z.object({ score: z.number().min(0).max(1).describe("Score from 0 to 1 for helpfulness"), reason: z.string().describe("Explanation of the score"), @@ -118,13 +131,7 @@ Assistant Response: ${context.payload.output} Provide a score from 0 to 1 and explain your reasoning.`; - const agent = new Agent({ - name: "helpfulness-judge", - model: judgeModel, - instructions: "You evaluate helpfulness of responses", - }); - - const response = await agent.generateObject(prompt, HELPFULNESS_SCHEMA); + const response = await helpfulnessJudgeAgent.generateObject(prompt, HELPFULNESS_SCHEMA); const rawResults = context.results.raw; rawResults.helpfulnessJudge = response.object; @@ -243,6 +250,20 @@ const supportAgent = new Agent({ criteria: "Reward answers that are specific to VoltAgent features and actionable guidance.", }, + onResult: async ({ result, feedback }) => { + await feedback.save({ + key: "helpfulness", + score: result.score ?? null, + comment: typeof result.metadata?.reason === "string" ? result.metadata.reason : null, + feedbackSourceType: "model", + feedbackSource: { + type: "model", + metadata: { + scorerId: result.scorerId, + }, + }, + }); + }, }, levenshtein: { scorer: scorers.levenshtein, @@ -275,15 +296,61 @@ const supportAgent = new Agent({ }, }); +const singleEvalAgent = new Agent({ + name: "single-eval-demo", + instructions: "You are a helpful assistant that answers questions about VoltAgent.", + model: openai("gpt-4o-mini"), + eval: { + sampling: { type: "ratio", rate: 1 }, + scorers: { + responseLength: { + scorer: customScorer, + }, + }, + }, +}); + +const scorerFeedbackAgent = new Agent({ + name: "scorer-feedback-demo", + instructions: "You are a helpful assistant that answers questions about VoltAgent.", + model: openai("gpt-4o-mini"), + eval: { + sampling: { type: "ratio", rate: 1 }, + scorers: { + "scorer-feedback": { + scorer: helpfulnessJudgeScorer, + onResult: async ({ result, feedback }) => { + await feedback.save({ + key: "helpfulness", + score: result.score ?? null, + comment: typeof result.metadata?.reason === "string" ? result.metadata.reason : null, + feedbackSourceType: "model", + feedbackSource: { + type: "model", + metadata: { + scorerId: result.scorerId, + }, + }, + }); + }, + }, + }, + }, +}); + new VoltAgent({ - agents: { support: supportAgent }, + agents: { + support: supportAgent, + singleEval: singleEvalAgent, + scorerFeedback: scorerFeedbackAgent, + }, server: honoServer(), observability, }); (async () => { const question = "How can I enable live eval scorers in VoltAgent?"; - const result = await supportAgent.generateText(question); + const result = await singleEvalAgent.generateText(question); console.log("Question:\n", question, "\n"); console.log("Agent response:\n", result.text, "\n"); diff --git a/packages/core/src/agent/agent.ts b/packages/core/src/agent/agent.ts index b3657d20f..f7c750281 100644 --- a/packages/core/src/agent/agent.ts +++ b/packages/core/src/agent/agent.ts @@ -2686,6 +2686,16 @@ export class Agent { logger: this.logger, evalConfig: this.evalConfig, getObservability: () => this.getObservability(), + getVoltOpsClient: () => { + const client = this.voltOpsClient || AgentRegistry.getInstance().getGlobalVoltOpsClient(); + if (!client || typeof client.hasValidKeys !== "function") { + return undefined; + } + if (!client.hasValidKeys()) { + return undefined; + } + return client; + }, }; } diff --git a/packages/core/src/agent/eval.ts b/packages/core/src/agent/eval.ts index 9e4fe9f2e..5a1a9bec0 100644 --- a/packages/core/src/agent/eval.ts +++ b/packages/core/src/agent/eval.ts @@ -16,12 +16,16 @@ import { } from "../eval/runtime"; import type { VoltAgentObservability } from "../observability"; import { randomUUID } from "../utils/id"; +import type { VoltOpsClient } from "../voltops/client"; import type { AgentEvalConfig, AgentEvalContext, + AgentEvalFeedbackHelper, + AgentEvalFeedbackSaveInput, AgentEvalOperationType, AgentEvalPayload, AgentEvalResult, + AgentEvalResultCallbackArgs, AgentEvalScorerConfig, OperationContext, } from "./types"; @@ -254,6 +258,7 @@ export interface AgentEvalHost { readonly logger: Logger; readonly evalConfig?: AgentEvalConfig; getObservability(): VoltAgentObservability; + getVoltOpsClient?: () => VoltOpsClient | undefined; } export interface EnqueueEvalScoringArgs { @@ -1092,7 +1097,13 @@ async function invokeEvalResultCallback( } try { - await config.onResult(result); + const feedback = createEvalFeedbackHelper(host, result); + const payload: AgentEvalResultCallbackArgs = { + ...result, + result, + feedback, + }; + await config.onResult(payload); } catch (error) { host.logger.warn(`[Agent:${host.name}] Eval scorer onResult callback failed`, { error: error instanceof Error ? error.message : error, @@ -1100,3 +1111,62 @@ async function invokeEvalResultCallback( }); } } + +function createEvalFeedbackHelper( + host: AgentEvalHost, + result: AgentEvalResult, +): AgentEvalFeedbackHelper { + return { + save: async (input: AgentEvalFeedbackSaveInput) => { + const rawKey = typeof input.key === "string" ? input.key.trim() : ""; + if (!rawKey) { + throw new Error("feedback key is required"); + } + + const traceId = input.traceId ?? result.payload.traceId; + if (!traceId) { + throw new Error("feedback traceId is required"); + } + + const client = resolveEvalFeedbackClient(host); + if (!client) { + host.logger.debug("Eval feedback save skipped: VoltOps client unavailable", { + scorerId: result.scorerId, + traceId, + }); + return null; + } + + return await client.createFeedback({ + traceId, + key: rawKey, + id: input.id, + score: input.score, + value: input.value, + correction: input.correction, + comment: input.comment, + feedbackConfig: input.feedbackConfig, + feedbackSource: input.feedbackSource, + feedbackSourceType: input.feedbackSourceType, + createdAt: input.createdAt, + }); + }, + }; +} + +function resolveEvalFeedbackClient(host: AgentEvalHost): VoltOpsClient | undefined { + if (!host.getVoltOpsClient) { + return undefined; + } + + const client = host.getVoltOpsClient(); + if (!client) { + return undefined; + } + + if (typeof client.hasValidKeys === "function" && !client.hasValidKeys()) { + return undefined; + } + + return client; +} diff --git a/packages/core/src/agent/types.ts b/packages/core/src/agent/types.ts index 10bfbf44c..ff0225d62 100644 --- a/packages/core/src/agent/types.ts +++ b/packages/core/src/agent/types.ts @@ -34,7 +34,9 @@ import type { DynamicValueOptions, PromptContent, PromptHelper, + VoltOpsFeedback, VoltOpsFeedbackConfig, + VoltOpsFeedbackCreateInput, VoltOpsFeedbackExpiresIn, } from "../voltops/types"; import type { ContextInput } from "./agent"; @@ -560,6 +562,19 @@ export interface AgentEvalResult { rawPayload: AgentEvalPayload; } +export type AgentEvalFeedbackSaveInput = Omit & { + traceId?: string; +}; + +export type AgentEvalFeedbackHelper = { + save: (input: AgentEvalFeedbackSaveInput) => Promise; +}; + +export type AgentEvalResultCallbackArgs = AgentEvalResult & { + result: AgentEvalResult; + feedback: AgentEvalFeedbackHelper; +}; + export interface AgentEvalScorerConfig { scorer: AgentEvalScorerReference; params?: @@ -569,7 +584,7 @@ export interface AgentEvalScorerConfig { ) => AgentEvalParams | undefined | Promise); sampling?: AgentEvalSamplingPolicy; id?: string; - onResult?: (result: AgentEvalResult) => void | Promise; + onResult?: (result: AgentEvalResultCallbackArgs) => void | Promise; buildPayload?: ( context: AgentEvalContext, ) => Record | Promise>; diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 1509affe1..6f5083550 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -174,6 +174,9 @@ export type { AgentEvalScorerFactory, AgentEvalScorerReference, AgentEvalResult, + AgentEvalResultCallbackArgs, + AgentEvalFeedbackHelper, + AgentEvalFeedbackSaveInput, AgentEvalSamplingPolicy, AgentEvalOperationType, AgentEvalPayload, diff --git a/packages/core/src/voltops/client.ts b/packages/core/src/voltops/client.ts index 6abf89e36..45bd37f1c 100644 --- a/packages/core/src/voltops/client.ts +++ b/packages/core/src/voltops/client.ts @@ -55,7 +55,9 @@ import type { VoltOpsEvalRunSummary, VoltOpsEvalsApi, VoltOpsFailEvalRunRequest, + VoltOpsFeedback, VoltOpsFeedbackConfig, + VoltOpsFeedbackCreateInput, VoltOpsFeedbackToken, VoltOpsFeedbackTokenCreateInput, VoltOpsPromptManager, @@ -282,6 +284,43 @@ export class VoltOpsClient implements IVoltOpsClient { }; } + public async createFeedback(input: VoltOpsFeedbackCreateInput): Promise { + const payload: Record = { + trace_id: input.traceId, + key: input.key, + }; + + if (input.id !== undefined) { + payload.id = input.id; + } + if (input.score !== undefined) { + payload.score = input.score; + } + if (input.value !== undefined) { + payload.value = input.value; + } + if (input.correction !== undefined) { + payload.correction = input.correction; + } + if (input.comment !== undefined) { + payload.comment = input.comment; + } + if (input.feedbackConfig !== undefined) { + payload.feedback_config = input.feedbackConfig; + } + if (input.feedbackSource !== undefined) { + payload.feedback_source = input.feedbackSource; + } + if (input.feedbackSourceType !== undefined) { + payload.feedback_source_type = input.feedbackSourceType; + } + if (input.createdAt !== undefined) { + payload.created_at = input.createdAt; + } + + return await this.request("POST", "/api/public/feedback", payload); + } + // getObservabilityExporter removed - observability now handled by VoltAgentObservability /** diff --git a/packages/core/src/voltops/index.ts b/packages/core/src/voltops/index.ts index 0a630ac96..cc0e8212d 100644 --- a/packages/core/src/voltops/index.ts +++ b/packages/core/src/voltops/index.ts @@ -36,6 +36,8 @@ export type { VoltOpsCreateScorerRequest, VoltOpsScorerSummary, VoltOpsFeedbackConfig, + VoltOpsFeedback, + VoltOpsFeedbackCreateInput, VoltOpsFeedbackExpiresIn, VoltOpsFeedbackToken, VoltOpsFeedbackTokenCreateInput, diff --git a/packages/core/src/voltops/types.ts b/packages/core/src/voltops/types.ts index 50cb8b35c..d0cbcddbb 100644 --- a/packages/core/src/voltops/types.ts +++ b/packages/core/src/voltops/types.ts @@ -153,6 +153,37 @@ export type VoltOpsFeedbackTokenCreateInput = { expiresIn?: VoltOpsFeedbackExpiresIn; }; +export type VoltOpsFeedbackCreateInput = { + traceId: string; + key: string; + id?: string; + score?: number | boolean | null; + value?: unknown; + correction?: unknown; + comment?: string | null; + feedbackConfig?: VoltOpsFeedbackConfig | null; + feedbackSource?: Record | null; + feedbackSourceType?: string; + createdAt?: Date | string; +}; + +export type VoltOpsFeedback = { + id: string; + trace_id: string; + key: string; + score?: number | boolean | null; + value?: unknown; + correction?: unknown; + comment?: string | null; + feedback_source?: Record | null; + feedback_source_type?: string | null; + feedback_config?: VoltOpsFeedbackConfig | null; + created_at?: string; + updated_at?: string; + source_info?: Record | null; + [key: string]: unknown; +}; + /** * Cached prompt data for performance optimization */ @@ -942,6 +973,9 @@ export interface VoltOpsClient { /** Create a feedback token for the given trace */ createFeedbackToken(input: VoltOpsFeedbackTokenCreateInput): Promise; + /** Create a feedback entry for the given trace */ + createFeedback(input: VoltOpsFeedbackCreateInput): Promise; + /** Create a prompt helper for agent instructions */ createPromptHelper(agentId: string, historyEntryId?: string): PromptHelper;