Skip to content

Commit 58d7b06

Browse files
committed
test(mcp): eval for generator agent
1 parent ce44b24 commit 58d7b06

File tree

8 files changed: +2014 additions, -26 deletions (lines changed)

.github/workflows/tests_mcp.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,3 +47,5 @@ jobs:
4747
flakiness-client-id: ${{ secrets.AZURE_FLAKINESS_DASHBOARD_CLIENT_ID }}
4848
flakiness-tenant-id: ${{ secrets.AZURE_FLAKINESS_DASHBOARD_TENANT_ID }}
4949
flakiness-subscription-id: ${{ secrets.AZURE_FLAKINESS_DASHBOARD_SUBSCRIPTION_ID }}
50+
env:
51+
DEBUG: "loop:*"

packages/playwright/src/agents/agent.ts

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ import { Loop } from '../mcp/sdk/bundle';
1818

1919
import type z from 'zod';
2020
import type { Client } from '@modelcontextprotocol/sdk/client/index.js';
21-
import type { Tool } from '@modelcontextprotocol/sdk/types.js';
2221
import type * as tinyLoop from 'tiny-loop';
2322

2423
type Logger = (category: string, text: string, details?: string) => void;
@@ -33,14 +32,18 @@ export type AgentSpec = {
3332
examples: string[];
3433
};
3534

35+
type LoopOptions = ConstructorParameters<typeof tinyLoop.Loop>[1] & {
36+
loopName: 'copilot' | 'claude' | 'openai';
37+
};
38+
3639
export class Agent<T extends z.ZodSchema<any>> {
3740
readonly loop: tinyLoop.Loop;
3841
readonly spec: AgentSpec;
3942
readonly clients: Map<string, Client>;
40-
readonly resultSchema: Tool['inputSchema'];
43+
readonly resultSchema: tinyLoop.Schema;
4144

42-
constructor(loopName: 'copilot' | 'claude' | 'openai', spec: AgentSpec, clients: Map<string, Client>, resultSchema: Tool['inputSchema']) {
43-
this.loop = new Loop(loopName);
45+
constructor(loopOptions: LoopOptions, spec: AgentSpec, clients: Map<string, Client>, resultSchema: tinyLoop.Schema) {
46+
this.loop = new Loop(loopOptions.loopName, loopOptions);
4447
this.spec = spec;
4548
this.clients = clients;
4649
this.resultSchema = resultSchema;
@@ -52,9 +55,8 @@ export class Agent<T extends z.ZodSchema<any>> {
5255
try {
5356
return await this.loop.run<z.output<T>>(`${prompt}\n\nTask:\n${task}\n\nParams:\n${JSON.stringify(params, null, 2)}`, {
5457
...options,
55-
// TODO: fix types in tiny-loop
56-
tools: tools as any,
57-
callTool: callTool as any,
58+
tools,
59+
callTool,
5860
resultSchema: this.resultSchema
5961
});
6062
} finally {
@@ -65,15 +67,15 @@ export class Agent<T extends z.ZodSchema<any>> {
6567
private async _initClients() {
6668
const clients: Record<string, Client> = {};
6769
const agentToolNames = new Set<string>(this.spec.tools);
68-
const tools: Tool[] = [];
70+
const tools: tinyLoop.Tool[] = [];
6971

7072
for (const [name, client] of this.clients.entries()) {
7173
const list = await client.listTools();
7274
for (const tool of list.tools) {
7375
if (!agentToolNames.has(name + '/' + tool.name))
7476
continue;
7577
agentToolNames.delete(name + '/' + tool.name);
76-
tools.push({ ...tool, name: name + '__' + tool.name });
78+
tools.push({ ...tool as tinyLoop.Tool, name: name + '__' + tool.name });
7779
}
7880
clients[name] = client;
7981
}

packages/playwright/src/agents/performTask.ts

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -36,9 +36,8 @@ export async function performTask(context: playwright.BrowserContext, task: stri
3636

3737
try {
3838
return await loop.run(task, {
39-
// TODO: fix types in tiny-loop
40-
tools: await backend.listTools() as any,
41-
callTool: callTool as any,
39+
tools: await backend.listTools(),
40+
callTool,
4241
logger,
4342
});
4443
} finally {

0 commit comments

Comments
 (0)