base2 best of n

jahooma · jahooma · commit f9e3ebe2409f · 2025-10-28T22:11:42.000-07:00
diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts
@@ -10,9 +10,10 @@ export const createBase2: (
   mode: 'fast' | 'max',
   options?: {
     hasNoValidation?: boolean
+    bestOfN?: boolean
   },
 ) => Omit<SecretAgentDefinition, 'id'> = (mode, options) => {
-  const { hasNoValidation = false } = options ?? {}
+  const { hasNoValidation = false, bestOfN = false } = options ?? {}
   const isFast = mode === 'fast'
   const isMax = mode === 'max'
 
@@ -56,6 +57,7 @@ export const createBase2: (
       'researcher-web',
       'researcher-docs',
       'commander',
+      bestOfN && 'base2-best-of-n-orchestrator',
       isMax && 'base2-gpt-5-worker',
       'context-pruner',
     ),
@@ -145,15 +147,18 @@ ${buildArray(
   `- Consider spawning other agents or reading more files as needed to gather comprehensive context to answer the user's request.`,
   isFast &&
     `- Use the write_todos tool to write out your step-by-step implementation plan.${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'}`,
-  isFast &&
+  bestOfN &&
+    `- You must spawn the base2-best-of-n-orchestrator agent to implement the code changes, since it will generate multiple implementation proposals and select the best one, which the user wants you to do.`,
+  !bestOfN &&
+    isFast &&
     `- Use the str_replace or write_file tool to make the changes. (Pause after making all the changes to see the tool results of your edits and double check they went through correctly.)`,
   isMax &&
     `- IMPORTANT: You must spawn a base2-gpt-5-worker agent inline (with spawn_agent_inline tool) to do the planning and editing.`,
   !hasNoValidation &&
     `- Test your changes${isFast ? ' briefly' : ''} by running appropriate validation commands for the project (e.g. typechecks, tests, lints, etc.). You may have to explore the project to find the appropriate commands. Don't skip this step!`,
   `- Inform the user that you have completed the task in one sentence or a few short bullet points. Don't create any markdown summary files, unless asked by the user. If you already finished the user request and said you're done, then don't say anything else.`,
 ).join('\n')}`,
-    stepPrompt: `${isMax ? "Keep working until the user's request is completely satisfied. " : ''}After completing the user request, summarize your changes in a sentence or a few short bullet points. Do not create any summary markdown files or example documentation files, unless asked by the user. If you already summarized your changes, then end turn and don't say anything else.`,
+    stepPrompt: `${isMax ? "Keep working until the user's request is completely satisfied. " : ''}${bestOfN ? "You must spawn the base2-best-of-n-orchestrator agent to implement the code changes. Don't forget to do this! " : ''}After completing the user request, summarize your changes in a sentence or a few short bullet points. Do not create any summary markdown files or example documentation files, unless asked by the user. If you already summarized your changes, then end turn and don't say anything else.`,
     handleSteps: function* ({ params }) {
       let steps = 0
       while (true) {
diff --git a/.agents/base2/best-of-n/base2-best-of-n-editor.ts b/.agents/base2/best-of-n/base2-best-of-n-editor.ts
@@ -0,0 +1,56 @@
+import type { SecretAgentDefinition } from '../../types/secret-agent-definition'
+import { publisher } from '../../constants'
+
+const definition: SecretAgentDefinition = { // Editor agent: replays the tool calls of one selected implementation
+  id: 'base2-best-of-n-editor',
+  publisher,
+  model: 'x-ai/grok-4-fast',
+  displayName: 'Best-of-N Editor',
+  spawnerPrompt:
+    'Parses the selected implementation and applies all code changes',
+
+  toolNames: ['str_replace', 'write_file'], // the two tool-call formats described in the prompt below
+  spawnableAgents: [], // this agent spawns nothing itself
+
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description: 'Selected implementation text with the tool calls to apply',
+    },
+  },
+  outputMode: 'last_message',
+
+  instructionsPrompt: `You are the best-of-n editor agent. You have been provided with a selected implementation.
+
+The implementation contains tool calls in the following format:
+
+<codebuff_tool_call>
+{
+  "cb_tool_name": "str_replace",
+  "path": "path/to/file",
+  "replacements": [...]
+}
+</codebuff_tool_call>
+
+OR
+
+<codebuff_tool_call>
+{
+  "cb_tool_name": "write_file",
+  "path": "path/to/file",
+  "instructions": "...",
+  "content": "..."
+}
+</codebuff_tool_call>
+
+Your task is to:
+1. Parse all the tool calls from the implementation text
+2. Execute each tool call in order using your str_replace and write_file tools
+3. Apply all the changes exactly as specified in the implementation
+
+IMPORTANT: You must execute ALL tool calls from the implementation. Do not skip any changes.
+
+After completing the tool calls with tool results that confirm the changes were applied, please end your turn and do not write anything else.`,
+}
+
+export default definition
diff --git a/.agents/base2/best-of-n/base2-best-of-n-orchestrator.ts b/.agents/base2/best-of-n/base2-best-of-n-orchestrator.ts
@@ -0,0 +1,115 @@
+import type { SecretAgentDefinition } from '../../types/secret-agent-definition'
+import { publisher } from '../../constants'
+
+const definition: SecretAgentDefinition = { // Scripted pipeline: implementors -> selector -> editor
+  id: 'base2-best-of-n-orchestrator',
+  publisher,
+  model: 'anthropic/claude-sonnet-4.5',
+  displayName: 'Best-of-N Implementation Orchestrator',
+  spawnerPrompt:
+    'Orchestrates multiple implementor agents to generate implementation proposals and selects the best one',
+
+  includeMessageHistory: true,
+  inheritParentSystemPrompt: true,
+
+  toolNames: ['spawn_agents', 'set_output'],
+  spawnableAgents: [
+    'base2-implementor',
+    'base2-selector',
+    'base2-best-of-n-editor',
+  ],
+
+  inputSchema: {},
+  outputMode: 'structured_output',
+
+  handleSteps: function* () {
+    // Spawn 5 implementor agents in parallel
+    const { toolResult: implementorsResult } = yield {
+      toolName: 'spawn_agents',
+      input: {
+        agents: [
+          { agent_type: 'base2-implementor' },
+          { agent_type: 'base2-implementor' },
+          { agent_type: 'base2-implementor' },
+          { agent_type: 'base2-implementor' },
+          { agent_type: 'base2-implementor' },
+        ],
+      },
+    }
+
+    // Letters used as implementation ids (A, B, C, ...)
+    const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    // Collect each implementor output from the json tool results and label it
+    const implementations = (implementorsResult ?? [])
+      .filter((result) => result.type === 'json')
+      .map(
+        (result) =>
+          (result as any).value as { agentType: string; value: string }[],
+      )
+      .flatMap((results) =>
+        results.map((result, index) => ({
+          id: letters[index],
+          content: JSON.stringify((result.value as any).value),
+        })),
+      )
+
+    // Spawn selector with implementations as params
+    const { toolResult: selectorResult } = yield {
+      toolName: 'spawn_agents',
+      input: {
+        agents: [
+          {
+            agent_type: 'base2-selector',
+            params: { implementations },
+          },
+        ],
+      },
+    }
+
+    // Extract the selector's choice (undefined when it produced no json output)
+    const selectorOutput =
+      (selectorResult ?? [])
+        .filter((result) => result.type === 'json')
+        .map(
+          (result) =>
+            result.value as {
+              value: { value: { implementationId: string; reasoning: string } }
+            }[],
+        )[0]?.[0]
+
+    const chosenImplementationId = selectorOutput?.value?.value?.implementationId
+    const chosenImplementation = implementations.find(
+      (implementation) => implementation.id === chosenImplementationId,
+    )
+    if (!chosenImplementation) {
+      yield {
+        toolName: 'set_output',
+        input: { error: 'Failed to choose an implementation.' },
+      }
+      return
+    }
+
+    // Spawn editor to apply the chosen implementation
+    yield {
+      toolName: 'spawn_agents',
+      input: {
+        agents: [
+          {
+            agent_type: 'base2-best-of-n-editor',
+            prompt: chosenImplementation.content,
+          },
+        ],
+      },
+    }
+
+    // Set output with the chosen implementation
+    yield {
+      toolName: 'set_output',
+      input: {
+        implementation: chosenImplementation.content,
+      },
+    }
+  },
+}
+
+export default definition
diff --git a/.agents/base2/best-of-n/base2-best-of-n.ts b/.agents/base2/best-of-n/base2-best-of-n.ts
@@ -0,0 +1,11 @@
+import { createBase2 } from '../base2'
+import type { SecretAgentDefinition } from '../../types/secret-agent-definition'
+
+const base2 = createBase2('fast', { bestOfN: true }) // 'fast' mode with the bestOfN option enabled
+const definition: SecretAgentDefinition = {
+  ...base2,
+  id: 'base2-best-of-n', // createBase2's return type omits `id`, so it is supplied here
+  displayName: 'Buffy Best-of-N Orchestrator',
+}
+
+export default definition
diff --git a/.agents/base2/best-of-n/base2-implementor.ts b/.agents/base2/best-of-n/base2-implementor.ts
@@ -0,0 +1,69 @@
+import type { SecretAgentDefinition } from '../../types/secret-agent-definition'
+import { publisher } from '../../constants'
+
+const definition: SecretAgentDefinition = { // Implementor agent: writes a full proposal as text, applies nothing
+  id: 'base2-implementor',
+  publisher,
+  model: 'anthropic/claude-sonnet-4.5',
+  displayName: 'Implementation Generator',
+  spawnerPrompt:
+    'Generates a complete implementation plan with all code changes',
+
+  includeMessageHistory: true,
+  inheritParentSystemPrompt: true,
+
+  toolNames: [], // no tools: the changes are written out in tool-call format, not executed
+  spawnableAgents: [],
+
+  inputSchema: {},
+  outputMode: 'last_message',
+
+  instructionsPrompt: `You are an implementation generator agent. Your task is to write out ALL the code changes needed to complete the user's request in a single comprehensive response.
+
+Write out what changes you would make using the tool call format below. Use this exact format for each file change:
+
+<codebuff_tool_call>
+{
+  "cb_tool_name": "str_replace",
+  "path": "path/to/file",
+  "replacements": [
+    {
+      "old": "exact old code",
+      "new": "exact new code"
+    },
+    {
+      "old": "exact old code 2",
+      "new": "exact new code 2"
+    }
+  ]
+}
+</codebuff_tool_call>
+
+OR for new files or major rewrites:
+
+<codebuff_tool_call>
+{
+  "cb_tool_name": "write_file",
+  "path": "path/to/file",
+  "instructions": "What the change does",
+  "content": "Complete file content or edit snippet"
+}
+</codebuff_tool_call>
+
+Your implementation should:
+- Be complete and comprehensive
+- Include all necessary changes to fulfill the user's request
+- Follow the project's conventions and patterns
+- Be as simple and maintainable as possible
+- Reuse existing code wherever possible
+- Include proper error handling
+- Be well-structured and organized
+
+Write out your complete implementation now, formatting all changes as tool calls as shown above.`,
+
+  handleSteps: function* () {
+    yield 'STEP' // yields once, then returns; presumably a single model step (confirm against the runtime)
+  },
+}
+
+export default definition
diff --git a/.agents/base2/best-of-n/base2-selector.ts b/.agents/base2/best-of-n/base2-selector.ts
@@ -0,0 +1,74 @@
+import type { SecretAgentDefinition } from '../../types/secret-agent-definition'
+import { publisher } from '../../constants'
+
+const definition: SecretAgentDefinition = { // Selector agent: picks one proposal and reports it via set_output
+  id: 'base2-selector',
+  publisher,
+  model: 'anthropic/claude-sonnet-4.5',
+  displayName: 'Implementation Selector',
+  spawnerPrompt:
+    'Analyzes multiple implementation proposals and selects the best one',
+
+  includeMessageHistory: true,
+  inheritParentSystemPrompt: true,
+
+  toolNames: ['set_output'],
+  spawnableAgents: [],
+
+  inputSchema: {
+    params: {
+      type: 'object',
+      properties: {
+        implementations: {
+          type: 'array',
+          items: {
+            type: 'object',
+            properties: {
+              id: { type: 'string' },
+              content: { type: 'string' },
+            },
+            required: ['id', 'content'],
+          },
+        },
+      },
+      required: ['implementations'],
+    },
+  },
+  outputMode: 'structured_output',
+  outputSchema: {
+    type: 'object',
+    properties: {
+      reasoning: { type: 'string' },
+      implementationId: { type: 'string' }, // must match an `id` from params.implementations
+    },
+    required: ['reasoning', 'implementationId'],
+  },
+
+  instructionsPrompt: `You are the implementation selector agent. You have been provided with multiple implementation proposals via params.
+
+The implementations are available in the params.implementations array, where each has:
+- id: A unique identifier for the implementation
+- content: The full implementation text with tool calls
+
+Your task is to:
+1. Analyze each implementation proposal carefully
+2. Compare them against the original user requirements
+3. Evaluate each based on:
+   - Correctness and completeness
+   - Simplicity and maintainability
+   - Code quality and adherence to project conventions
+   - Minimal changes to existing code
+   - Proper reuse of existing helpers and patterns
+   - Clarity and readability
+
+4. Select the best implementation
+5. Call set_output with the selected implementation
+
+Format your set_output call with:
+{
+  "reasoning": "Brief explanation of why this implementation was selected",
+  "implementationId": "The id of the chosen implementation"
+}`,
+}
+
+export default definition