vercel · macebake · Oct 28, 2025
diff --git a/README.md b/README.md
@@ -101,8 +101,20 @@ bun claude-code-cli.ts --eval 001-server-component --verbose
 
 # Debug mode - keep output folders
 bun claude-code-cli.ts --eval 001-server-component --debug
+
+# Capture full conversation with tool calls (Claude Code only)
+bun claude-code-cli.ts --eval 001-server-component --capture-conversation
 ```
 
+**Conversation Capture (Claude Code only):**
+
+Use the `--capture-conversation` flag to save the full conversation, including all tool calls. Two files are written:
+
+- `claude-conversation.jsonl` - Complete conversation with all tool calls and responses, one JSON object per line (JSONL)
+- `claude-output.txt` - Human-readable summary containing the exit code, signal, final result, and stderr
+
+These files are saved in the output directory (`output-claude-code/`).
+
 #### Claude Code with Dev Server and Hooks
 
 Run Claude Code with a Next.js dev server and lifecycle hooks (e.g., for MCP server setup):

diff --git a/cli.ts b/cli.ts
@@ -298,6 +298,8 @@ function parseCliArgs(args: string[]) {
       values["with-hooks"] = args[++i];
     } else if (arg === "--with-visual-diff") {
       values["with-visual-diff"] = true;
+    } else if (arg === "--capture-conversation") {
+      values["capture-conversation"] = true;
     } else if (!arg.startsWith("-")) {
       positionals.push(arg);
     }
@@ -336,6 +338,7 @@ Options:
       --dev-server-port   Port for dev server (default: 4000, auto-increments for concurrent evals)
       --with-hooks <name> Use eval hooks from scripts/eval-hooks/<name>-pre.sh and <name>-post.sh
       --with-visual-diff  Enable visual regression testing with screenshot comparison
+      --capture-conversation  [Claude Code only] Capture full conversation with tool calls to JSONL
 
 Examples:
   # Run all evals with LLMs
@@ -1506,6 +1509,7 @@ async function main() {
           : undefined,
         hooks,
         visualDiff: values["with-visual-diff"] || false,
+        captureConversation: values["capture-conversation"] || false,
       };
 
       if (values.all) {

diff --git a/lib/claude-code-runner.ts b/lib/claude-code-runner.ts
@@ -47,6 +47,7 @@ export interface ClaudeCodeEvalOptions {
   visualDiff?: boolean;
   outputFormat?: string;
   outputFile?: string;
+  captureConversation?: boolean;
 }
 
 export class ClaudeCodeRunner {
@@ -58,6 +59,7 @@ export class ClaudeCodeRunner {
   private devServer?: { enabled: boolean; command?: string; port?: number };
   private hooks?: { preEval?: string; postEval?: string };
   private visualDiff: boolean;
+  private captureConversation: boolean;
 
   constructor(options: ClaudeCodeEvalOptions = {}) {
     this.verbose = options.verbose || false;
@@ -66,6 +68,7 @@ export class ClaudeCodeRunner {
     this.devServer = options.devServer;
     this.hooks = options.hooks;
     this.visualDiff = options.visualDiff || false;
+    this.captureConversation = options.captureConversation || false;
   }
 
   async runClaudeCodeEval(
@@ -239,11 +242,14 @@ IMPORTANT: Do not run npm, pnpm, yarn, or any package manager commands. Dependen
       // Additional flags to ensure it works well in automation:
       // --dangerously-skip-permissions: bypass file/execution permission prompts
       // --print: non-interactive mode that prints response and exits
-      const args = [
-        '--print',
-        '--dangerously-skip-permissions',
-        enhancedPrompt
-      ];
+      const args = ['--print', '--dangerously-skip-permissions'];
+
+      // Add conversation capture flags if enabled
+      if (this.captureConversation) {
+        args.push('--verbose', '--output-format', 'stream-json');
+      }
+
+      args.push(enhancedPrompt);
 
       if (this.verbose) {
         console.log('🚀 Spawning claude process with:');
@@ -297,7 +303,7 @@ IMPORTANT: Do not run npm, pnpm, yarn, or any package manager commands. Dependen
         });
       }, timeout);
 
-      claudeProcess.on('exit', (code, signal) => {
+      claudeProcess.on('exit', async (code, signal) => {
         clearTimeout(timeoutId);
         this.processes.delete(processId);
 
@@ -306,21 +312,66 @@ IMPORTANT: Do not run npm, pnpm, yarn, or any package manager commands. Dependen
           console.log(`Claude Code finished with code: ${code}, signal: ${signal}`);
         }
 
+        // Parse JSONL to extract human-readable summary and save files (if conversation capture is enabled)
+        let finalResult = '';
+        if (this.captureConversation) {
+          try {
+            // Save full JSONL conversation (with tool calls)
+            const jsonlFile = path.join(projectDir, 'claude-conversation.jsonl');
+            await fs.writeFile(jsonlFile, stdout);
+
+            // Parse JSONL to extract human-readable summary
+            const lines = stdout.trim().split('\n');
+            const messages: any[] = [];
+
+            for (const line of lines) {
+              try {
+                const msg = JSON.parse(line);
+                messages.push(msg);
+                if (msg.type === 'result') {
+                  finalResult = msg.result || '';
+                }
+              } catch (e) {
+                // Skip invalid JSON lines
+              }
+            }
+
+            // Save human-readable summary
+            const summaryFile = path.join(projectDir, 'claude-output.txt');
+            const summary = `=== Claude Code Output ===\n` +
+              `Exit Code: ${code}\n` +
+              `Signal: ${signal}\n\n` +
+              `=== FINAL RESULT ===\n${finalResult}\n\n` +
+              `=== STDERR ===\n${stderr}\n\n` +
+              `Full conversation with tool calls saved to: claude-conversation.jsonl\n`;
+            await fs.writeFile(summaryFile, summary);
+
+            if (this.verbose) {
+              console.log(`📝 Conversation saved to ${jsonlFile}`);
+              console.log(`📝 Summary saved to ${summaryFile}`);
+            }
+          } catch (error) {
+            if (this.verbose) {
+              console.error(`Failed to save output files: ${error}`);
+            }
+          }
+        }
+
         if (signal) {
           resolve({
             success: false,
-            output: stdout,
+            output: this.captureConversation && finalResult ? finalResult : stdout,
             error: `Claude Code process killed by signal ${signal}`
           });
         } else if (code === 0) {
           resolve({
             success: true,
-            output: stdout
+            output: this.captureConversation && finalResult ? finalResult : stdout
           });
         } else {
           resolve({
             success: false,
-            output: stdout,
+            output: this.captureConversation && finalResult ? finalResult : stdout,
             error: stderr || `Claude Code process exited with code ${code}`
           });
         }