diff --git a/.gitignore b/.gitignore index 40d9069..fd7be61 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,6 @@ typescript/bun.lock *.log .DS_Store .env +.env.local +*.env.local +.cache/ diff --git a/azure-file-type-bug-evidence.zip b/azure-file-type-bug-evidence.zip new file mode 100644 index 0000000..f400bc6 Binary files /dev/null and b/azure-file-type-bug-evidence.zip differ diff --git a/typescript/ai-sdk-v5/package.json b/typescript/ai-sdk-v5/package.json index 3b55f9e..66b81a9 100644 --- a/typescript/ai-sdk-v5/package.json +++ b/typescript/ai-sdk-v5/package.json @@ -9,7 +9,7 @@ }, "dependencies": { "@openrouter-examples/shared": "workspace:*", - "@openrouter/ai-sdk-provider": "1.5.3", + "@openrouter/ai-sdk-provider": "1.5.4", "ai": "5.0.108" }, "devDependencies": { diff --git a/typescript/ai-sdk-v5/src/pdf-openai-regression/pdf-debug.ts b/typescript/ai-sdk-v5/src/pdf-openai-regression/pdf-debug.ts new file mode 100644 index 0000000..faab32e --- /dev/null +++ b/typescript/ai-sdk-v5/src/pdf-openai-regression/pdf-debug.ts @@ -0,0 +1,94 @@ +/** + * Debug script: Inspect the actual payload being sent to OpenRouter + * + * This helps diagnose why OpenAI PDF support fails via AI SDK. 
+ */ + +import { createOpenRouter } from '@openrouter/ai-sdk-provider'; +import { generateText } from 'ai'; +import { readPdfAsDataUrl, readExpectedCode } from '@openrouter-examples/shared/fixtures'; + +async function main() { + console.log('=== PDF Debug: Inspecting AI SDK Payload ===\n'); + + const pdfDataUrl = await readPdfAsDataUrl('small'); + const expectedCode = await readExpectedCode('small'); + console.log(`PDF data URL length: ${pdfDataUrl.length}`); + console.log(`Expected code: ${expectedCode}\n`); + + // Create provider with debug middleware + const openrouter = createOpenRouter({ + apiKey: process.env.OPENROUTER_API_KEY, + // Enable request logging via custom fetch + fetch: async (url, init) => { + console.log('=== REQUEST ==='); + console.log('URL:', url); + + if (init?.body) { + try { + const body = JSON.parse(init.body as string); + console.log('Model:', body.model); + console.log('Messages:', JSON.stringify(body.messages, (key, value) => { + // Truncate base64 data for readability + if (typeof value === 'string' && value.length > 100) { + return value.slice(0, 100) + `... 
[${value.length} chars total]`; + } + return value; + }, 2)); + } catch { + console.log('Body (raw):', String(init.body).slice(0, 500)); + } + } + console.log('=== END REQUEST ===\n'); + + const response = await fetch(url, init); + + // Clone response to read body without consuming it + const clone = response.clone(); + const text = await clone.text(); + + console.log('=== RESPONSE ==='); + console.log('Status:', response.status); + console.log('Body (truncated):', text.slice(0, 500)); + console.log('=== END RESPONSE ===\n'); + + // Return a new response with the same body + return new Response(text, { + status: response.status, + statusText: response.statusText, + headers: response.headers, + }); + }, + }); + + try { + console.log('Testing OpenAI model...\n'); + const result = await generateText({ + model: openrouter('openai/gpt-4o-mini'), + messages: [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'What is the verification code in this PDF? Reply with just the code.', + }, + { + type: 'file', + data: pdfDataUrl, + mediaType: 'application/pdf', + }, + ], + }, + ], + }); + + console.log('\n=== RESULT ==='); + console.log('Response text:', result.text); + + } catch (err) { + console.error('Error:', err instanceof Error ? err.message : String(err)); + } +} + +main().catch(console.error); diff --git a/typescript/ai-sdk-v5/src/pdf-openai-regression/pdf-openai-test.ts b/typescript/ai-sdk-v5/src/pdf-openai-regression/pdf-openai-test.ts new file mode 100644 index 0000000..64e3459 --- /dev/null +++ b/typescript/ai-sdk-v5/src/pdf-openai-regression/pdf-openai-test.ts @@ -0,0 +1,200 @@ +/** + * Example: PDF Input with OpenAI Models via OpenRouter (AI SDK v5) + * + * This test verifies whether PDF attachments work with OpenAI models + * when using @openrouter/ai-sdk-provider. + * + * Bug hypothesis: PDFs fail for OpenAI models but work for Anthropic/Google. 
+ * + * Expected behavior: + * - All models should be able to read the PDF and extract the verification code + * - The code in small.pdf is: SMALL-7X9Q2 + * + * Caching: Responses are cached to .cache/requests/ to avoid hitting the API + * repeatedly during development. Delete the cache to force fresh requests. + * + * To run: bun run typescript/ai-sdk-v5/src/pdf-openai-regression/pdf-openai-test.ts + */ + +import { createOpenRouter } from '@openrouter/ai-sdk-provider'; +import { generateText } from 'ai'; +import { readPdfAsDataUrl, readExpectedCode } from '@openrouter-examples/shared/fixtures'; +import { createCachedFetch } from '@openrouter-examples/shared/request-cache'; + +const MODELS_TO_TEST = [ + // OpenAI models - testing various variants + 'openai/gpt-4o-mini', + 'openai/gpt-4o', + 'openai/gpt-4-turbo', + // Anthropic + 'anthropic/claude-3-5-sonnet', + // Google + 'google/gemini-2.0-flash-001', + // Other providers for comparison + 'x-ai/grok-3-mini-beta', + 'mistralai/pixtral-large-2411', +] as const; + +interface TestResult { + model: string; + success: boolean; + codeExtracted: string | null; + matches: boolean; + error?: string; +} + +function truncate(str: string, max = 200): string { + return str.length <= max ? str : str.slice(0, max) + '...'; +} + +function extractCode(text: string): string | null { + const match = text.match(/([A-Z]+)\s*[-–—]\s*([A-Z0-9]{5})/i); + if (match) { + return `${match[1].toUpperCase()}-${match[2].toUpperCase()}`; + } + const strict = text.match(/[A-Z]+-[A-Z0-9]{5}/); + return strict ? 
strict[0] : null; +} + +async function testModel( + model: string, + pdfDataUrl: string, + expectedCode: string, + cachedFetch: typeof fetch, +): Promise { + const openrouter = createOpenRouter({ + apiKey: process.env.OPENROUTER_API_KEY, + fetch: cachedFetch, + }); + + try { + const result = await generateText({ + model: openrouter(model), + messages: [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'What is the verification code in this PDF? Reply with just the code.', + }, + { + type: 'file', + data: pdfDataUrl, + mediaType: 'application/pdf', + }, + ], + }, + ], + }); + + const codeExtracted = extractCode(result.text); + return { + model, + success: true, + codeExtracted, + matches: codeExtracted === expectedCode, + }; + } catch (err) { + const errorMsg = err instanceof Error ? err.message : String(err); + return { + model, + success: false, + codeExtracted: null, + matches: false, + error: truncate(errorMsg), + }; + } +} + +async function main() { + console.log('╔════════════════════════════════════════════════════════════════════════════╗'); + console.log('║ PDF Input Test: OpenAI vs Others via AI SDK + OpenRouter Provider ║'); + console.log('╚════════════════════════════════════════════════════════════════════════════╝'); + console.log(); + + // Create cached fetch + const cachedFetch = createCachedFetch({ enabled: true, ttlMs: 60 * 60 * 1000 }); + + // Load PDF fixture + console.log('Loading PDF fixture (small.pdf)...'); + const pdfDataUrl = await readPdfAsDataUrl('small'); + const expectedCode = await readExpectedCode('small'); + console.log(`Expected code: ${expectedCode}\n`); + + const results: TestResult[] = []; + + // Test each model sequentially to avoid rate limits + for (const model of MODELS_TO_TEST) { + console.log(`Testing: ${model}...`); + const result = await testModel(model, pdfDataUrl, expectedCode, cachedFetch); + results.push(result); + } + + // Print results table + console.log('\n=== Results ===\n'); + console.log('Model | 
Status | Code | Match'); + console.log('-------------------------------|---------|------------|------'); + + for (const r of results) { + const modelPad = r.model.padEnd(30); + const status = r.success ? 'SUCCESS' : 'FAIL '; + const code = (r.codeExtracted ?? 'N/A').padEnd(10); + const match = r.matches ? 'YES' : 'NO '; + console.log(`${modelPad} | ${status} | ${code} | ${match}`); + } + + // Show errors if any + const failures = results.filter((r) => !r.success); + if (failures.length > 0) { + console.log('\n=== Errors ===\n'); + for (const f of failures) { + console.log(`${f.model}:`); + console.log(` ${f.error}`); + } + } + + // Summary + console.log('\n=== Summary ===\n'); + const openaiResult = results.find((r) => r.model === 'openai/gpt-4o-mini'); + const anthropicResult = results.find((r) => r.model === 'anthropic/claude-3-5-sonnet'); + const googleResult = results.find((r) => r.model === 'google/gemini-2.0-flash-001'); + + if (openaiResult?.matches) { + console.log('✓ OpenAI PDF support: WORKING'); + } else if (openaiResult?.success) { + console.log('⚠ OpenAI PDF support: Request succeeded but code not found'); + } else { + console.log('✗ OpenAI PDF support: FAILING'); + console.log(' BUG CONFIRMED: OpenAI models cannot read PDFs via AI SDK + OpenRouter'); + } + + if (anthropicResult?.matches) { + console.log('✓ Anthropic PDF support: WORKING'); + } else { + console.log('✗ Anthropic PDF support: NOT WORKING'); + } + + if (googleResult?.matches) { + console.log('✓ Google PDF support: WORKING'); + } else { + console.log('✗ Google PDF support: NOT WORKING'); + } + + // Exit with error if OpenAI fails but others work (confirms the bug) + const bugConfirmed = !openaiResult?.matches && (anthropicResult?.matches || googleResult?.matches); + if (bugConfirmed) { + console.log('\n❌ BUG REPRODUCED: OpenAI fails while other providers work'); + process.exit(1); + } + + if (results.every((r) => r.matches)) { + console.log('\n✓ All models working - no bug present'); + 
process.exit(0); + } +} + +main().catch((err) => { + console.error('Fatal:', err instanceof Error ? err.message : String(err)); + process.exit(1); +}); diff --git a/typescript/ai-sdk-v5/src/pdf-openai-regression/pdf-provider-matrix.ts b/typescript/ai-sdk-v5/src/pdf-openai-regression/pdf-provider-matrix.ts new file mode 100644 index 0000000..8742ae5 --- /dev/null +++ b/typescript/ai-sdk-v5/src/pdf-openai-regression/pdf-provider-matrix.ts @@ -0,0 +1,230 @@ +/** + * PDF Provider Matrix Test + * + * Tests PDF inputs against specific providers to find which ones fail. + * Uses OpenRouter's provider routing to force specific backends. + * + * This helps identify provider-specific bugs (e.g., Azure rejecting 'file' type). + * + * To run: bun run typescript/ai-sdk-v5/src/pdf-openai-regression/pdf-provider-matrix.ts + */ + +import { createOpenRouter } from '@openrouter/ai-sdk-provider'; +import { generateText } from 'ai'; +import { readPdfAsDataUrl, readExpectedCode } from '@openrouter-examples/shared/fixtures'; +import { createCachedFetch } from '@openrouter-examples/shared/request-cache'; + +// Test matrix: model + specific provider combinations +// Provider slugs from: https://openrouter.ai/docs/features/provider-routing +const TEST_MATRIX = [ + // OpenAI model via different providers - THIS IS THE KEY TEST + { model: 'openai/gpt-4o', provider: 'OpenAI', label: 'gpt-4o via OpenAI direct' }, + { model: 'openai/gpt-4o', provider: 'Azure', label: 'gpt-4o via Azure' }, + + // GPT-4o-mini via different providers + { model: 'openai/gpt-4o-mini', provider: 'OpenAI', label: 'gpt-4o-mini via OpenAI direct' }, + { model: 'openai/gpt-4o-mini', provider: 'Azure', label: 'gpt-4o-mini via Azure' }, + + // Claude via different providers + { model: 'anthropic/claude-3-5-sonnet', provider: 'Anthropic', label: 'Claude via Anthropic' }, + { model: 'anthropic/claude-3-5-sonnet', provider: 'Amazon Bedrock', label: 'Claude via Bedrock' }, + { model: 'anthropic/claude-3-5-sonnet', provider: 
'Google Vertex', label: 'Claude via Vertex' }, + + // Gemini via different providers + { model: 'google/gemini-2.0-flash-001', provider: 'Google AI Studio', label: 'Gemini via AI Studio' }, + { model: 'google/gemini-2.0-flash-001', provider: 'Google Vertex', label: 'Gemini via Vertex' }, + + // Other providers + { model: 'mistralai/pixtral-large-2411', provider: 'Mistral', label: 'Pixtral via Mistral' }, +] as const; + +interface TestResult { + label: string; + model: string; + provider: string; + success: boolean; + codeExtracted: string | null; + matches: boolean; + error?: string; + actualProvider?: string; +} + +function truncate(str: string, max = 200): string { + return str.length <= max ? str : str.slice(0, max) + '...'; +} + +function extractCode(text: string): string | null { + const match = text.match(/([A-Z]+)\s*[-–—]\s*([A-Z0-9]{5})/i); + if (match) { + return `${match[1].toUpperCase()}-${match[2].toUpperCase()}`; + } + const strict = text.match(/[A-Z]+-[A-Z0-9]{5}/); + return strict ? strict[0] : null; +} + +async function testModelProvider( + model: string, + providerSlug: string, + label: string, + pdfDataUrl: string, + expectedCode: string, + cachedFetch: typeof fetch, +): Promise { + const openrouter = createOpenRouter({ + apiKey: process.env.OPENROUTER_API_KEY, + fetch: cachedFetch, + }); + + try { + const result = await generateText({ + model: openrouter(model, { + // Force specific provider using extraBody + extraBody: { + provider: { + only: [providerSlug], + }, + }, + }), + messages: [ + { + role: 'user', + content: [ + { + type: 'text', + text: 'What is the verification code in this PDF? 
Reply with just the code.', + }, + { + type: 'file', + data: pdfDataUrl, + mediaType: 'application/pdf', + }, + ], + }, + ], + }); + + const codeExtracted = extractCode(result.text); + + // Try to extract actual provider from response metadata + let actualProvider: string | undefined; + const rawResponse = result.response as unknown; + if (rawResponse && typeof rawResponse === 'object' && 'body' in rawResponse) { + const body = (rawResponse as { body?: { provider?: string } }).body; + actualProvider = body?.provider; + } + + return { + label, + model, + provider: providerSlug, + success: true, + codeExtracted, + matches: codeExtracted === expectedCode, + actualProvider, + }; + } catch (err) { + const errorMsg = err instanceof Error ? err.message : String(err); + return { + label, + model, + provider: providerSlug, + success: false, + codeExtracted: null, + matches: false, + error: truncate(errorMsg), + }; + } +} + +async function main() { + console.log('╔════════════════════════════════════════════════════════════════════════════╗'); + console.log('║ PDF Provider Matrix Test - Targeting Specific Backends ║'); + console.log('╚════════════════════════════════════════════════════════════════════════════╝'); + console.log(); + + const cachedFetch = createCachedFetch({ enabled: true, ttlMs: 60 * 60 * 1000 }); + + console.log('Loading PDF fixture (small.pdf)...'); + const pdfDataUrl = await readPdfAsDataUrl('small'); + const expectedCode = await readExpectedCode('small'); + console.log(`Expected code: ${expectedCode}\n`); + + const results: TestResult[] = []; + + for (const test of TEST_MATRIX) { + console.log(`Testing: ${test.label}...`); + const result = await testModelProvider( + test.model, + test.provider, + test.label, + pdfDataUrl, + expectedCode, + cachedFetch, + ); + results.push(result); + } + + // Print results table + console.log('\n=== Results ===\n'); + console.log('Test Case | Status | Code | Match'); + 
console.log('---------------------------------------------|---------|------------|------'); + + for (const r of results) { + const labelPad = r.label.padEnd(44); + const status = r.success ? 'SUCCESS' : 'FAIL '; + const code = (r.codeExtracted ?? 'N/A').padEnd(10); + const match = r.matches ? 'YES' : 'NO '; + console.log(`${labelPad} | ${status} | ${code} | ${match}`); + } + + // Show errors + const failures = results.filter((r) => !r.success); + if (failures.length > 0) { + console.log('\n=== Failures ===\n'); + for (const f of failures) { + console.log(`${f.label}:`); + console.log(` Model: ${f.model}`); + console.log(` Provider: ${f.provider}`); + console.log(` Error: ${f.error}`); + console.log(); + } + } + + // Summary + console.log('\n=== Summary ===\n'); + const successCount = results.filter((r) => r.matches).length; + const failCount = results.filter((r) => !r.success).length; + const partialCount = results.filter((r) => r.success && !r.matches).length; + + console.log(`Total tests: ${results.length}`); + console.log(` ✓ Success (code matched): ${successCount}`); + console.log(` ⚠ Partial (response but wrong code): ${partialCount}`); + console.log(` ✗ Failed (error): ${failCount}`); + + // Identify provider-specific issues + const providerIssues = new Map(); + for (const r of results) { + if (!r.success) { + const issues = providerIssues.get(r.provider) || []; + issues.push(`${r.model}: ${r.error}`); + providerIssues.set(r.provider, issues); + } + } + + if (providerIssues.size > 0) { + console.log('\n=== Provider-Specific Issues ===\n'); + for (const [provider, issues] of providerIssues) { + console.log(`${provider}:`); + for (const issue of issues) { + console.log(` - ${issue}`); + } + } + } + + process.exit(failCount > 0 ? 1 : 0); +} + +main().catch((err) => { + console.error('Fatal:', err instanceof Error ? 
err.message : String(err)); + process.exit(1); +}); diff --git a/typescript/fetch/src/pdf-direct-input/pdf-direct.ts b/typescript/fetch/src/pdf-direct-input/pdf-direct.ts new file mode 100644 index 0000000..8a74ac1 --- /dev/null +++ b/typescript/fetch/src/pdf-direct-input/pdf-direct.ts @@ -0,0 +1,262 @@ +/** + * Example 01: Direct PDF input via raw OpenRouter API + * + * Tests PDF input directly via fetch (base64 data URL) without any SDK. + * Compares behavior across different models AND different message shapes. + * + * Message shapes tested: + * 1. "file" type - OpenRouter/Anthropic native format + * 2. "image_url" type - OpenAI native format (works for PDFs too) + * + * Expected verification code: SMALL-7X9Q2 + * + * Caching: Responses are cached to .cache/requests/ to avoid hitting the API + * repeatedly during development. Delete the cache to force fresh requests. + */ + +import { readPdfAsDataUrl, readExpectedCode } from '@openrouter-examples/shared/fixtures'; +import { createCachedFetch } from '@openrouter-examples/shared/request-cache'; +import type { ChatCompletionResponse } from '@openrouter-examples/shared/types'; + +const OPENROUTER_API_URL = 'https://openrouter.ai/api/v1/chat/completions'; + +const MODELS_TO_TEST = [ + 'openai/gpt-4o-mini', + 'anthropic/claude-3-5-sonnet', + 'google/gemini-2.0-flash-001', +] as const; + +type MessageShape = 'file' | 'image_url'; + +const PROMPT = 'What is the verification code in this PDF? Reply with just the code.'; + +// Use cached fetch to avoid hitting API repeatedly +const cachedFetch = createCachedFetch({ enabled: true, ttlMs: 60 * 60 * 1000 }); + +/** Truncate string to max length */ +function truncate(str: string, maxLen = 200): string { + return str.length <= maxLen ? 
str : str.slice(0, maxLen) + '...'; +} + +/** Extract error message from OpenRouter error response */ +function extractErrorMessage(errorJson: string): string { + try { + const parsed = JSON.parse(errorJson); + if (parsed?.error?.metadata?.raw) { + const rawParsed = JSON.parse(parsed.error.metadata.raw); + return rawParsed?.error?.message ?? parsed.error.message; + } + return parsed?.error?.message ?? errorJson; + } catch { + return errorJson; + } +} + +/** + * Extract verification code from response text. + */ +function extractCode(text: string): string | null { + const normalized = text.replace(/\*+/g, '').replace(/\s+/g, ' ').trim(); + const match = normalized.match(/([A-Z]+)\s*[-–—]\s*([A-Z0-9]{5})/i); + if (match) { + return `${match[1].toUpperCase()}-${match[2].toUpperCase()}`; + } + const strictMatch = text.match(/[A-Z]+-[A-Z0-9]{5}/); + return strictMatch ? strictMatch[0] : null; +} + +interface TestResult { + model: string; + shape: MessageShape; + success: boolean; + extractedCode: string | null; + matches: boolean; + error?: string; + rawResponse?: string; +} + +function buildMessageContent(shape: MessageShape, pdfDataUrl: string) { + if (shape === 'file') { + return [ + { type: 'text', text: PROMPT }, + { type: 'file', file: { filename: 'small.pdf', file_data: pdfDataUrl } }, + ]; + } + return [ + { type: 'text', text: PROMPT }, + { type: 'image_url', image_url: { url: pdfDataUrl } }, + ]; +} + +async function testPdfWithModel( + model: string, + shape: MessageShape, + pdfDataUrl: string, + expectedCode: string, +): Promise { + if (!process.env.OPENROUTER_API_KEY) { + return { + model, + shape, + success: false, + extractedCode: null, + matches: false, + error: 'OPENROUTER_API_KEY not set', + }; + } + + const requestBody = { + model, + messages: [{ role: 'user', content: buildMessageContent(shape, pdfDataUrl) }], + }; + + try { + const response = await cachedFetch(OPENROUTER_API_URL, { + method: 'POST', + headers: { + Authorization: `Bearer 
${process.env.OPENROUTER_API_KEY}`, + 'Content-Type': 'application/json', + 'HTTP-Referer': 'https://github.com/openrouter/examples', + 'X-Title': 'PDF Direct Input Test', + }, + body: JSON.stringify(requestBody), + }); + + if (!response.ok) { + const errorText = await response.text(); + const errorMsg = extractErrorMessage(errorText); + return { + model, + shape, + success: false, + extractedCode: null, + matches: false, + error: `HTTP ${response.status}: ${truncate(errorMsg)}`, + }; + } + + const data = (await response.json()) as ChatCompletionResponse; + const content = data.choices[0]?.message?.content ?? ''; + const extractedCode = extractCode(content); + + return { + model, + shape, + success: true, + extractedCode, + matches: extractedCode === expectedCode, + rawResponse: truncate(content), + }; + } catch (err) { + return { + model, + shape, + success: false, + extractedCode: null, + matches: false, + error: err instanceof Error ? truncate(err.message) : 'Unknown error', + }; + } +} + +async function main() { + console.log('=== Example 01: Direct PDF Input via Raw OpenRouter API ===\n'); + + console.log('Loading PDF fixture (small.pdf)...'); + const pdfDataUrl = await readPdfAsDataUrl('small'); + const expectedCode = await readExpectedCode('small'); + console.log(`Expected verification code: ${expectedCode}\n`); + + const shapes: MessageShape[] = ['file', 'image_url']; + const results: TestResult[] = []; + + for (const model of MODELS_TO_TEST) { + for (const shape of shapes) { + console.log(`Testing: ${model} with "${shape}" shape...`); + const result = await testPdfWithModel(model, shape, pdfDataUrl, expectedCode); + results.push(result); + } + } + + // Print comparison table + console.log('\n=== Results Comparison ===\n'); + console.log('Model | Shape | Status | Code | Match'); + console.log('-------------------------------|-----------|---------|------------|------'); + + for (const r of results) { + const modelPad = r.model.padEnd(30); + const shapePad = 
r.shape.padEnd(9); + const status = r.success ? 'SUCCESS' : 'FAIL '; + const code = (r.extractedCode ?? 'N/A').slice(0, 10).padEnd(10); + const match = r.matches ? 'YES' : 'NO '; + console.log(`${modelPad} | ${shapePad} | ${status} | ${code} | ${match}`); + } + + // Summary by model + console.log('\n=== Summary by Model ===\n'); + + for (const model of MODELS_TO_TEST) { + const modelResults = results.filter((r) => r.model === model); + const fileResult = modelResults.find((r) => r.shape === 'file'); + const imageUrlResult = modelResults.find((r) => r.shape === 'image_url'); + + console.log(`${model}:`); + const fileStatus = fileResult?.matches ? 'WORKS' : 'FAILS'; + const fileDetail = fileResult?.error + ? truncate(fileResult.error, 80) + : fileResult?.success && !fileResult?.matches + ? `Response: "${truncate(fileResult.rawResponse ?? '', 80)}"` + : ''; + console.log(` - "file" shape: ${fileStatus} ${fileDetail ? `(${fileDetail})` : ''}`); + + const imageUrlStatus = imageUrlResult?.matches ? 'WORKS' : 'FAILS'; + const imageUrlDetail = imageUrlResult?.error + ? truncate(imageUrlResult.error, 80) + : imageUrlResult?.success && !imageUrlResult?.matches + ? `Response: "${truncate(imageUrlResult.rawResponse ?? '', 80)}"` + : ''; + console.log(` - "image_url" shape: ${imageUrlStatus} ${imageUrlDetail ? 
`(${imageUrlDetail})` : ''}`); + console.log(); + } + + // Key findings + console.log('=== Key Findings ===\n'); + + const fileShapeWorks = results.filter((r) => r.shape === 'file' && r.matches); + const imageUrlShapeWorks = results.filter((r) => r.shape === 'image_url' && r.matches); + + console.log('PDF input via "file" shape (OpenRouter/Anthropic format):'); + if (fileShapeWorks.length === MODELS_TO_TEST.length) { + console.log(' ✓ Works with ALL tested models'); + } else { + console.log(` ✓ Works with: ${fileShapeWorks.map((r) => r.model).join(', ') || 'none'}`); + const fileFails = results.filter((r) => r.shape === 'file' && !r.matches); + console.log(` ✗ Fails with: ${fileFails.map((r) => r.model).join(', ') || 'none'}`); + } + + console.log('\nPDF input via "image_url" shape (OpenAI native format):'); + if (imageUrlShapeWorks.length === MODELS_TO_TEST.length) { + console.log(' ✓ Works with ALL tested models'); + } else { + console.log(` ✓ Works with: ${imageUrlShapeWorks.map((r) => r.model).join(', ') || 'none'}`); + const imageUrlFails = results.filter((r) => r.shape === 'image_url' && !r.matches); + console.log(` ✗ Fails with: ${imageUrlFails.map((r) => r.model).join(', ') || 'none'}`); + } + + console.log('\nConclusion:'); + console.log(' The "file" shape is the universal format for PDF input across OpenRouter models.'); + console.log(' The "image_url" shape only works with Google models for PDFs.'); + + const anyOpenAIWorks = results.some((r) => r.model === 'openai/gpt-4o-mini' && r.matches); + if (!anyOpenAIWorks) { + console.log('\n⚠️ OpenAI PDF support: NOT WORKING with any tested shape'); + process.exit(1); + } else { + console.log('\n✓ OpenAI PDF support: WORKING (via "file" shape)'); + } +} + +main().catch((err) => { + console.error('Fatal error:', err); + process.exit(1); +}); diff --git a/typescript/fetch/src/pdf-message-shape-matrix/shape-matrix.ts b/typescript/fetch/src/pdf-message-shape-matrix/shape-matrix.ts new file mode 100644 index 
0000000..08c3e28 --- /dev/null +++ b/typescript/fetch/src/pdf-message-shape-matrix/shape-matrix.ts @@ -0,0 +1,291 @@ +/** + * Example 14: PDF Message Shape Matrix Test + * + * Tests whether PDF failures correlate with message shape and format. + * Tests multiple content type formats: + * - Format 1: `file` type with data/mimeType (AI SDK v5 style) + * - Format 2: `file` type with filename/file_data (OpenRouter style) + * - Format 3: `image_url` type with data URL + * - Format 4: `input_file` type (OpenAI Responses API style) + * + * For each format, tests both: + * - Shape A: File only (no text part) + * - Shape B: Text + File (text part before file) + * + * Uses raw fetch to isolate from SDK behavior. + */ + +import { readPdfAsDataUrl } from '@openrouter-examples/shared/fixtures'; + +const OPENROUTER_API_URL = 'https://openrouter.ai/api/v1/chat/completions'; +const MODEL = 'openai/gpt-4o-mini'; +const EXPECTED_CODE = 'SMALL-7X9Q2'; + +// Helper to truncate strings for display +function truncate(str: string, max = 200): string { + if (str.length <= max) { + return str; + } + return str.slice(0, max) + '...'; +} + +interface TestResult { + format: string; + shape: string; + httpOk: boolean; + codeFound: boolean; + response?: string; + error?: string; +} + +interface TestConfig { + format: string; + shape: string; + messages: unknown[]; + plugins?: unknown[]; +} + +async function testShape(config: TestConfig): Promise { + const { format, shape: shapeName, messages, plugins } = config; + const apiKey = process.env.OPENROUTER_API_KEY; + if (!apiKey) { + return { + format, + shape: shapeName, + httpOk: false, + codeFound: false, + error: 'OPENROUTER_API_KEY not set', + }; + } + + try { + const body: Record = { + model: MODEL, + messages, + }; + if (plugins) { + body.plugins = plugins; + } + + const response = await fetch(OPENROUTER_API_URL, { + method: 'POST', + headers: { + Authorization: `Bearer ${apiKey}`, + 'Content-Type': 'application/json', + 'HTTP-Referer': 
'https://github.com/openrouter/examples', + 'X-Title': 'PDF Shape Matrix Test', + }, + body: JSON.stringify(body), + }); + + if (!response.ok) { + const errorText = await response.text(); + return { + format, + shape: shapeName, + httpOk: false, + codeFound: false, + error: truncate(`HTTP ${response.status}: ${errorText}`), + }; + } + + const data = (await response.json()) as { + choices?: Array<{ message?: { content?: string } }>; + }; + const content = data.choices?.[0]?.message?.content ?? ''; + + // Check if verification code is in response + const codeFound = content.includes(EXPECTED_CODE); + + return { + format, + shape: shapeName, + httpOk: true, + codeFound, + response: truncate(content), + }; + } catch (err) { + return { + format, + shape: shapeName, + httpOk: false, + codeFound: false, + error: truncate(err instanceof Error ? err.message : String(err)), + }; + } +} + +async function main() { + console.log('=== PDF Message Shape Matrix Test ===\n'); + console.log(`Model: ${MODEL}`); + console.log('PDF: small.pdf (33KB, code: SMALL-7X9Q2)\n'); + + // Read PDF as base64 + const pdfDataUrl = await readPdfAsDataUrl('small'); + const base64Data = pdfDataUrl.replace(/^data:application\/pdf;base64,/, ''); + + const promptText = 'Please extract the verification code from this PDF.'; + + // Define content parts for different formats + // Format 1: AI SDK v5 style (data + mimeType) + const filePartAiSdk = { + type: 'file', + file: { data: base64Data, mimeType: 'application/pdf' }, + }; + + // Format 2: OpenRouter style (filename + file_data as data URL) + const filePartOpenRouter = { + type: 'file', + file: { filename: 'small.pdf', file_data: pdfDataUrl }, + }; + + // Format 3: image_url with data URL + const imageUrlPart = { + type: 'image_url', + image_url: { url: pdfDataUrl }, + }; + + // Format 4: input_file (OpenAI Responses API style) + const inputFilePart = { + type: 'input_file', + filename: 'small.pdf', + file_data: pdfDataUrl, + }; + + const textPart = { 
type: 'text', text: promptText }; + + // File-parser plugin config + const fileParserPlugin = [{ id: 'file-parser', pdf: { engine: 'mistral-ocr' } }]; + + // Build test matrix + const tests: TestConfig[] = [ + // Format 1: file type with data/mimeType (AI SDK v5 style) - NO plugin + { + format: 'file(data)', + shape: 'A: file only', + messages: [{ role: 'user', content: [filePartAiSdk] }], + }, + { + format: 'file(data)', + shape: 'B: text+file', + messages: [{ role: 'user', content: [textPart, filePartAiSdk] }], + }, + // Format 2: file type with filename/file_data (OpenRouter style) - WITH plugin + { + format: 'file(OR)', + shape: 'A: file only', + messages: [{ role: 'user', content: [filePartOpenRouter] }], + plugins: fileParserPlugin, + }, + { + format: 'file(OR)', + shape: 'B: text+file', + messages: [{ role: 'user', content: [filePartOpenRouter, textPart] }], + plugins: fileParserPlugin, + }, + // Format 3: image_url type + { + format: 'image_url', + shape: 'A: file only', + messages: [{ role: 'user', content: [imageUrlPart] }], + }, + { + format: 'image_url', + shape: 'B: text+file', + messages: [{ role: 'user', content: [textPart, imageUrlPart] }], + }, + // Format 4: input_file type (OpenAI Responses API style) + { + format: 'input_file', + shape: 'A: file only', + messages: [{ role: 'user', content: [inputFilePart] }], + }, + { + format: 'input_file', + shape: 'B: text+file', + messages: [{ role: 'user', content: [textPart, inputFilePart] }], + }, + ]; + + console.log(`Testing ${tests.length} combinations...\n`); + + // Run tests sequentially to avoid rate limits + const results: TestResult[] = []; + for (const test of tests) { + console.log(` Testing ${test.format} / ${test.shape}...`); + const result = await testShape(test); + results.push(result); + } + + console.log('\n'); + + // Print results table + console.log( + '┌────────────┬─────────────┬─────────┬────────────┬────────────────────────────────────────┐', + ); + console.log( + '│ Format │ Shape │ 
HTTP OK │ Code Found │ Response/Error │', + ); + console.log( + '├────────────┼─────────────┼─────────┼────────────┼────────────────────────────────────────┤', + ); + + for (const r of results) { + const format = r.format.padEnd(10); + const shape = r.shape.padEnd(11); + const httpOk = r.httpOk ? '✓' : '✗'; + const code = r.codeFound ? '✓' : '✗'; + const detail = truncate(r.response ?? r.error ?? '', 38).padEnd(38); + console.log( + `│ ${format} │ ${shape} │ ${httpOk.padEnd(7)} │ ${code.padEnd(10)} │ ${detail} │`, + ); + } + + console.log( + '└────────────┴─────────────┴─────────┴────────────┴────────────────────────────────────────┘', + ); + + // Summary analysis + console.log('\n=== Summary by Format ==='); + + const formats = ['file(data)', 'file(OR)', 'image_url', 'input_file']; + for (const format of formats) { + const formatResults = results.filter((r) => r.format === format); + const httpOk = formatResults.filter((r) => r.httpOk).length; + const codeOk = formatResults.filter((r) => r.codeFound).length; + console.log(`${format.padEnd(12)}: HTTP OK: ${httpOk}/2, Code Found: ${codeOk}/2`); + } + + console.log('\n=== Summary by Shape ==='); + const shapes = ['A: file only', 'B: text+file']; + for (const shape of shapes) { + const shapeResults = results.filter((r) => r.shape === shape); + const httpOk = shapeResults.filter((r) => r.httpOk).length; + const codeOk = shapeResults.filter((r) => r.codeFound).length; + console.log(`${shape.padEnd(12)}: HTTP OK: ${httpOk}/4, Code Found: ${codeOk}/4`); + } + + // Determine if any format/shape works + const anyCodeFound = results.some((r) => r.codeFound); + const anyHttpOk = results.some((r) => r.httpOk); + + console.log('\n=== Conclusions ==='); + if (anyCodeFound) { + const working = results.filter((r) => r.codeFound); + console.log('Working combinations:'); + for (const w of working) { + console.log(` - ${w.format} / ${w.shape}`); + } + } else if (anyHttpOk) { + console.log('Some formats return HTTP 200 but model 
cannot read PDF content.'); + console.log('This suggests the PDF is not being properly passed to the model.'); + } else { + console.log('All formats fail with HTTP errors.'); + console.log('OpenRouter may not support inline PDF uploads for this model.'); + } + + // Exit code based on whether any test found the code + process.exit(anyCodeFound ? 0 : 1); +} + +main().catch(console.error); diff --git a/typescript/fetch/src/pdf-vs-image-min-repro/pdf-vs-image.ts b/typescript/fetch/src/pdf-vs-image-min-repro/pdf-vs-image.ts new file mode 100644 index 0000000..db73f0f --- /dev/null +++ b/typescript/fetch/src/pdf-vs-image-min-repro/pdf-vs-image.ts @@ -0,0 +1,237 @@ +/** + * Example 13: PDF vs Image Minimal Reproduction + * + * This test proves whether PDFs fail while images succeed for OpenAI models via OpenRouter. + * Uses raw fetch (NOT AI SDK) to isolate the issue. + * + * Expected results: + * - Test A (Image via image_url): SUCCESS - OpenAI supports images natively + * - Test B (PDF via image_url): FAIL - OpenAI rejects PDFs in image_url format + * - Test C (PDF via file + plugin): SUCCESS - FileParserPlugin converts PDF to text + * + * This demonstrates that AI SDK's current approach of sending PDFs as image_url is wrong. + * PDFs need to be sent using the "file" content type with FileParserPlugin enabled. + * + * To run: cd typescript/fetch && bun run src/pdf-vs-image-min-repro/pdf-vs-image.ts + */ + +import { readPdfAsDataUrl } from '@openrouter-examples/shared/fixtures'; + +const OPENROUTER_API_URL = 'https://openrouter.ai/api/v1/chat/completions'; +const MODEL = 'openai/gpt-4o-mini'; +const PROMPT = 'What file type was attached? 
Describe what you see briefly.';

// Smallest valid PNG (a single red pixel), inlined as a base64 data URL.
const TINY_RED_PNG_DATA_URL =
  'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==';

/**
 * Shorten `str` to at most `maxLen` characters, appending a marker when
 * anything was cut off.
 */
function truncate(str: string, maxLen = 200): string {
  return str.length > maxLen ? str.slice(0, maxLen) + '... [truncated]' : str;
}

/** Outcome of one request, as printed by `printResult`. */
interface TestResult {
  name: string;
  /** True when the HTTP response had an ok status. */
  success: boolean;
  /** HTTP status code, or 0 when the request itself threw. */
  httpStatus: number;
  /** Truncated model reply (empty on failure). */
  content: string;
  /** Truncated error body or exception message (failures only). */
  error?: string;
}

/**
 * POST a single-message chat completion to OpenRouter and summarize the outcome.
 *
 * @param name - Label carried through to the result.
 * @param contentParts - Content parts of the single user message.
 * @param withPlugin - When true, the file-parser plugin (mistral-ocr) is added to the request.
 * @returns A `TestResult`; HTTP and network failures are reported in the result, not thrown.
 * @throws Error when `OPENROUTER_API_KEY` is not set.
 */
async function makeRequest(
  name: string,
  contentParts: Array<{ type: string; [key: string]: unknown }>,
  withPlugin = false,
): Promise<TestResult> {
  const apiKey = process.env.OPENROUTER_API_KEY;
  if (!apiKey) {
    throw new Error('OPENROUTER_API_KEY environment variable is not set');
  }

  // `plugins` is spread last so the serialized key order matches a body that
  // had the field appended after construction.
  const payload: Record<string, unknown> = {
    model: MODEL,
    messages: [{ role: 'user', content: contentParts }],
    max_tokens: 150,
    ...(withPlugin
      ? { plugins: [{ id: 'file-parser', pdf: { engine: 'mistral-ocr' } }] }
      : {}),
  };

  // Shared shape for both failure paths (HTTP error and thrown exception).
  const failure = (httpStatus: number, error: string): TestResult => ({
    name,
    success: false,
    httpStatus,
    content: '',
    error,
  });

  try {
    const response = await fetch(OPENROUTER_API_URL, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
        'HTTP-Referer': 'https://github.com/openrouter/examples',
        'X-Title': 'PDF vs Image Min Repro',
      },
      body: JSON.stringify(payload),
    });

    const rawBody = await response.text();
    if (!response.ok) {
      return failure(response.status, truncate(rawBody));
    }

    const parsed = JSON.parse(rawBody);
    const reply = parsed.choices?.[0]?.message?.content || '[No content]';
    return {
      name,
      success: true,
      httpStatus: response.status,
      content: truncate(reply),
    };
  } catch (err) {
    return failure(0, err instanceof Error ? truncate(err.message) : 'Unknown error');
  }
}

/** Print one result: status line, HTTP status, then the reply or the error. */
function printResult(result: TestResult) {
  const { name, success, httpStatus, content, error } = result;
  console.log(`\n${name}: ${success ? '✅ SUCCESS' : '❌ FAILED'}`);
  console.log(` HTTP Status: ${httpStatus}`);
  console.log(success ? ` Response: ${content}` : ` Error: ${error}`);
}

/**
 * Run the three requests (image via image_url, PDF via image_url, PDF via
 * file + plugin) concurrently, print each result, and report whether the
 * outcome matches the expected success/fail/success pattern.
 */
async function main() {
  const banner = [
    '╔════════════════════════════════════════════════════════════════════════════╗',
    '║ Example 13: PDF vs Image Minimal Reproduction Test ║',
    '╚════════════════════════════════════════════════════════════════════════════╝',
  ];
  for (const row of banner) {
    console.log(row);
  }
  console.log();
  console.log(`Model: ${MODEL}`);
  console.log(`Prompt: "${PROMPT}"`);
  console.log();

  // Load the PDF fixture before announcing the run.
  const pdfDataUrl = await readPdfAsDataUrl('small');

  console.log('Running 3 tests in parallel...');
  console.log(' A) Image via image_url (should work - native OpenAI support)');
  console.log(' B) PDF via image_url (should FAIL - OpenAI rejects PDFs here)');
  console.log(' C) PDF via file + plugin (should work - FileParserPlugin)');

  const promptPart = { type: 'text', text: PROMPT };
  const asImageUrl = (url: string) => ({ type: 'image_url', image_url: { url } });

  const [imageResult, pdfViaImageUrlResult, pdfViaFileResult] = await Promise.all([
    // A: a real image in the image_url slot.
    makeRequest('A) Image (image_url)', [asImageUrl(TINY_RED_PNG_DATA_URL), promptPart]),
    // B: the PDF data URL in the image_url slot (the approach under test as wrong).
    makeRequest('B) PDF (image_url) - WRONG', [asImageUrl(pdfDataUrl), promptPart]),
    // C: the PDF as a file part, with the file-parser plugin enabled.
    makeRequest(
      'C) PDF (file + plugin) - CORRECT',
      [{ type: 'file', file: { filename: 'small.pdf', file_data: pdfDataUrl } }, promptPart],
      true,
    ),
  ]);

  const separator =
    '\n════════════════════════════════════════════════════════════════════════════';

  console.log(separator);
  console.log('RESULTS:');
  for (const result of [imageResult, pdfViaImageUrlResult, pdfViaFileResult]) {
    printResult(result);
  }

  console.log(separator);
  console.log('SUMMARY:');

  const verdict = (r: TestResult): string => (r.success ? 'success' : 'fail');
  const reproduced =
    imageResult.success && !pdfViaImageUrlResult.success && pdfViaFileResult.success;

  if (!reproduced) {
    console.log('');
    console.log('Results differ from expected pattern:');
    console.log(' Expected: A=success, B=fail, C=success');
    console.log(
      ` Actual: A=${verdict(imageResult)}, B=${verdict(pdfViaImageUrlResult)}, C=${verdict(pdfViaFileResult)}`,
    );
    return;
  }

  console.log('');
  console.log('🔍 CONFIRMED: The issue is reproduced!');
  console.log('');
  console.log(' - Images work via image_url (OpenAI native support)');
  console.log(' - PDFs FAIL via image_url (OpenAI rejects non-image data URLs)');
  console.log(' - PDFs WORK via file + FileParserPlugin');
  console.log('');
  console.log(' CONCLUSION: AI SDK must NOT send PDFs as image_url.');
  console.log(' PDFs need the "file" content type with FileParserPlugin enabled.');
}

main().catch((err) => {
  console.error('Fatal error:', err instanceof Error ?
err.message : String(err)); + process.exit(1); +}); diff --git a/typescript/shared/package.json b/typescript/shared/package.json index 6d2739a..f70fb98 100644 --- a/typescript/shared/package.json +++ b/typescript/shared/package.json @@ -9,7 +9,8 @@ "exports": { "./constants": "./src/constants.ts", "./types": "./src/types.ts", - "./fixtures": "./src/fixtures.ts" + "./fixtures": "./src/fixtures.ts", + "./request-cache": "./src/request-cache.ts" }, "devDependencies": { "@types/bun": "1.3.2", diff --git a/typescript/shared/src/json-sidecar.ts b/typescript/shared/src/json-sidecar.ts new file mode 100644 index 0000000..7f9cdbf --- /dev/null +++ b/typescript/shared/src/json-sidecar.ts @@ -0,0 +1,232 @@ +/** + * JSON Sidecar - Store large string values in separate files + * + * When stringifying, any string value over a threshold is replaced with a + * reference and the value is written to a separate file. + * + * Special handling for data URLs: the prefix (e.g., "data:application/pdf;base64,") + * is kept in the main file, only the base64 blob goes to the sidecar. + * + * Reference format: + * - Plain string: `__SIDECAR__:{hash}` + * - Data URL: `data:application/pdf;base64,__SIDECAR__:{hash}` + * + * This keeps the main JSON file small and readable while preserving large + * blobs (like base64 PDFs) in sidecars. 
/* Imports, constants and small reference helpers for the JSON sidecar format. */
import { createHash } from 'node:crypto';
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs';
import { dirname, join } from 'node:path';

/** Marker that introduces a sidecar reference inside a stored string. */
const SIDECAR_MARKER = '__SIDECAR__:';

/** Strings longer than this many characters are moved to a sidecar file. */
const DEFAULT_THRESHOLD = 1000;

/**
 * Matches base64 data URLs of the form `data:<mime-type>;base64,<payload>`.
 * Group 1 is the prefix up to and including the comma, group 2 the payload.
 */
const DATA_URL_REGEX = /^(data:[^;]+;base64,)(.+)$/;

/** A reference hash consists of hexadecimal digits only (see `hashValue`). */
const SIDECAR_HASH_REGEX = /^[0-9a-f]+$/i;

interface SidecarOptions {
  /** Directory to store sidecar files (defaults to same dir as main file) */
  sidecarDir?: string;
  /** Threshold in chars - strings larger than this go to sidecar (default: 1000) */
  threshold?: number;
}

/**
 * Generate a short hash for a string value.
 *
 * @returns The first 12 hex characters of the value's SHA-256 digest.
 */
function hashValue(value: string): string {
  return createHash('sha256').update(value).digest('hex').slice(0, 12);
}

/**
 * Check if a string contains a well-formed sidecar reference.
 *
 * Delegates to `extractSidecarHash` so that "is a reference" and "has an
 * extractable hash" can never disagree.
 */
function hasSidecarRef(value: string): boolean {
  return extractSidecarHash(value) !== null;
}

/**
 * Create a sidecar reference string for the given hash.
 */
function makeSidecarRef(hash: string): string {
  return `${SIDECAR_MARKER}${hash}`;
}

/**
 * Extract the sidecar hash from a reference.
 *
 * Everything after the first marker must be a non-empty run of hexadecimal
 * digits. Text that merely mentions the marker, or whose remainder contains
 * other characters (for example path separators), is not a reference. This
 * matters because the hash is later used to build a sidecar file name.
 *
 * @returns The hash, or `null` when `ref` is not a well-formed reference.
 */
function extractSidecarHash(ref: string): string | null {
  const markerAt = ref.indexOf(SIDECAR_MARKER);
  if (markerAt === -1) {
    return null;
  }
  const candidate = ref.slice(markerAt + SIDECAR_MARKER.length);
  return SIDECAR_HASH_REGEX.test(candidate) ? candidate : null;
}
/**
 * Split a large string into the reference kept in the main JSON and the
 * content written to the sidecar file.
 *
 * For a base64 data URL the prefix stays in the reference and only the
 * payload is stored; any other string is stored whole.
 */
function processLargeString(value: string): { ref: string; content: string } {
  const dataUrl = DATA_URL_REGEX.exec(value);
  // e.g. "data:application/pdf;base64," for a data URL, empty otherwise.
  const prefix = dataUrl?.[1] ?? '';
  const payload = dataUrl?.[2] ?? value;

  return {
    ref: `${prefix}${makeSidecarRef(hashValue(payload))}`,
    content: payload,
  };
}

/**
 * Resolve a sidecar reference back to the original string.
 *
 * Returns `value` unchanged when it carries no hash, or (after a warning)
 * when the sidecar file does not exist.
 */
function restoreSidecarRef(value: string, sidecarDir: string): string {
  const hash = extractSidecarHash(value);
  if (!hash) {
    return value;
  }

  const sidecarPath = join(sidecarDir, `${hash}.sidecar`);
  if (!existsSync(sidecarPath)) {
    console.warn(`Sidecar file not found: ${sidecarPath}`);
    return value;
  }

  // Whatever precedes the marker (a data URL prefix, or nothing for a plain
  // reference) is put back in front of the stored content.
  const prefix = value.slice(0, value.indexOf(SIDECAR_MARKER));
  return prefix + readFileSync(sidecarPath, 'utf-8');
}

/**
 * Stringify JSON with large strings stored in sidecar files.
 *
 * Side effect: creates the sidecar directory when missing and writes one
 * `<hash>.sidecar` file per extracted string.
 *
 * @param value - The value to stringify
 * @param mainFilePath - Path where the main JSON file will be written
 * @param options - Sidecar options
 * @returns The JSON string (with sidecar references for large values)
 */
export function stringify(
  value: unknown,
  mainFilePath: string,
  options: SidecarOptions = {},
): string {
  const { threshold: limit = DEFAULT_THRESHOLD } = options;
  const targetDir = options.sidecarDir ?? dirname(mainFilePath);

  if (!existsSync(targetDir)) {
    mkdirSync(targetDir, { recursive: true });
  }

  // Sidecar file name -> content, collected while serializing.
  const pending = new Map<string, string>();

  const json = JSON.stringify(
    value,
    (_key: string, current: unknown): unknown => {
      if (typeof current !== 'string' || current.length <= limit) {
        return current;
      }
      const { ref, content } = processLargeString(current);
      const hash = extractSidecarHash(ref);
      if (hash) {
        pending.set(`${hash}.sidecar`, content);
      }
      return ref;
    },
    2,
  );

  // Files are written only after serialization has completed.
  pending.forEach((content, filename) => {
    writeFileSync(join(targetDir, filename), content);
  });

  return json;
}

/**
 * Parse JSON with sidecar references restored to original values.
 *
 * @param json - The JSON string to parse
 * @param mainFilePath - Path where the main JSON file is located
 * @param options - Sidecar options
 * @returns The parsed value with sidecars restored
 */
export function parse<T>(
  json: string,
  mainFilePath: string,
  options: SidecarOptions = {},
): T {
  const sourceDir = options.sidecarDir ?? dirname(mainFilePath);

  return JSON.parse(json, (_key: string, current: unknown): unknown =>
    typeof current === 'string' && hasSidecarRef(current)
      ? restoreSidecarRef(current, sourceDir)
      : current,
  ) as T;
}

/**
 * Write a value to a JSON file with sidecars for large strings.
 */
export function writeFile(
  filePath: string,
  value: unknown,
  options: SidecarOptions = {},
): void {
  writeFileSync(filePath, stringify(value, filePath, options));
}

/**
 * Read a JSON file and restore any sidecar references.
/**
 * Load a JSON file from disk and resolve the sidecar references it contains.
 *
 * @param filePath - Path of the main JSON file
 * @param options - Sidecar options
 */
export function readFile<T>(
  filePath: string,
  options: SidecarOptions = {},
): T {
  return parse(readFileSync(filePath, 'utf-8'), filePath, options);
}

/**
 * Check if a parsed JSON object has any unresolved sidecar references
 * (useful for debugging missing sidecars). Walks arrays and object values
 * recursively.
 */
export function hasUnresolvedRefs(obj: unknown): boolean {
  if (typeof obj === 'string') {
    return hasSidecarRef(obj);
  }
  if (obj === null || typeof obj !== 'object') {
    return false;
  }
  const children: unknown[] = Array.isArray(obj) ? obj : Object.values(obj);
  return children.some((child) => hasUnresolvedRefs(child));
}
diff --git a/typescript/shared/src/request-cache.ts b/typescript/shared/src/request-cache.ts
new file mode 100644
index 0000000..ec60e51
--- /dev/null
+++ b/typescript/shared/src/request-cache.ts
@@ -0,0 +1,407 @@
/**
 * Request/Response caching for OpenRouter API calls
 *
 * This module provides caching to avoid hitting the API repeatedly during development.
 * Cache is keyed by a hash of the request body (excluding volatile fields).
/**
 * Cache structure (folder per request):
 * .cache/requests/{key}/
 * - meta.json - Small metadata: url, model, status, timestamp, summary, stack trace
 * - request.json - Request body (large base64 strings in sidecars)
 * - response.json - Response body
 * - *.sidecar - Large string values (base64 blobs)
 */

import { createHash } from 'node:crypto';
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { dirname, join, relative } from 'node:path';
import { fileURLToPath } from 'node:url';
import * as JsonSidecar from './json-sidecar.js';

const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
/** One folder per cached request lives under this directory. */
const CACHE_DIR = join(__dirname, '../../../.cache/requests');
/** Sidecar files are shared by all cache entries so identical blobs are stored once. */
const SIDECAR_DIR = join(__dirname, '../../../.cache/sidecars');

/** Contents of meta.json: small, blob-free summary of one cached exchange. */
export interface CacheMeta {
  key: string;
  url: string;
  method: string;
  model: string | null;
  /** Provider routing config, when the request specified one */
  provider?: unknown;
  status: number;
  statusText: string;
  timestamp: number;
  /** Same instant as `timestamp`, as an ISO string for readability */
  timestampISO: string;
  /** Leading part of the response (up to 500 chars) for quick inspection */
  responseSummary: string;
  /** True for 2xx statuses */
  success: boolean;
  /** Error message taken from the response body, for failed requests */
  errorMessage?: string;
  /** Frames showing where the request originated */
  stackTrace: string[];
  /** First non-library frame of `stackTrace` */
  callerFile?: string;
}

/** Response body as stored: exactly one of the two fields is set by the cache writer. */
export interface CachedResponseBody {
  /** Parsed value, when the body was valid JSON */
  json?: unknown;
  /** Raw text, when the body was not valid JSON */
  text?: string;
}

/** Contents of response.json. */
export interface CachedResponse {
  status: number;
  statusText: string;
  headers: Record<string, string>;
  body: CachedResponseBody;
  timestamp: number;
}

/**
 * Capture the current call stack, reduced to frames that help locate the
 * caller: frames from this module, Node internals and node_modules are
 * dropped, and at most five frames are returned.
 *
 * @returns `frames` as `path:line:col` strings and `callerFile`, the first of them.
 */
function getStackTrace(): { frames: string[]; callerFile?: string } {
  const ignoredMarkers = ['request-cache.ts', 'node:internal', 'node_modules'];
  // NOTE(review): this base is two levels above the directory CACHE_DIR is
  // anchored to ('../../..'); confirm which root the relative paths are meant
  // to be measured from.
  const baseDir = join(__dirname, '../../../../..');

  // First line of the stack is the "Error" header, not a frame.
  const rawFrames = (new Error().stack ?? '').split('\n').slice(1);

  const frames: string[] = [];
  for (const rawFrame of rawFrames) {
    const text = rawFrame.trim();
    if (ignoredMarkers.some((marker) => text.includes(marker))) {
      continue;
    }

    // Pull "file:line:col" out of "at fn (file:line:col)" / "at file:line:col".
    const parsed = /at\s+(?:.*?\s+)?\(?(.+?):(\d+):(\d+)\)?$/.exec(text);
    if (!parsed) {
      continue;
    }

    const [, filePath, lineNo, colNo] = parsed;
    // Absolute paths are shortened relative to baseDir; anything else is kept verbatim.
    const shownPath = filePath.startsWith('/') ? relative(baseDir, filePath) : filePath;
    frames.push(`${shownPath}:${lineNo}:${colNo}`);
  }

  // The caller is the first surviving frame, taken before the list is capped.
  return { frames: frames.slice(0, 5), callerFile: frames[0] };
}

/**
 * Derive the cache key for a request: the first 16 hex characters of the
 * SHA-256 over the URL followed by the JSON of the normalized body.
 */
function getCacheKey(url: string, body: unknown): string {
  return createHash('sha256')
    .update(url)
    .update(JSON.stringify(normalizeRequestBody(body)))
    .digest('hex')
    .slice(0, 16);
}

/**
 * Normalize request body to improve cache hits.
 * Removes/normalizes volatile fields that don't affect the semantic request.
/**
 * Prepare a request body for hashing into a cache key.
 *
 * At present this only makes a shallow copy of object bodies: every
 * top-level field is carried over unchanged and nothing is stripped, so the
 * key depends on every field and on property order. Non-object bodies are
 * returned as-is.
 */
function normalizeRequestBody(body: unknown): unknown {
  if (!body || typeof body !== 'object') {
    return body;
  }

  const normalized: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(body as Record<string, unknown>)) {
    normalized[key] = value;
  }
  return normalized;
}

/**
 * Get the cache folder path for a given key
 */
function getCacheFolder(key: string): string {
  return join(CACHE_DIR, key);
}

/**
 * Get file paths for a cache entry (all inside the entry's folder)
 */
function getCachePaths(key: string) {
  const folder = getCacheFolder(key);
  return {
    folder,
    meta: join(folder, 'meta.json'),
    request: join(folder, 'request.json'),
    response: join(folder, 'response.json'),
  };
}

/**
 * Extract the `model` field from a request body, stringified.
 *
 * @returns `null` when the body is not an object or has no `model` property.
 */
function extractModel(body: unknown): string | null {
  if (body && typeof body === 'object' && 'model' in body) {
    return String((body as { model: unknown }).model);
  }
  return null;
}

/**
 * Extract the `provider` routing config from a request body, if present.
 */
function extractProvider(body: unknown): unknown | undefined {
  if (body && typeof body === 'object' && 'provider' in body) {
    return (body as { provider: unknown }).provider;
  }
  return undefined;
}

/**
 * Extract `error.message` from a JSON response body.
 *
 * @returns The message, or `undefined` when the body has no such string field.
 */
function extractErrorMessage(body: CachedResponseBody): string | undefined {
  if (body.json && typeof body.json === 'object') {
    const { error } = body.json as Record<string, unknown>;
    if (error && typeof error === 'object') {
      const { message } = error as Record<string, unknown>;
      if (typeof message === 'string') {
        return message;
      }
    }
  }
  return undefined;
}

/**
 * Get a response summary for quick inspection: the serialized JSON body (or
 * the raw text), cut to `maxLen` characters with a trailing `...`.
 *
 * The JSON branch is chosen with `!== undefined`, the same test the replay
 * path uses, so falsy JSON bodies (`null`, `0`, `false`, `""`) are summarized
 * too.
 */
function getResponseSummary(body: CachedResponseBody, maxLen = 500): string {
  const clip = (s: string): string => (s.length > maxLen ? s.slice(0, maxLen) + '...' : s);

  if (body.json !== undefined) {
    return clip(JSON.stringify(body.json));
  }
  if (body.text) {
    return clip(body.text);
  }
  return '';
}

/**
 * Look up the cached entry for a request.
 *
 * The response file is written through the sidecar writer, so it is read
 * back through the sidecar reader; a plain `JSON.parse` would hand callers
 * reference placeholders instead of the real long strings. An entry whose
 * sidecar files are missing is treated as absent.
 *
 * @returns The entry, or `null` when it is missing, unreadable or incomplete.
 */
export function getCachedResponse(
  url: string,
  body: unknown,
): { meta: CacheMeta; response: CachedResponse } | null {
  const key = getCacheKey(url, body);
  const paths = getCachePaths(key);

  if (!existsSync(paths.meta) || !existsSync(paths.response)) {
    return null;
  }

  try {
    // meta.json is written with plain JSON.stringify, so plain parsing is right here.
    const meta = JSON.parse(readFileSync(paths.meta, 'utf-8')) as CacheMeta;
    const response = JsonSidecar.readFile<CachedResponse>(paths.response, {
      sidecarDir: SIDECAR_DIR,
    });
    if (JsonSidecar.hasUnresolvedRefs(response)) {
      // A sidecar file is gone: replaying placeholders would corrupt the body.
      return null;
    }
    return { meta, response };
  } catch {
    // Unreadable or malformed entries count as a cache miss.
    return null;
  }
}

/**
 * Save a response to the cache: writes meta.json, request.json and
 * response.json into the entry's folder, with long strings moved to the
 * shared sidecar directory.
 *
 * @param stackInfo - Pre-captured origin of the request; captured here when omitted.
 */
export function cacheResponse(
  url: string,
  requestBody: unknown,
  response: CachedResponse,
  stackInfo?: { frames: string[]; callerFile?: string },
): void {
  const key = getCacheKey(url, requestBody);
  const paths = getCachePaths(key);

  if (!existsSync(paths.folder)) {
    mkdirSync(paths.folder, { recursive: true });
  }

  const success = response.status >= 200 && response.status < 300;
  const { frames, callerFile } = stackInfo ?? getStackTrace();

  // Metadata is kept free of large blobs so it is always cheap to read.
  const meta: CacheMeta = {
    key,
    url,
    method: 'POST',
    model: extractModel(requestBody),
    provider: extractProvider(requestBody),
    status: response.status,
    statusText: response.statusText,
    timestamp: response.timestamp,
    timestampISO: new Date(response.timestamp).toISOString(),
    responseSummary: getResponseSummary(response.body),
    success,
    errorMessage: success ? undefined : extractErrorMessage(response.body),
    stackTrace: frames,
    callerFile,
  };
  writeFileSync(paths.meta, JSON.stringify(meta, null, 2));

  if (!existsSync(SIDECAR_DIR)) {
    mkdirSync(SIDECAR_DIR, { recursive: true });
  }

  // Request and response both go through the sidecar writer, sharing one
  // sidecar directory so identical blobs are stored once.
  JsonSidecar.writeFile(paths.request, requestBody, { sidecarDir: SIDECAR_DIR });
  JsonSidecar.writeFile(paths.response, response, { sidecarDir: SIDECAR_DIR });
}

/** Statuses for which the Response constructor rejects a non-null body. */
const NULL_BODY_STATUSES = new Set([101, 103, 204, 205, 304]);

/** Resolve the request URL for any of the input forms `fetch` accepts. */
function resolveRequestUrl(input: RequestInfo | URL): string {
  if (typeof input === 'string') {
    return input;
  }
  // A Request stringifies to "[object Request]", so its `url` is read explicitly.
  return input instanceof URL ? input.href : input.url;
}

/** Build a Response from text, passing a null body where the status forbids one. */
function buildResponse(bodyText: string, init: ResponseInit): Response {
  const status = init.status ?? 200;
  return new Response(NULL_BODY_STATUSES.has(status) ? null : bodyText, init);
}

/**
 * Create a cached fetch function for OpenRouter API calls
 *
 * Only POST requests whose body parses as JSON are cached; everything else
 * is passed straight to `fetch`.
 *
 * @param options.enabled - Whether caching is enabled (default: true)
 * @param options.ttlMs - Cache TTL in milliseconds (default: 1 hour)
 * @returns A fetch function that caches responses
 */
export function createCachedFetch(
  options: { enabled?: boolean; ttlMs?: number } = {},
): typeof fetch {
  const { enabled = true, ttlMs = 60 * 60 * 1000 } = options;

  return async (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
    const url = resolveRequestUrl(input);

    // Capture stack trace early (before async operations)
    const stackInfo = getStackTrace();

    // HTTP method names are matched case-insensitively.
    const method = init?.method?.toUpperCase();
    const rawBody = init?.body;
    if (!enabled || method !== 'POST' || !rawBody) {
      return fetch(input, init);
    }

    let requestBody: unknown;
    try {
      requestBody = JSON.parse(String(rawBody));
    } catch {
      // Not a JSON body: nothing to key the cache on.
      return fetch(input, init);
    }

    const cached = getCachedResponse(url, requestBody);
    if (cached) {
      const age = Date.now() - cached.response.timestamp;
      if (age < ttlMs) {
        const model = cached.meta.model ?? 'unknown';
        console.log(`[CACHE HIT] ${model} (age: ${Math.round(age / 1000)}s)`);
        // Reconstruct body from cached format
        const cachedText =
          cached.response.body.json !== undefined
            ? JSON.stringify(cached.response.body.json)
            : (cached.response.body.text ?? '');
        return buildResponse(cachedText, {
          status: cached.response.status,
          statusText: cached.response.statusText,
          headers: cached.response.headers,
        });
      }
      console.log(`[CACHE EXPIRED] ${cached.meta.model ?? url}`);
    }

    const model = extractModel(requestBody);
    const provider = extractProvider(requestBody);
    const providerInfo = provider ? ` via ${JSON.stringify(provider)}` : '';
    console.log(`[CACHE MISS] ${model ?? url}${providerInfo}`);

    const response = await fetch(input, init);

    // Read the body from a clone so the original response stays unconsumed.
    const bodyText = await response.clone().text();

    // Store parsed JSON when possible, raw text otherwise.
    let body: CachedResponseBody;
    try {
      body = { json: JSON.parse(bodyText) };
    } catch {
      body = { text: bodyText };
    }

    const headers: Record<string, string> = {};
    response.headers.forEach((value, key) => {
      headers[key] = value;
    });

    cacheResponse(
      url,
      requestBody,
      {
        status: response.status,
        statusText: response.statusText,
        headers,
        body,
        timestamp: Date.now(),
      },
      stackInfo,
    );

    // Return a new response with the same body
    return buildResponse(bodyText, {
      status: response.status,
      statusText: response.statusText,
      headers: response.headers,
    });
  };
}

/**
 * Truncate long strings (such as base64 data) anywhere inside a value, for
 * logging. Arrays and objects are copied recursively; other values are
 * returned unchanged.
 *
 * @param maxLen - Strings longer than this are cut and annotated with their original length.
 */
export function truncateForLog(obj: unknown, maxLen = 100): unknown {
  if (typeof obj === 'string') {
    return obj.length > maxLen ? obj.slice(0, maxLen) + `... [${obj.length} chars]` : obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((item) => truncateForLog(item, maxLen));
  }
  if (obj && typeof obj === 'object') {
    const result: Record<string, unknown> = {};
    for (const [key, value] of Object.entries(obj)) {
      result[key] = truncateForLog(value, maxLen);
    }
    return result;
  }
  return obj;
}