Skip to content

Commit bd9c821

Browse files
MH4GF and claude authored and committed
fix: handle large INSERT INTO statements in PostgreSQL parser
Fixes parsing errors for large structure.sql files with extensive INSERT INTO schema_migrations blocks.

Changes:
- Filter out schema_migrations INSERT statements before parsing (not needed for ERD generation)
- Improve incomplete statement detection to avoid false "syntax error at end of input" errors
- Increase chunk size growth limit from 2x to 10x to handle larger statements

Resolves #4000

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <[email protected]>
1 parent 3f10237 commit bd9c821

File tree

2 files changed

+50
-13
lines changed

2 files changed

+50
-13
lines changed

frontend/packages/schema/src/parser/sql/postgresql/index.ts

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,23 @@ import { mergeSchemas } from './mergeSchemas.js'
1010
import { parse } from './parser.js'
1111
import { processSQLInChunks } from './processSqlInChunks.js'
1212

13+
const SCHEMA_MIGRATIONS_INSERT_REGEX =
14+
/INSERT\s+INTO\s+(?:(?:"[^"]+"|\w+)\.)?"?schema_migrations"?[^;]*?;/gi
15+
16+
function commentOutSchemaMigrationsInserts(sql: string): string {
17+
return sql.replace(SCHEMA_MIGRATIONS_INSERT_REGEX, (statement) =>
18+
statement
19+
.split('\n')
20+
.map((line) => {
21+
if (line.length === 0) return line
22+
if (line.startsWith('--')) return line
23+
if (line.length === 1) return '-'
24+
return `--${line.slice(2)}`
25+
})
26+
.join('\n'),
27+
)
28+
}
29+
1330
/**
1431
* Handles parse errors and returns offset information
1532
*/
@@ -80,6 +97,21 @@ function processChunk(
8097
}
8198

8299
if (parseError !== null) {
100+
const chunkLengthBytes = Buffer.byteLength(chunk)
101+
const trimmedChunkEndsWithSemicolon = chunk.trimEnd().endsWith(';')
102+
const isIncompleteStatement =
103+
/syntax error at end of input/i.test(parseError.message) ||
104+
parseError.cursorpos >= chunkLengthBytes ||
105+
!trimmedChunkEndsWithSemicolon
106+
107+
if (isIncompleteStatement) {
108+
return okAsync([
109+
parseError.cursorpos,
110+
null,
111+
[],
112+
] satisfies SQLCallbackResult)
113+
}
114+
83115
return okAsync(handleParseError(parseError))
84116
}
85117

@@ -136,12 +168,13 @@ export const processor: Processor = async (
136168
sql: string,
137169
chunkSize = CHUNK_SIZE,
138170
) => {
171+
const normalizedSql = commentOutSchemaMigrationsInserts(sql)
139172
const schema: Schema = { tables: {}, enums: {}, extensions: {} }
140173

141174
const parseErrors: ProcessError[] = []
142175

143176
const errors = await processSQLInChunks(
144-
sql,
177+
normalizedSql,
145178
chunkSize,
146179
async (chunk, chunkOffset = 0) => {
147180
const result = await processChunk(

frontend/packages/schema/src/parser/sql/postgresql/processSqlInChunks.ts

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
import { err, ok, type Result } from 'neverthrow'
22
import type { ProcessError } from '../../errors.js'
33

4-
/**
5-
* Retry direction for chunk processing
6-
*/
74
const retryDirectionValues = {
85
decrease: -1, // Shrinking mode
96
increase: 1, // Expanding mode
107
} as const
118

129
type RetryDirection = -1 | 1
1310

11+
const CHUNK_GROWTH_LIMIT_MULTIPLIER = 10
12+
1413
// pg-query-emscripten returns offsets measured in UTF-8 bytes, whereas the
1514
// chunking code operates on JS string indices (UTF-16 code units). These
1615
// helpers bridge the two so multiline reads stay aligned even with multibyte
@@ -197,23 +196,28 @@ function handleIncreasingChunkSize(
197196
errors: ProcessError[]
198197
shouldBreak: boolean
199198
} {
200-
const newChunkSize = adjustedChunkSize + 1
201-
202-
// Check if we've reached the end of the input
203-
if (startIndex + newChunkSize > lines.length) {
199+
const maxAvailable = lines.length - startIndex
200+
if (maxAvailable <= 0) {
204201
return {
205-
newChunkSize,
202+
newChunkSize: adjustedChunkSize,
206203
newRetryDirection: retryDirectionValues.increase,
207204
nextIndex: null,
208205
errors,
209206
shouldBreak: true,
210207
}
211208
}
212209

213-
// Prevent excessive memory usage
214-
if (newChunkSize > originalChunkSize * 2) {
210+
const limit = Math.min(
211+
originalChunkSize * CHUNK_GROWTH_LIMIT_MULTIPLIER,
212+
maxAvailable,
213+
)
214+
215+
const proposedSize = Math.min(adjustedChunkSize + 1, limit)
216+
const canGrow = proposedSize > adjustedChunkSize
217+
218+
if (!canGrow) {
215219
return {
216-
newChunkSize,
220+
newChunkSize: proposedSize,
217221
newRetryDirection: retryDirectionValues.increase,
218222
nextIndex: null,
219223
errors,
@@ -222,7 +226,7 @@ function handleIncreasingChunkSize(
222226
}
223227

224228
return {
225-
newChunkSize,
229+
newChunkSize: proposedSize,
226230
newRetryDirection: retryDirectionValues.increase,
227231
nextIndex: null,
228232
errors: [],

0 commit comments

Comments (0)