Skip to content

Commit bd9c821

Browse files
MH4GF and claude authored and committed
fix: handle large INSERT INTO statements in PostgreSQL parser
Fixes parsing errors for large structure.sql files with extensive INSERT INTO schema_migrations blocks.

Changes:
- Filter out schema_migrations INSERT statements before parsing (not needed for ERD generation)
- Improve incomplete statement detection to avoid false "syntax error at end of input" errors
- Increase chunk size growth limit from 2x to 10x to handle larger statements

Resolves #4000

🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <[email protected]>
1 parent 3f10237 commit bd9c821

File tree

2 files changed

+50
-13
lines changed

2 files changed

+50
-13
lines changed

frontend/packages/schema/src/parser/sql/postgresql/index.ts

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,23 @@ import { mergeSchemas } from './mergeSchemas.js'
1010
import { parse } from './parser.js'
1111
import { processSQLInChunks } from './processSqlInChunks.js'
1212

13+
const SCHEMA_MIGRATIONS_INSERT_REGEX =
14+
/INSERT\s+INTO\s+(?:(?:"[^"]+"|\w+)\.)?"?schema_migrations"?[^;]*?;/gi
15+
16+
function commentOutSchemaMigrationsInserts(sql: string): string {
17+
return sql.replace(SCHEMA_MIGRATIONS_INSERT_REGEX, (statement) =>
18+
statement
19+
.split('\n')
20+
.map((line) => {
21+
if (line.length === 0) return line
22+
if (line.startsWith('--')) return line
23+
if (line.length === 1) return '-'
24+
return `--${line.slice(2)}`
25+
})
26+
.join('\n'),
27+
)
28+
}
29+
1330
/**
1431
* Handles parse errors and returns offset information
1532
*/
@@ -80,6 +97,21 @@ function processChunk(
8097
}
8198

8299
if (parseError !== null) {
100+
const chunkLengthBytes = Buffer.byteLength(chunk)
101+
const trimmedChunkEndsWithSemicolon = chunk.trimEnd().endsWith(';')
102+
const isIncompleteStatement =
103+
/syntax error at end of input/i.test(parseError.message) ||
104+
parseError.cursorpos >= chunkLengthBytes ||
105+
!trimmedChunkEndsWithSemicolon
106+
107+
if (isIncompleteStatement) {
108+
return okAsync([
109+
parseError.cursorpos,
110+
null,
111+
[],
112+
] satisfies SQLCallbackResult)
113+
}
114+
83115
return okAsync(handleParseError(parseError))
84116
}
85117

@@ -136,12 +168,13 @@ export const processor: Processor = async (
136168
sql: string,
137169
chunkSize = CHUNK_SIZE,
138170
) => {
171+
const normalizedSql = commentOutSchemaMigrationsInserts(sql)
139172
const schema: Schema = { tables: {}, enums: {}, extensions: {} }
140173

141174
const parseErrors: ProcessError[] = []
142175

143176
const errors = await processSQLInChunks(
144-
sql,
177+
normalizedSql,
145178
chunkSize,
146179
async (chunk, chunkOffset = 0) => {
147180
const result = await processChunk(

frontend/packages/schema/src/parser/sql/postgresql/processSqlInChunks.ts

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
import { err, ok, type Result } from 'neverthrow'
22
import type { ProcessError } from '../../errors.js'
33

4-
/**
5-
* Retry direction for chunk processing
6-
*/
74
const retryDirectionValues = {
85
decrease: -1, // Shrinking mode
96
increase: 1, // Expanding mode
107
} as const
118

129
type RetryDirection = -1 | 1
1310

11+
const CHUNK_GROWTH_LIMIT_MULTIPLIER = 10
12+
1413
// pg-query-emscripten returns offsets measured in UTF-8 bytes, whereas the
1514
// chunking code operates on JS string indices (UTF-16 code units). These
1615
// helpers bridge the two so multiline reads stay aligned even with multibyte
@@ -197,23 +196,28 @@ function handleIncreasingChunkSize(
197196
errors: ProcessError[]
198197
shouldBreak: boolean
199198
} {
200-
const newChunkSize = adjustedChunkSize + 1
201-
202-
// Check if we've reached the end of the input
203-
if (startIndex + newChunkSize > lines.length) {
199+
const maxAvailable = lines.length - startIndex
200+
if (maxAvailable <= 0) {
204201
return {
205-
newChunkSize,
202+
newChunkSize: adjustedChunkSize,
206203
newRetryDirection: retryDirectionValues.increase,
207204
nextIndex: null,
208205
errors,
209206
shouldBreak: true,
210207
}
211208
}
212209

213-
// Prevent excessive memory usage
214-
if (newChunkSize > originalChunkSize * 2) {
210+
const limit = Math.min(
211+
originalChunkSize * CHUNK_GROWTH_LIMIT_MULTIPLIER,
212+
maxAvailable,
213+
)
214+
215+
const proposedSize = Math.min(adjustedChunkSize + 1, limit)
216+
const canGrow = proposedSize > adjustedChunkSize
217+
218+
if (!canGrow) {
215219
return {
216-
newChunkSize,
220+
newChunkSize: proposedSize,
217221
newRetryDirection: retryDirectionValues.increase,
218222
nextIndex: null,
219223
errors,
@@ -222,7 +226,7 @@ function handleIncreasingChunkSize(
222226
}
223227

224228
return {
225-
newChunkSize,
229+
newChunkSize: proposedSize,
226230
newRetryDirection: retryDirectionValues.increase,
227231
nextIndex: null,
228232
errors: [],

0 commit comments

Comments (0)