Skip to content

Commit ea569cd

Browse files
zth and mediremi authored
fix JS regex literal parsing in char classes (#7790)
* fix JS regex literal parsing in char classes
* Simplify regex character class parsing and reject more invalid regexes
* Add CHANGELOG entry

---------

Co-authored-by: Médi-Rémi Hashim <[email protected]>
1 parent 644ccd8 commit ea569cd

File tree

6 files changed

+50
-277
lines changed

6 files changed

+50
-277
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626

2727
#### :bug: Bug fix
2828

29+
- Fix JS regex literal parsing in character classes. https://github.com/rescript-lang/rescript/pull/7790
2930
- Fix creating interface for functions with upper bounded polymorphic args. https://github.com/rescript-lang/rescript/pull/7786
3031

3132
#### :memo: Documentation

compiler/syntax/src/res_scanner.ml

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -584,9 +584,9 @@ let scan_regex scanner =
584584
bring_buf_up_to_date ~start_offset:last_char_offset;
585585
Buffer.contents buf)
586586
in
587-
let rec scan () =
587+
let rec scan ?(in_char_class = false) () =
588588
match scanner.ch with
589-
| '/' ->
589+
| '/' when not in_char_class ->
590590
let last_char_offset = scanner.offset in
591591
next scanner;
592592
let pattern = result ~first_char_offset ~last_char_offset in
@@ -610,10 +610,16 @@ let scan_regex scanner =
610610
| '\\' ->
611611
next scanner;
612612
next scanner;
613-
scan ()
613+
scan ~in_char_class ()
614+
| '[' when not in_char_class ->
615+
next scanner;
616+
scan ~in_char_class:true ()
617+
| ']' when in_char_class ->
618+
next scanner;
619+
scan ~in_char_class:false ()
614620
| _ ->
615621
next scanner;
616-
scan ()
622+
scan ~in_char_class ()
617623
in
618624
let pattern, flags = scan () in
619625
let end_pos = position scanner in
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
2+
Syntax error!
3+
syntax_tests/data/parsing/errors/expressions/regexCharacterClasses.res:1:14
4+
5+
1 │ let re = /[]/]/
6+
2 │
7+
8+
I'm not sure what to parse here when looking at "]".
9+
10+
11+
Syntax error!
12+
syntax_tests/data/parsing/errors/expressions/regexCharacterClasses.res:1:16
13+
14+
1 │ let re = /[]/]/
15+
2 │
16+
17+
unterminated regex
18+
19+
let re = [%re {js|/[]/|js}]
20+
;;[%re {js|//|js}]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
let re = /[]/]/

tests/syntax_tests/data/parsing/grammar/expressions/expected/regex.res.txt

Lines changed: 7 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,7 @@ let re = [%re {js|/a[b-d]/|js}]
7676
let re = [%re {js|/a[-b]/|js}]
7777
let re = [%re {js|/a[\\-b]/|js}]
7878
let re = [%re {js|/a[]b/|js}]
79-
let re = [%re {js|/a[/|js}]
8079
let re = [%re {js|/a\\/|js}]
81-
let re = [%re {js|/abc)/|js}]
82-
let re = [%re {js|/(abc/|js}]
8380
let re = [%re {js|/a]/|js}]
8481
let re = [%re {js|/a[]]b/|js}]
8582
let re = [%re {js|/a[\\]]b/|js}]
@@ -239,7 +236,6 @@ let re = [%re {js|/a[-b]/|js}]
239236
let re = [%re {js|/a[b-]/|js}]
240237
let re = [%re {js|/a[b-a]/|js}]
241238
let re = [%re {js|/a[]b/|js}]
242-
let re = [%re {js|/a[/|js}]
243239
let re = [%re {js|/a]/|js}]
244240
let re = [%re {js|/a[]]b/|js}]
245241
let re = [%re {js|/a[^bc]d/|js}]
@@ -258,8 +254,6 @@ let re = [%re {js|/a\\(b/|js}]
258254
let re = [%re {js|/a\\(*b/|js}]
259255
let re = [%re {js|/a\\(*b/|js}]
260256
let re = [%re {js|/a\\\\b/|js}]
261-
let re = [%re {js|/abc)/|js}]
262-
let re = [%re {js|/(abc/|js}]
263257
let re = [%re {js|/((a))/|js}]
264258
let re = [%re {js|/(a)b(c)/|js}]
265259
let re = [%re {js|/a+b+c/|js}]
@@ -317,134 +311,6 @@ let re = [%re {js|/[k]/|js}]
317311
let re = [%re {js|/a[-]?c/|js}]
318312
let re = [%re {js|/(abc)\\1/|js}]
319313
let re = [%re {js|/([a-c]*)\\1/|js}]
320-
let re = [%re {js|/(?i)abc/|js}]
321-
let re = [%re {js|/(?i)abc/|js}]
322-
let re = [%re {js|/(?i)abc/|js}]
323-
let re = [%re {js|/(?i)abc/|js}]
324-
let re = [%re {js|/(?i)abc/|js}]
325-
let re = [%re {js|/(?i)abc/|js}]
326-
let re = [%re {js|/(?i)ab*c/|js}]
327-
let re = [%re {js|/(?i)ab*bc/|js}]
328-
let re = [%re {js|/(?i)ab*bc/|js}]
329-
let re = [%re {js|/(?i)ab*?bc/|js}]
330-
let re = [%re {js|/(?i)ab{0,}?bc/|js}]
331-
let re = [%re {js|/(?i)ab+?bc/|js}]
332-
let re = [%re {js|/(?i)ab+bc/|js}]
333-
let re = [%re {js|/(?i)ab+bc/|js}]
334-
let re = [%re {js|/(?i)ab{1,}bc/|js}]
335-
let re = [%re {js|/(?i)ab+bc/|js}]
336-
let re = [%re {js|/(?i)ab{1,}?bc/|js}]
337-
let re = [%re {js|/(?i)ab{1,3}?bc/|js}]
338-
let re = [%re {js|/(?i)ab{3,4}?bc/|js}]
339-
let re = [%re {js|/(?i)ab{4,5}?bc/|js}]
340-
let re = [%re {js|/(?i)ab??bc/|js}]
341-
let re = [%re {js|/(?i)ab??bc/|js}]
342-
let re = [%re {js|/(?i)ab{0,1}?bc/|js}]
343-
let re = [%re {js|/(?i)ab??bc/|js}]
344-
let re = [%re {js|/(?i)ab??c/|js}]
345-
let re = [%re {js|/(?i)ab{0,1}?c/|js}]
346-
let re = [%re {js|/(?i)^abc$/|js}]
347-
let re = [%re {js|/(?i)^abc$/|js}]
348-
let re = [%re {js|/(?i)^abc/|js}]
349-
let re = [%re {js|/(?i)^abc$/|js}]
350-
let re = [%re {js|/(?i)abc$/|js}]
351-
let re = [%re {js|/(?i)^/|js}]
352-
let re = [%re {js|/(?i)$/|js}]
353-
let re = [%re {js|/(?i)a.c/|js}]
354-
let re = [%re {js|/(?i)a.c/|js}]
355-
let re = [%re {js|/(?i)a.*?c/|js}]
356-
let re = [%re {js|/(?i)a.*c/|js}]
357-
let re = [%re {js|/(?i)a[bc]d/|js}]
358-
let re = [%re {js|/(?i)a[bc]d/|js}]
359-
let re = [%re {js|/(?i)a[b-d]e/|js}]
360-
let re = [%re {js|/(?i)a[b-d]e/|js}]
361-
let re = [%re {js|/(?i)a[b-d]/|js}]
362-
let re = [%re {js|/(?i)a[-b]/|js}]
363-
let re = [%re {js|/(?i)a[b-]/|js}]
364-
let re = [%re {js|/(?i)a[b-a]/|js}]
365-
let re = [%re {js|/(?i)a[]b/|js}]
366-
let re = [%re {js|/(?i)a[/|js}]
367-
let re = [%re {js|/(?i)a]/|js}]
368-
let re = [%re {js|/(?i)a[]]b/|js}]
369-
let re = [%re {js|/(?i)a[^bc]d/|js}]
370-
let re = [%re {js|/(?i)a[^bc]d/|js}]
371-
let re = [%re {js|/(?i)a[^-b]c/|js}]
372-
let re = [%re {js|/(?i)a[^-b]c/|js}]
373-
let re = [%re {js|/(?i)a[^]b]c/|js}]
374-
let re = [%re {js|/(?i)a[^]b]c/|js}]
375-
let re = [%re {js|/(?i)ab|cd/|js}]
376-
let re = [%re {js|/(?i)ab|cd/|js}]
377-
let re = [%re {js|/(?i)()ef/|js}]
378-
let re = [%re {js|/(?i)*a/|js}]
379-
let re = [%re {js|/(?i)(*)b/|js}]
380-
let re = [%re {js|/(?i)$b/|js}]
381-
let re = [%re {js|/(?i)a\\/|js}]
382-
let re = [%re {js|/(?i)a\\(b/|js}]
383-
let re = [%re {js|/(?i)a\\(*b/|js}]
384-
let re = [%re {js|/(?i)a\\(*b/|js}]
385-
let re = [%re {js|/(?i)a\\\\b/|js}]
386-
let re = [%re {js|/(?i)abc)/|js}]
387-
let re = [%re {js|/(?i)(abc/|js}]
388-
let re = [%re {js|/(?i)((a))/|js}]
389-
let re = [%re {js|/(?i)(a)b(c)/|js}]
390-
let re = [%re {js|/(?i)a+b+c/|js}]
391-
let re = [%re {js|/(?i)a{1,}b{1,}c/|js}]
392-
let re = [%re {js|/(?i)a**/|js}]
393-
let re = [%re {js|/(?i)a.+?c/|js}]
394-
let re = [%re {js|/(?i)a.*?c/|js}]
395-
let re = [%re {js|/(?i)a.{0,5}?c/|js}]
396-
let re = [%re {js|/(?i)(a+|b)*/|js}]
397-
let re = [%re {js|/(?i)(a+|b){0,}/|js}]
398-
let re = [%re {js|/(?i)(a+|b)+/|js}]
399-
let re = [%re {js|/(?i)(a+|b){1,}/|js}]
400-
let re = [%re {js|/(?i)(a+|b)?/|js}]
401-
let re = [%re {js|/(?i)(a+|b){0,1}/|js}]
402-
let re = [%re {js|/(?i)(a+|b){0,1}?/|js}]
403-
let re = [%re {js|/(?i))(/|js}]
404-
let re = [%re {js|/(?i)[^ab]*/|js}]
405-
let re = [%re {js|/(?i)abc/|js}]
406-
let re = [%re {js|/(?i)a*/|js}]
407-
let re = [%re {js|/(?i)([abc])*d/|js}]
408-
let re = [%re {js|/(?i)([abc])*bcd/|js}]
409-
let re = [%re {js|/(?i)a|b|c|d|e/|js}]
410-
let re = [%re {js|/(?i)(a|b|c|d|e)f/|js}]
411-
let re = [%re {js|/(?i)abcd*efg/|js}]
412-
let re = [%re {js|/(?i)ab*/|js}]
413-
let re = [%re {js|/(?i)ab*/|js}]
414-
let re = [%re {js|/(?i)(ab|cd)e/|js}]
415-
let re = [%re {js|/(?i)[abhgefdc]ij/|js}]
416-
let re = [%re {js|/(?i)^(ab|cd)e/|js}]
417-
let re = [%re {js|/(?i)(abc|)ef/|js}]
418-
let re = [%re {js|/(?i)(a|b)c*d/|js}]
419-
let re = [%re {js|/(?i)(ab|ab*)bc/|js}]
420-
let re = [%re {js|/(?i)a([bc]*)c*/|js}]
421-
let re = [%re {js|/(?i)a([bc]*)(c*d)/|js}]
422-
let re = [%re {js|/(?i)a([bc]+)(c*d)/|js}]
423-
let re = [%re {js|/(?i)a([bc]*)(c+d)/|js}]
424-
let re = [%re {js|/(?i)a[bcd]*dcdcde/|js}]
425-
let re = [%re {js|/(?i)a[bcd]+dcdcde/|js}]
426-
let re = [%re {js|/(?i)(ab|a)b*c/|js}]
427-
let re = [%re {js|/(?i)((a)(b)c)(d)/|js}]
428-
let re = [%re {js|/(?i)[a-zA-Z_][a-zA-Z0-9_]*/|js}]
429-
let re = [%re {js|/(?i)^a(bc+|b[eh])g|.h$/|js}]
430-
let re = [%re {js|/(?i)(bc+d$|ef*g.|h?i(j|k))/|js}]
431-
let re = [%re {js|/(?i)(bc+d$|ef*g.|h?i(j|k))/|js}]
432-
let re = [%re {js|/(?i)(bc+d$|ef*g.|h?i(j|k))/|js}]
433-
let re = [%re {js|/(?i)(bc+d$|ef*g.|h?i(j|k))/|js}]
434-
let re = [%re {js|/(?i)(bc+d$|ef*g.|h?i(j|k))/|js}]
435-
let re = [%re {js|/(?i)((((((((((a))))))))))/|js}]
436-
let re = [%re {js|/(?i)((((((((((a))))))))))\\10/|js}]
437-
let re = [%re {js|/(?i)(((((((((a)))))))))/|js}]
438-
let re = [%re {js|/(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a))))))))))/|js}]
439-
let re = [%re {js|/(?i)(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/|js}]
440-
let re = [%re {js|/(?i)multiple words of text/|js}]
441-
let re = [%re {js|/(?i)multiple words/|js}]
442-
let re = [%re {js|/(?i)(.*)c(.*)/|js}]
443-
let re = [%re {js|/(?i)\\((.*), (.*)\\)/|js}]
444-
let re = [%re {js|/(?i)[k]/|js}]
445-
let re = [%re {js|/(?i)a[-]?c/|js}]
446-
let re = [%re {js|/(?i)(abc)\\1/|js}]
447-
let re = [%re {js|/(?i)([a-c]*)\\1/|js}]
448314
let re = [%re {js|/a(?!b)./|js}]
449315
let re = [%re {js|/a(?=d)./|js}]
450316
let re = [%re {js|/a(?=c|d)./|js}]
@@ -499,4 +365,10 @@ let re = [%re {js|/^a*?$/|js}]
499365
let re = [%re {js|/^((a)c)?(ab)$/|js}]
500366
let re = [%re {js|/^([ab]*?)(?=(b)?)c/|js}]
501367
let re = [%re {js|/^([ab]*?)(?!(b))c/|js}]
502-
let re = [%re {js|/^([ab]*?)(?<!(a))c/|js}]
368+
let re = [%re {js|/^([ab]*?)(?<!(a))c/|js}]
369+
let re = [%re {js|/\.[^/.]+$/|js}]
370+
let re = [%re {js|/[^]]/|js}]
371+
let re = [%re {js|/[/]/|js}]
372+
let re = [%re {js|/[]]/|js}]
373+
let re = [%re {js|/[\]]/|js}]
374+
let re = [%re {js|/[[]]/|js}]

0 commit comments

Comments
 (0)