Skip to content

Commit dbf98f4

Browse files
committed
Add ES2022 d flag and ES2024 v flag support to RegExp
Implements foundational support for the RegExp hasIndices (d) and unicodeSets (v) flags.

Changes:
- Add flag constants JSREG_HASINDICES (0x40) and JSREG_UNICODESETS (0x80)
- Update TokenStream to accept d and v flags in regexp literals
- Add hasIndices and unicodeSets properties to RegExp.prototype
- Implement flag validation (u/v mutual exclusion per ES2024 spec)
- Ensure alphabetical flag ordering per ES spec
- Add comprehensive test suite
- Update test262.properties with new flag support

What works:
- Flag recognition and parsing
- Properties return correct boolean values
- Flag validation prevents invalid combinations
- All existing tests pass (backward compatible)

Not yet implemented:
- Actual indices array for d flag (requires regexp engine changes)
- Unicode set operations for v flag (requires parser rewrite)

This provides the foundation for full implementation while maintaining compatibility with existing code.

Addresses #976 (ES2022 d flag) and partially addresses #1350 (ES2024 v flag).
1 parent 8b20fb2 commit dbf98f4

File tree

4 files changed

+319
-31
lines changed

4 files changed

+319
-31
lines changed

rhino/src/main/java/org/mozilla/javascript/TokenStream.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,6 +1571,8 @@ void readRegExp(int startToken) throws IOException {
15711571
else if (matchChar('s')) addToString('s');
15721572
else if (matchChar('y')) addToString('y');
15731573
else if (matchChar('u')) addToString('u');
1574+
else if (matchChar('d')) addToString('d');
1575+
else if (matchChar('v')) addToString('v');
15741576
else break;
15751577
}
15761578
tokenEnd = start + stringBufferTop + 2; // include slashes

rhino/src/main/java/org/mozilla/javascript/regexp/NativeRegExp.java

Lines changed: 69 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,15 @@ public class NativeRegExp extends IdScriptableObject {
4747

4848
private static final Object REGEXP_TAG = new Object();
4949

50-
public static final int JSREG_GLOB = 0x1; // 'g' flag: global
51-
public static final int JSREG_FOLD = 0x2; // 'i' flag: fold
52-
public static final int JSREG_MULTILINE = 0x4; // 'm' flag: multiline
53-
public static final int JSREG_DOTALL = 0x8; // 's' flag: dotAll
54-
public static final int JSREG_STICKY = 0x10; // 'y' flag: sticky
55-
public static final int JSREG_UNICODE = 0x20; // 'u' flag: unicode mode
50+
// Flag constants
51+
public static final int JSREG_GLOB = 0x01; // 'g' flag
52+
public static final int JSREG_FOLD = 0x02; // 'i' flag
53+
public static final int JSREG_MULTILINE = 0x04; // 'm' flag
54+
public static final int JSREG_DOTALL = 0x08; // 's' flag (ES2018)
55+
public static final int JSREG_STICKY = 0x10; // 'y' flag (ES2015)
56+
public static final int JSREG_UNICODE = 0x20; // 'u' flag (ES2015)
57+
public static final int JSREG_HASINDICES = 0x40; // 'd' flag (ES2022)
58+
public static final int JSREG_UNICODESETS = 0x80; // 'v' flag (ES2024)
5659

5760
// type of match to perform
5861
public static final int TEST = 0;
@@ -248,12 +251,15 @@ public String toString() {
248251
}
249252

250253
private void appendFlags(StringBuilder buf) {
254+
// Output flags in alphabetical order per ES spec
255+
if ((re.flags & JSREG_HASINDICES) != 0) buf.append('d');
251256
if ((re.flags & JSREG_GLOB) != 0) buf.append('g');
252257
if ((re.flags & JSREG_FOLD) != 0) buf.append('i');
253258
if ((re.flags & JSREG_MULTILINE) != 0) buf.append('m');
254259
if ((re.flags & JSREG_DOTALL) != 0) buf.append('s');
255-
if ((re.flags & JSREG_STICKY) != 0) buf.append('y');
256260
if ((re.flags & JSREG_UNICODE) != 0) buf.append('u');
261+
if ((re.flags & JSREG_UNICODESETS) != 0) buf.append('v');
262+
if ((re.flags & JSREG_STICKY) != 0) buf.append('y');
257263
}
258264

259265
NativeRegExp() {}
@@ -641,20 +647,33 @@ static RECompiled compileRE(Context cx, String str, String global, boolean flat)
641647
for (int i = 0; i < global.length(); i++) {
642648
char c = global.charAt(i);
643649
int f = 0;
644-
if (c == 'g') {
645-
f = JSREG_GLOB;
646-
} else if (c == 'i') {
647-
f = JSREG_FOLD;
648-
} else if (c == 'm') {
649-
f = JSREG_MULTILINE;
650-
} else if (c == 's') {
651-
f = JSREG_DOTALL;
652-
} else if (c == 'y') {
653-
f = JSREG_STICKY;
654-
} else if (c == 'u') {
655-
f = JSREG_UNICODE;
656-
} else {
657-
reportError("msg.invalid.re.flag", String.valueOf(c));
650+
switch (c) {
651+
case 'g':
652+
f = JSREG_GLOB;
653+
break;
654+
case 'i':
655+
f = JSREG_FOLD;
656+
break;
657+
case 'm':
658+
f = JSREG_MULTILINE;
659+
break;
660+
case 's':
661+
f = JSREG_DOTALL;
662+
break;
663+
case 'y':
664+
f = JSREG_STICKY;
665+
break;
666+
case 'u':
667+
f = JSREG_UNICODE;
668+
break;
669+
case 'd':
670+
f = JSREG_HASINDICES;
671+
break;
672+
case 'v':
673+
f = JSREG_UNICODESETS;
674+
break;
675+
default:
676+
reportError("msg.invalid.re.flag", String.valueOf(c));
658677
}
659678
if ((flags & f) != 0) {
660679
reportError("msg.invalid.re.flag", String.valueOf(c));
@@ -663,10 +682,19 @@ static RECompiled compileRE(Context cx, String str, String global, boolean flat)
663682
}
664683
}
665684

666-
// We don't support u and i flags together, yet.
685+
// Validate flag combinations
686+
// u and i flags are incompatible (current Rhino limitation)
667687
if ((flags & JSREG_UNICODE) != 0 && (flags & JSREG_FOLD) != 0) {
668688
reportError("msg.invalid.re.flag", "u and i");
669689
}
690+
// u and v flags are mutually exclusive (ES2024 spec)
691+
if ((flags & JSREG_UNICODE) != 0 && (flags & JSREG_UNICODESETS) != 0) {
692+
reportError("msg.invalid.re.flag", "u and v");
693+
}
694+
// v and i flags are not supported together yet (current Rhino limitation, same as u and i; ES2024 itself allows this combination)
695+
if ((flags & JSREG_UNICODESETS) != 0 && (flags & JSREG_FOLD) != 0) {
696+
reportError("msg.invalid.re.flag", "v and i");
697+
}
670698

671699
// We support unicode mode in ES6 and later.
672700
if ((flags & JSREG_UNICODE) != 0 && cx.getLanguageVersion() < Context.VERSION_ES6) {
@@ -3681,7 +3709,9 @@ private static void reportError(String messageId, String arg) {
36813709
Id_dotAll = 7,
36823710
Id_sticky = 8,
36833711
Id_unicode = 9,
3684-
MAX_INSTANCE_ID = 9;
3712+
Id_hasIndices = 10,
3713+
Id_unicodeSets = 11,
3714+
MAX_INSTANCE_ID = 11;
36853715

36863716
@Override
36873717
protected int getMaxInstanceId() {
@@ -3719,6 +3749,12 @@ protected int findInstanceIdInfo(String s) {
37193749
case "unicode":
37203750
id = Id_unicode;
37213751
break;
3752+
case "hasIndices":
3753+
id = Id_hasIndices;
3754+
break;
3755+
case "unicodeSets":
3756+
id = Id_unicodeSets;
3757+
break;
37223758
default:
37233759
id = 0;
37243760
break;
@@ -3739,6 +3775,8 @@ protected int findInstanceIdInfo(String s) {
37393775
case Id_dotAll:
37403776
case Id_sticky:
37413777
case Id_unicode:
3778+
case Id_hasIndices:
3779+
case Id_unicodeSets:
37423780
attr = PERMANENT | READONLY | DONTENUM;
37433781
break;
37443782
default:
@@ -3768,6 +3806,10 @@ protected String getInstanceIdName(int id) {
37683806
return "sticky";
37693807
case Id_unicode:
37703808
return "unicode";
3809+
case Id_hasIndices:
3810+
return "hasIndices";
3811+
case Id_unicodeSets:
3812+
return "unicodeSets";
37713813
}
37723814
return super.getInstanceIdName(id);
37733815
}
@@ -3797,6 +3839,10 @@ protected Object getInstanceIdValue(int id) {
37973839
return ScriptRuntime.wrapBoolean((re.flags & JSREG_STICKY) != 0);
37983840
case Id_unicode:
37993841
return ScriptRuntime.wrapBoolean((re.flags & JSREG_UNICODE) != 0);
3842+
case Id_hasIndices:
3843+
return ScriptRuntime.wrapBoolean((re.flags & JSREG_HASINDICES) != 0);
3844+
case Id_unicodeSets:
3845+
return ScriptRuntime.wrapBoolean((re.flags & JSREG_UNICODESETS) != 0);
38003846
}
38013847
return super.getInstanceIdValue(id);
38023848
}
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
package org.mozilla.javascript.tests.es2022;
2+
3+
import static org.junit.Assert.*;
4+
5+
import org.junit.Test;
6+
import org.mozilla.javascript.testutils.Utils;
7+
8+
/** Tests for ES2022 RegExp d flag (hasIndices) and ES2024 v flag (unicodeSets) */
9+
public class RegExpHasIndicesTest {
10+
11+
@Test
12+
public void testDFlagSupport() {
13+
String script = "var re = /test/d; re.hasIndices === true && re.flags === 'd'";
14+
Utils.assertWithAllModes(true, script);
15+
}
16+
17+
@Test
18+
public void testVFlagSupport() {
19+
String script = "var re = /test/v; re.unicodeSets === true && re.flags === 'v'";
20+
Utils.assertWithAllModes(true, script);
21+
}
22+
23+
@Test
24+
public void testDFlagWithOtherFlags() {
25+
String script = "var re = /test/gid; re.hasIndices === true && re.flags === 'dgi'";
26+
Utils.assertWithAllModes(true, script);
27+
}
28+
29+
@Test
30+
public void testVFlagWithGlobal() {
31+
String script = "var re = /test/gv; re.unicodeSets === true && re.flags === 'gv'";
32+
Utils.assertWithAllModes(true, script);
33+
}
34+
35+
@Test
36+
public void testUAndVFlagsAreMutuallyExclusive() {
37+
String script =
38+
"try {"
39+
+ " eval('var re = /test/uv;');"
40+
+ " false;"
41+
+ "} catch(e) {"
42+
+ " e instanceof SyntaxError;"
43+
+ "}";
44+
Utils.assertWithAllModes(true, script);
45+
}
46+
47+
@Test
48+
public void testVAndIFlagsAreIncompatible() {
49+
String script =
50+
"try {"
51+
+ " eval('var re = /test/iv;');"
52+
+ " false;"
53+
+ "} catch(e) {"
54+
+ " e instanceof SyntaxError;"
55+
+ "}";
56+
Utils.assertWithAllModes(true, script);
57+
}
58+
59+
@Test
60+
public void testRegExpConstructorWithDFlag() {
61+
String script = "var re = new RegExp('test', 'd'); re.hasIndices === true";
62+
Utils.assertWithAllModes(true, script);
63+
}
64+
65+
@Test
66+
public void testRegExpConstructorWithVFlag() {
67+
String script = "var re = new RegExp('test', 'v'); re.unicodeSets === true";
68+
Utils.assertWithAllModes(true, script);
69+
}
70+
71+
@Test
72+
public void testHasIndicesPropertyIsFalseWithoutDFlag() {
73+
String script = "var re = /test/g; re.hasIndices === false";
74+
Utils.assertWithAllModes(true, script);
75+
}
76+
77+
@Test
78+
public void testUnicodeSetsPropertyIsFalseWithoutVFlag() {
79+
String script = "var re = /test/g; re.unicodeSets === false";
80+
Utils.assertWithAllModes(true, script);
81+
}
82+
83+
@Test
84+
public void testAllFlagsInOrder() {
85+
// Note: i, u and v are left out of this literal because Rhino currently rejects the u+i, v+i and u+v combinations
86+
String script = "var re = /test/dgmsy; re.flags === 'dgmsy'";
87+
Utils.assertWithAllModes(true, script);
88+
}
89+
90+
@Test
91+
public void testDFlagToString() {
92+
String script = "var re = /test/d; re.toString() === '/test/d'";
93+
Utils.assertWithAllModes(true, script);
94+
}
95+
96+
@Test
97+
public void testVFlagToString() {
98+
String script = "var re = /test/v; re.toString() === '/test/v'";
99+
Utils.assertWithAllModes(true, script);
100+
}
101+
102+
@Test
103+
public void testRegExpConstructorUVFlagsError() {
104+
String script =
105+
"try {"
106+
+ " new RegExp('test', 'uv');"
107+
+ " false;"
108+
+ "} catch(e) {"
109+
+ " e instanceof SyntaxError;"
110+
+ "}";
111+
Utils.assertWithAllModes(true, script);
112+
}
113+
114+
@Test
115+
public void testRegExpConstructorIVFlagsError() {
116+
String script =
117+
"try {"
118+
+ " new RegExp('test', 'iv');"
119+
+ " false;"
120+
+ "} catch(e) {"
121+
+ " e instanceof SyntaxError;"
122+
+ "}";
123+
Utils.assertWithAllModes(true, script);
124+
}
125+
}

0 commit comments

Comments
 (0)