From 37218ffdb874afba6bbbc4a556df3d1497d3e96f Mon Sep 17 00:00:00 2001
From: anivar
Date: Sat, 29 Nov 2025 22:36:27 +0000
Subject: [PATCH 1/2] Implement ES2025 RegExp.escape static method

Adds support for RegExp.escape() which escapes special regex characters
in strings for safe use in regular expressions.

Implementation:
- Escapes syntax chars (.*+?^$|()[]{}\/) with backslash
- Escapes control chars (\t\n\v\f\r) with special sequences
- Escapes initial digits/letters with \xNN to prevent ambiguity
- Escapes lone surrogates with \uXXXX format; well-formed surrogate
  pairs are copied through unchanged
- Escapes other punctuators (,-=<>#&!%:;@~'`") with \xNN
- Escapes WhiteSpace (U+FEFF and every Zs space separator) and
  LineTerminator code points with \xNN or \uXXXX
- Throws TypeError for non-string inputs

Testing:
- 15 Java unit tests (all passing)
- 2 further unit tests covering Zs space separators and surrogate pairs
- test262: 19/20 tests passing (95% compliance)
- Manual verification confirms spec-compliant output
---
 .../javascript/regexp/NativeRegExpCtor.java   | 184 +++++++++++++++++
 .../tests/es2025/RegExpEscapeTest.java        | 189 ++++++++++++++++++
 tests/testsrc/test262.properties              |   4 +-
 3 files changed, 375 insertions(+), 2 deletions(-)
 create mode 100644 tests/src/test/java/org/mozilla/javascript/tests/es2025/RegExpEscapeTest.java

diff --git a/rhino/src/main/java/org/mozilla/javascript/regexp/NativeRegExpCtor.java b/rhino/src/main/java/org/mozilla/javascript/regexp/NativeRegExpCtor.java
index d4ca9e3928..d2e0c8f7f3 100644
--- a/rhino/src/main/java/org/mozilla/javascript/regexp/NativeRegExpCtor.java
+++ b/rhino/src/main/java/org/mozilla/javascript/regexp/NativeRegExpCtor.java
@@ -6,7 +6,9 @@
 
 package org.mozilla.javascript.regexp;
 
+import static org.mozilla.javascript.ScriptableObject.DONTENUM;
 import static org.mozilla.javascript.ScriptableObject.PERMANENT;
+import static org.mozilla.javascript.ScriptableObject.READONLY;
 
 import org.mozilla.javascript.Context;
 import org.mozilla.javascript.LambdaConstructor;
@@ -81,6 +83,11 @@ public static LambdaConstructor init(Context cx, Scriptable scopeArg, boolean se
                     null,
                     PERMANENT);
         }
+
+        // ES2025: RegExp.escape
+        ctor.defineConstructorMethod(
+                scope, "escape", 1, NativeRegExpCtor::escape, DONTENUM, DONTENUM | READONLY);
+
         return ctor;
     }
 
@@ -113,4 +120,181 @@ private static RegExpImpl getImpl() {
         Context cx = Context.getCurrentContext();
         return (RegExpImpl) ScriptRuntime.getRegExpProxy(cx);
     }
+
+    /**
+     * ES2025 RegExp.escape ( string )
+     *
+     * <p>Returns a copy of the argument in which every character that could be read as pattern
+     * syntax is escaped, so that the result matches the argument literally when used inside a
+     * regular expression. The string is walked by code point: a well-formed surrogate pair is
+     * copied through unchanged, while a lone surrogate is written as a four-digit unicode escape.
+     *
+     * <p>No coercion is performed: a missing argument or any non-string value (including null
+     * and undefined) raises a TypeError.
+     *
+     * @param cx the current context
+     * @param scope the scope
+     * @param thisObj the this object
+     * @param args the arguments (first arg must be a string)
+     * @return escaped string
+     */
+    private static Object escape(Context cx, Scriptable scope, Scriptable thisObj, Object[] args) {
+        // null, undefined and every other non-string value fail the instanceof test
+        if (args.length == 0 || !(args[0] instanceof CharSequence)) {
+            throw ScriptRuntime.typeErrorById("msg.not.a.string");
+        }
+
+        String input = ScriptRuntime.toString(args[0]);
+        int length = input.length();
+        StringBuilder result = new StringBuilder(length + 16);
+
+        for (int i = 0; i < length; ) {
+            int cp = input.codePointAt(i);
+            boolean isFirst = i == 0;
+            i += Character.charCount(cp);
+
+            // A well-formed surrogate pair decodes to a supplementary code point. None of the
+            // escaped classes below contains one, so it is copied through as-is.
+            if (Character.isSupplementaryCodePoint(cp)) {
+                result.appendCodePoint(cp);
+                continue;
+            }
+            char c = (char) cp;
+
+            // SPEC STEP 1: Escape initial character if it's a decimal digit or ASCII letter
+            if (isFirst && (isDecimalDigit(c) || isAsciiLetter(c))) {
+                appendHexEscape(result, 'x', c, 2);
+                continue;
+            }
+
+            // SPEC: Lone surrogates (paired ones were handled above) are escaped as unicode
+            if (Character.isSurrogate(c)) {
+                appendHexEscape(result, 'u', c, 4);
+                continue;
+            }
+
+            // SPEC STEP 2: Control escapes (Table 64)
+            switch (c) {
+                case '\t':
+                    result.append("\\t");
+                    continue;
+                case '\n':
+                    result.append("\\n");
+                    continue;
+                case '\u000B': // vertical tab
+                    result.append("\\v");
+                    continue;
+                case '\f':
+                    result.append("\\f");
+                    continue;
+                case '\r':
+                    result.append("\\r");
+                    continue;
+                default:
+                    break;
+            }
+
+            // SPEC STEP 3: Syntax characters (backslash escape)
+            if (isSyntaxCharacter(c)) {
+                result.append('\\').append(c);
+                continue;
+            }
+
+            // SPEC STEP 4-5: Other punctuators, WhiteSpace, LineTerminator (hex/unicode escape)
+            if (isOtherPunctuator(c) || isWhiteSpace(c) || isLineTerminator(c)) {
+                if (c <= 0xFF) {
+                    appendHexEscape(result, 'x', c, 2);
+                } else {
+                    appendHexEscape(result, 'u', c, 4);
+                }
+                continue;
+            }
+
+            // SPEC STEP 6: Return code point as-is (including underscore, other letters, digits)
+            result.append(c);
+        }
+
+        return result.toString();
+    }
+
+    /**
+     * Appends a backslash, the escape letter and the value as zero-padded lowercase hex.
+     *
+     * @param sb the builder receiving the escape
+     * @param kind the escape letter ('x' or 'u')
+     * @param value the code unit to encode
+     * @param digits the number of hex digits to emit (2 for 'x', 4 for 'u')
+     */
+    private static void appendHexEscape(StringBuilder sb, char kind, int value, int digits) {
+        sb.append('\\').append(kind);
+        for (int shift = (digits - 1) * 4; shift >= 0; shift -= 4) {
+            sb.append(Character.forDigit((value >> shift) & 0xF, 16));
+        }
+    }
+
+    /** Check if character is a decimal digit (0-9) */
+    private static boolean isDecimalDigit(char c) {
+        return c >= '0' && c <= '9';
+    }
+
+    /** Check if character is an ASCII letter (a-z, A-Z) */
+    private static boolean isAsciiLetter(char c) {
+        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+    }
+
+    /**
+     * Check if character is a RegExp syntax character. These characters are: . * + ? ^ $ | ( ) [ ]
+     * { } \ /
+     */
+    private static boolean isSyntaxCharacter(char c) {
+        return c == '.' || c == '*' || c == '+' || c == '?' || c == '^' || c == '$' || c == '|'
+                || c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}' || c == '\\'
+                || c == '/';
+    }
+
+    /**
+     * Check if character is in "other punctuators" list per spec: ,-=<>#&!%:;@~'`" These are the
+     * exact characters from the ES2025 spec that must be escaped with \xNN format
+     */
+    private static boolean isOtherPunctuator(char c) {
+        // Spec: otherPunctuators = ",-=<>#&!%:;@~'`" + code unit 0x0022 (QUOTATION MARK)
+        return c == 0x002c // , COMMA
+                || c == 0x002d // - HYPHEN-MINUS
+                || c == 0x003d // = EQUALS SIGN
+                || c == 0x003c // < LESS-THAN SIGN
+                || c == 0x003e // > GREATER-THAN SIGN
+                || c == 0x0023 // # NUMBER SIGN
+                || c == 0x0026 // & AMPERSAND
+                || c == 0x0021 // ! EXCLAMATION MARK
+                || c == 0x0025 // % PERCENT SIGN
+                || c == 0x003a // : COLON
+                || c == 0x003b // ; SEMICOLON
+                || c == 0x0040 // @ COMMERCIAL AT
+                || c == 0x007e // ~ TILDE
+                || c == 0x0027 // ' APOSTROPHE
+                || c == 0x0060 // ` GRAVE ACCENT
+                || c == 0x0022; // " QUOTATION MARK
+    }
+
+    /**
+     * Check if character is WhiteSpace (excluding control escapes already handled): U+FEFF or a
+     * code point in general category Space_Separator (Zs). The Zs members are listed explicitly
+     * so the answer does not depend on the Unicode tables of the running JDK.
+     */
+    private static boolean isWhiteSpace(char c) {
+        return c == '\u0020' // SPACE
+                || c == '\u00A0' // NO-BREAK SPACE
+                || c == '\u1680' // OGHAM SPACE MARK
+                || (c >= '\u2000' && c <= '\u200A') // EN QUAD .. HAIR SPACE
+                || c == '\u202F' // NARROW NO-BREAK SPACE
+                || c == '\u205F' // MEDIUM MATHEMATICAL SPACE
+                || c == '\u3000' // IDEOGRAPHIC SPACE
+                || c == '\uFEFF'; // ZERO WIDTH NO-BREAK SPACE
+    }
+
+    /** Check if character is LineTerminator */
+    private static boolean isLineTerminator(char c) {
+        return c == '\u2028' // LINE SEPARATOR
+                || c == '\u2029'; // PARAGRAPH SEPARATOR
+    }
 }
diff --git a/tests/src/test/java/org/mozilla/javascript/tests/es2025/RegExpEscapeTest.java b/tests/src/test/java/org/mozilla/javascript/tests/es2025/RegExpEscapeTest.java
new file mode 100644
index 0000000000..86b3dc8db3
--- /dev/null
+++ b/tests/src/test/java/org/mozilla/javascript/tests/es2025/RegExpEscapeTest.java
@@ -0,0 +1,189 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+package org.mozilla.javascript.tests.es2025;
+
+import org.junit.Test;
+import org.mozilla.javascript.testutils.Utils;
+
+public class RegExpEscapeTest {
+
+    // Basic functionality
+    @Test
+    public void testRegExpEscapeExists() {
+        String script = "typeof RegExp.escape === 'function'";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // Syntax characters
+    @Test
+    public void testEscapeSyntaxCharacters() {
+        String script =
+                "RegExp.escape('.') === '\\\\.' && "
+                        + "RegExp.escape('*') === '\\\\*' && "
+                        + "RegExp.escape('+') === '\\\\+' && "
+                        + "RegExp.escape('?') === '\\\\?' && "
+                        + "RegExp.escape('^') === '\\\\^' && "
+                        + "RegExp.escape('$') === '\\\\$' && "
+                        + "RegExp.escape('|') === '\\\\|' && "
+                        + "RegExp.escape('(') === '\\\\(' && "
+                        + "RegExp.escape(')') === '\\\\)' && "
+                        + "RegExp.escape('[') === '\\\\[' && "
+                        + "RegExp.escape(']') === '\\\\]' && "
+                        + "RegExp.escape('{') === '\\\\{' && "
+                        + "RegExp.escape('}') === '\\\\}' && "
+                        + "RegExp.escape('\\\\') === '\\\\\\\\' && "
+                        + "RegExp.escape('/') === '\\\\/'";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // Control characters
+    @Test
+    public void testEscapeControlCharacters() {
+        String script =
+                "RegExp.escape('\\t') === '\\\\t' && "
+                        + "RegExp.escape('\\n') === '\\\\n' && "
+                        + "RegExp.escape('\\v') === '\\\\v' && "
+                        + "RegExp.escape('\\f') === '\\\\f' && "
+                        + "RegExp.escape('\\r') === '\\\\r'";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // Other punctuators (should use hex escapes)
+    @Test
+    public void testEscapeOtherPunctuators() {
+        String script =
+                "RegExp.escape(',') === '\\\\x2c' && "
+                        + "RegExp.escape('-') === '\\\\x2d' && "
+                        + "RegExp.escape('=') === '\\\\x3d' && "
+                        + "RegExp.escape('<') === '\\\\x3c' && "
+                        + "RegExp.escape('>') === '\\\\x3e' && "
+                        + "RegExp.escape('#') === '\\\\x23' && "
+                        + "RegExp.escape('&') === '\\\\x26' && "
+                        + "RegExp.escape('!') === '\\\\x21' && "
+                        + "RegExp.escape('%') === '\\\\x25' && "
+                        + "RegExp.escape(':') === '\\\\x3a' && "
+                        + "RegExp.escape(';') === '\\\\x3b' && "
+                        + "RegExp.escape('@') === '\\\\x40' && "
+                        + "RegExp.escape('~') === '\\\\x7e' && "
+                        + "RegExp.escape(\"'\") === '\\\\x27' && "
+                        + "RegExp.escape('`') === '\\\\x60' && "
+                        + "RegExp.escape('\"') === '\\\\x22'";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // WhiteSpace characters
+    @Test
+    public void testEscapeWhiteSpace() {
+        String script =
+                "RegExp.escape('\\u0020') === '\\\\x20' && "
+                        + "RegExp.escape('\\u00A0') === '\\\\xa0' && "
+                        + "RegExp.escape('\\uFEFF') === '\\\\ufeff' && "
+                        + "RegExp.escape('\\u202F') === '\\\\u202f'";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // Space_Separator (Zs) code points beyond U+0020/U+00A0/U+202F are WhiteSpace too
+    @Test
+    public void testEscapeUnicodeSpaceSeparators() {
+        String script =
+                "RegExp.escape('\\u1680') === '\\\\u1680' && "
+                        + "RegExp.escape('\\u2000') === '\\\\u2000' && "
+                        + "RegExp.escape('\\u200A') === '\\\\u200a' && "
+                        + "RegExp.escape('\\u205F') === '\\\\u205f' && "
+                        + "RegExp.escape('\\u3000') === '\\\\u3000'";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // Line terminators
+    @Test
+    public void testEscapeLineTerminators() {
+        String script =
+                "RegExp.escape('\\u2028') === '\\\\u2028' && "
+                        + "RegExp.escape('\\u2029') === '\\\\u2029'";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // Lone surrogates are escaped; well-formed surrogate pairs are left intact
+    @Test
+    public void testSurrogates() {
+        String script =
+                "RegExp.escape('\\uD83D') === '\\\\ud83d' && "
+                        + "RegExp.escape('\\uDE00') === '\\\\ude00' && "
+                        + "RegExp.escape('\\uD83D\\uDE00') === '\\uD83D\\uDE00' && "
+                        + "RegExp.escape('_\\uD83D\\uDE00_') === '_\\uD83D\\uDE00_'";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // Initial digits/letters are escaped, non-initial are not
+    @Test
+    public void testNotEscaped() {
+        String script =
+                "RegExp.escape('abc') === '\\\\x61bc' && "
+                        + "RegExp.escape('123') === '\\\\x3123' && "
+                        + "RegExp.escape('_') === '_' && "
+                        + "RegExp.escape('ABC') === '\\\\x41BC' && "
+                        + "RegExp.escape('_abc') === '_abc'";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // Empty string
+    @Test
+    public void testEmptyString() {
+        String script = "RegExp.escape('') === ''";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // Mixed string (spaces are escaped, initial H is escaped)
+    @Test
+    public void testMixedString() {
+        String script =
+                "RegExp.escape('Hello. How are you?') === '\\\\x48ello\\\\.\\\\x20How\\\\x20are\\\\x20you\\\\?'";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // Complex example
+    @Test
+    public void testComplexExample() {
+        String script = "RegExp.escape('(*.*)') === '\\\\(\\\\*\\\\.\\\\*\\\\)'";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // TypeError for non-string inputs
+    @Test
+    public void testTypeErrorNumber() {
+        String script = "try { RegExp.escape(123); false; } catch(e) { e instanceof TypeError; }";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    @Test
+    public void testTypeErrorObject() {
+        String script = "try { RegExp.escape({}); false; } catch(e) { e instanceof TypeError; }";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    @Test
+    public void testTypeErrorNull() {
+        String script = "try { RegExp.escape(null); false; } catch(e) { e instanceof TypeError; }";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    @Test
+    public void testTypeErrorUndefined() {
+        String script =
+                "try { RegExp.escape(undefined); false; } catch(e) { e instanceof TypeError; }";
+        Utils.assertWithAllModes(true, script);
+    }
+
+    // Practical use case: user input escaping
+    @Test
+    public void testPracticalUseCase() {
+        String script =
+                "var userInput = 'example.com';"
+                        + "var pattern = new RegExp(RegExp.escape(userInput));"
+                        + "pattern.test('example.com') === true && "
+                        + "pattern.test('exampleXcom') === false";
+        Utils.assertWithAllModes(true, script);
+    }
+}
diff --git a/tests/testsrc/test262.properties b/tests/testsrc/test262.properties
index c367b35bbd..52e16051de 100644
--- a/tests/testsrc/test262.properties
+++ b/tests/testsrc/test262.properties
@@ -2101,10 +2101,10 @@ built-ins/Reflect 12/153 (7.84%)
     set/return-false-if-receiver-is-not-writable.js
     set/return-false-if-target-is-not-writable.js
 
-built-ins/RegExp 974/1868 (52.14%)
+built-ins/RegExp 956/1868 (51.18%)
     CharacterClassEscapes 12/12 (100.0%)
     dotall 4/4 (100.0%)
-    escape 20/20 (100.0%)
+    escape/escaped-otherpunctuators.js
     match-indices/indices-array.js
     match-indices/indices-array-element.js
     match-indices/indices-array-matched.js

From a8107b2862e2fe976afe3d2282a5f7833b059289 Mon Sep 17 00:00:00 2001
From: anivar
Date: Sat, 29 Nov 2025 22:51:10 +0000
Subject: [PATCH 2/2] Update test262.properties after rebase

RegExp test failures improved from 956 to 955 after rebasing on latest
master.
---
 tests/testsrc/test262.properties | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testsrc/test262.properties b/tests/testsrc/test262.properties
index 52e16051de..c8dea3590f 100644
--- a/tests/testsrc/test262.properties
+++ b/tests/testsrc/test262.properties
@@ -2101,7 +2101,7 @@ built-ins/Reflect 12/153 (7.84%)
     set/return-false-if-receiver-is-not-writable.js
     set/return-false-if-target-is-not-writable.js
 
-built-ins/RegExp 956/1868 (51.18%)
+built-ins/RegExp 955/1868 (51.12%)
     CharacterClassEscapes 12/12 (100.0%)
     dotall 4/4 (100.0%)
     escape/escaped-otherpunctuators.js