Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

package org.mozilla.javascript.regexp;

import static org.mozilla.javascript.ScriptableObject.DONTENUM;
import static org.mozilla.javascript.ScriptableObject.PERMANENT;
import static org.mozilla.javascript.ScriptableObject.READONLY;

import org.mozilla.javascript.Context;
import org.mozilla.javascript.LambdaConstructor;
Expand Down Expand Up @@ -81,6 +83,11 @@ public static LambdaConstructor init(Context cx, Scriptable scopeArg, boolean se
null,
PERMANENT);
}

// ES2025: RegExp.escape
ctor.defineConstructorMethod(
scope, "escape", 1, NativeRegExpCtor::escape, DONTENUM, DONTENUM | READONLY);

return ctor;
}

Expand Down Expand Up @@ -113,4 +120,155 @@ private static RegExpImpl getImpl() {
Context cx = Context.getCurrentContext();
return (RegExpImpl) ScriptRuntime.getRegExpProxy(cx);
}

/**
 * ES2025 RegExp.escape ( string )
 *
 * <p>Returns a copy of the input in which every character that could be interpreted as regular
 * expression syntax is replaced by an escape sequence, so that the result can be embedded in a
 * pattern and match the input literally.
 *
 * <p>The input is processed per code point, as the specification requires: a well-formed
 * surrogate pair is one code point and is copied through unchanged, while a lone (unpaired)
 * surrogate is written as a four-hex-digit unicode escape.
 *
 * @param cx the current context (unused)
 * @param scope the scope (unused)
 * @param thisObj the this object (unused)
 * @param args the arguments; the first one must already be a string, no coercion is performed
 * @return the escaped string
 * @throws RuntimeException the script-level TypeError built by {@code
 *     ScriptRuntime.typeErrorById} when the first argument is missing or is not a string
 */
private static Object escape(Context cx, Scriptable scope, Scriptable thisObj, Object[] args) {
    // No coercion per spec. A missing argument, null, undefined, numbers and objects all fail
    // the CharSequence test, so a single check covers every non-string case.
    if (args.length == 0 || !(args[0] instanceof CharSequence)) {
        throw ScriptRuntime.typeErrorById("msg.not.a.string");
    }

    String input = ScriptRuntime.toString(args[0]);
    int length = input.length();
    StringBuilder result = new StringBuilder(length + 16);

    int i = 0;
    while (i < length) {
        boolean isFirst = i == 0;
        int codePoint = input.codePointAt(i);
        i += Character.charCount(codePoint);

        // A supplementary code point means codePointAt combined a valid high/low surrogate
        // pair. None of the escaped categories contain supplementary code points, so the pair
        // is emitted as-is instead of being split into two escaped surrogates.
        if (Character.isSupplementaryCodePoint(codePoint)) {
            result.appendCodePoint(codePoint);
            continue;
        }

        char c = (char) codePoint;

        // An initial decimal digit or ASCII letter is hex-escaped so the result cannot merge
        // with a preceding escape sequence when concatenated into a larger pattern.
        if (isFirst && (isDecimalDigit(c) || isAsciiLetter(c))) {
            result.append(String.format("\\x%02x", (int) c));
            continue;
        }

        // Only unpaired surrogates reach this point; they are written as unicode escapes.
        if (Character.isSurrogate(c)) {
            result.append(String.format("\\u%04x", (int) c));
            continue;
        }

        // Control escapes
        switch (c) {
            case '\t':
                result.append("\\t");
                continue;
            case '\n':
                result.append("\\n");
                continue;
            case '\u000B': // vertical tab
                result.append("\\v");
                continue;
            case '\f':
                result.append("\\f");
                continue;
            case '\r':
                result.append("\\r");
                continue;
        }

        // Syntax characters are escaped with a plain backslash
        if (isSyntaxCharacter(c)) {
            result.append('\\').append(c);
            continue;
        }

        // Other punctuators, WhiteSpace and LineTerminator use a hex escape when the value fits
        // in two hex digits and a unicode escape otherwise
        if (isOtherPunctuator(c) || isWhiteSpace(c) || isLineTerminator(c)) {
            if (c <= 0xFF) {
                result.append(String.format("\\x%02x", (int) c));
            } else {
                result.append(String.format("\\u%04x", (int) c));
            }
            continue;
        }

        // Everything else (underscore, non-initial letters and digits, ...) is copied as-is
        result.append(c);
    }

    return result.toString();
}

/** Check if character is a decimal digit (0-9) */
private static boolean isDecimalDigit(char c) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sure that we already have implementations of these checks either elsewhere in the codebase, or in the Java Character class. Can you please check first to see if we can reuse those or make them more generic rather than encode this here? I won't be surprised if we need a few new ones but checks like isWhiteSpace and isDecimalDigit are certainly not unique to this method. Thanks!

return c >= '0' && c <= '9';
}

/**
 * Tells whether the given code unit is one of the 52 ASCII letters.
 *
 * @param c the UTF-16 code unit to test
 * @return {@code true} for 'a'-'z' and 'A'-'Z', {@code false} for everything else
 */
private static boolean isAsciiLetter(char c) {
    // Setting bit 0x20 maps 'A'-'Z' onto 'a'-'z' and leaves the lower-case range in place;
    // no other code unit lands inside 'a'-'z' after the fold.
    int folded = c | 0x20;
    return 'a' <= folded && folded <= 'z';
}

/**
 * Tells whether the given code unit must be escaped with a leading backslash. The accepted
 * characters are: . * + ? ^ $ | ( ) [ ] { } backslash and forward slash.
 *
 * @param c the UTF-16 code unit to test
 * @return {@code true} if {@code c} is one of the characters listed above
 */
private static boolean isSyntaxCharacter(char c) {
    switch (c) {
        case '.':
        case '*':
        case '+':
        case '?':
        case '^':
        case '$':
        case '|':
        case '(':
        case ')':
        case '[':
        case ']':
        case '{':
        case '}':
        case '\\':
        case '/':
            return true;
        default:
            return false;
    }
}

/**
 * Tells whether the given code unit belongs to the "other punctuators" set, i.e. the sixteen
 * ASCII punctuation characters comma, hyphen-minus, equals, less-than, greater-than, number
 * sign, ampersand, exclamation mark, percent, colon, semicolon, commercial at, tilde,
 * apostrophe, grave accent and quotation mark. The caller writes these as two-digit hex
 * escapes.
 *
 * @param c the UTF-16 code unit to test
 * @return {@code true} if {@code c} is one of the sixteen characters listed above
 */
private static boolean isOtherPunctuator(char c) {
    // Same membership test as a chain of comparisons, expressed as a lookup in the literal set.
    return ",-=<>#&!%:;@~'`\"".indexOf(c) >= 0;
}

/**
 * Check if character is ECMAScript WhiteSpace, excluding TAB, VT and FF.
 *
 * <p>The WhiteSpace production consists of TAB, VT, FF, ZERO WIDTH NO-BREAK SPACE (U+FEFF) and
 * every code point in the Unicode general category Space_Separator (Zs). Besides SPACE, NO-BREAK
 * SPACE and NARROW NO-BREAK SPACE, Zs also contains OGHAM SPACE MARK, the U+2000-U+200A range,
 * MEDIUM MATHEMATICAL SPACE and IDEOGRAPHIC SPACE, all of which must be recognized here.
 *
 * <p>TAB, VT and FF are control characters rather than Zs members, so this method reports
 * {@code false} for them; {@code escape} handles them separately as control escapes.
 *
 * @param c the UTF-16 code unit to test
 * @return {@code true} if {@code c} is U+FEFF or belongs to general category Zs
 */
private static boolean isWhiteSpace(char c) {
    // U+FEFF is general category Cf, so it needs its own test alongside the Zs lookup.
    return c == '\uFEFF' || Character.getType(c) == Character.SPACE_SEPARATOR;
}

/**
 * Tells whether the given code unit is LINE SEPARATOR (U+2028) or PARAGRAPH SEPARATOR (U+2029).
 *
 * @param c the UTF-16 code unit to test
 * @return {@code true} for U+2028 and U+2029, {@code false} for everything else
 */
private static boolean isLineTerminator(char c) {
    switch (c) {
        case 0x2028: // LINE SEPARATOR
        case 0x2029: // PARAGRAPH SEPARATOR
            return true;
        default:
            return false;
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

package org.mozilla.javascript.tests.es2025;

import org.junit.Test;
import org.mozilla.javascript.testutils.Utils;

/**
 * Tests for the ES2025 {@code RegExp.escape} static method. Every script is evaluated through
 * {@code Utils.assertWithAllModes} and is expected to produce {@code true}.
 */
public class RegExpEscapeTest {

    // Basic functionality
    @Test
    public void testRegExpEscapeExists() {
        String script = "typeof RegExp.escape === 'function'";
        Utils.assertWithAllModes(true, script);
    }

    // Syntax characters are escaped with a single backslash
    @Test
    public void testEscapeSyntaxCharacters() {
        String script =
                "RegExp.escape('.') === '\\\\.' && "
                        + "RegExp.escape('*') === '\\\\*' && "
                        + "RegExp.escape('+') === '\\\\+' && "
                        + "RegExp.escape('?') === '\\\\?' && "
                        + "RegExp.escape('^') === '\\\\^' && "
                        + "RegExp.escape('$') === '\\\\$' && "
                        + "RegExp.escape('|') === '\\\\|' && "
                        + "RegExp.escape('(') === '\\\\(' && "
                        + "RegExp.escape(')') === '\\\\)' && "
                        + "RegExp.escape('[') === '\\\\[' && "
                        + "RegExp.escape(']') === '\\\\]' && "
                        + "RegExp.escape('{') === '\\\\{' && "
                        + "RegExp.escape('}') === '\\\\}' && "
                        + "RegExp.escape('\\\\') === '\\\\\\\\' && "
                        + "RegExp.escape('/') === '\\\\/'";
        Utils.assertWithAllModes(true, script);
    }

    // Control characters use their short control escapes
    @Test
    public void testEscapeControlCharacters() {
        String script =
                "RegExp.escape('\\t') === '\\\\t' && "
                        + "RegExp.escape('\\n') === '\\\\n' && "
                        + "RegExp.escape('\\v') === '\\\\v' && "
                        + "RegExp.escape('\\f') === '\\\\f' && "
                        + "RegExp.escape('\\r') === '\\\\r'";
        Utils.assertWithAllModes(true, script);
    }

    // Other punctuators (should use hex escapes)
    @Test
    public void testEscapeOtherPunctuators() {
        String script =
                "RegExp.escape(',') === '\\\\x2c' && "
                        + "RegExp.escape('-') === '\\\\x2d' && "
                        + "RegExp.escape('=') === '\\\\x3d' && "
                        + "RegExp.escape('<') === '\\\\x3c' && "
                        + "RegExp.escape('>') === '\\\\x3e' && "
                        + "RegExp.escape('#') === '\\\\x23' && "
                        + "RegExp.escape('&') === '\\\\x26' && "
                        + "RegExp.escape('!') === '\\\\x21' && "
                        + "RegExp.escape('%') === '\\\\x25' && "
                        + "RegExp.escape(':') === '\\\\x3a' && "
                        + "RegExp.escape(';') === '\\\\x3b' && "
                        + "RegExp.escape('@') === '\\\\x40' && "
                        + "RegExp.escape('~') === '\\\\x7e' && "
                        + "RegExp.escape(\"'\") === '\\\\x27' && "
                        + "RegExp.escape('`') === '\\\\x60' && "
                        + "RegExp.escape('\"') === '\\\\x22'";
        Utils.assertWithAllModes(true, script);
    }

    // WhiteSpace characters: two-digit hex escape up to 0xFF, unicode escape above
    @Test
    public void testEscapeWhiteSpace() {
        String script =
                "RegExp.escape('\\u0020') === '\\\\x20' && "
                        + "RegExp.escape('\\u00A0') === '\\\\xa0' && "
                        + "RegExp.escape('\\uFEFF') === '\\\\ufeff' && "
                        + "RegExp.escape('\\u202F') === '\\\\u202f'";
        Utils.assertWithAllModes(true, script);
    }

    // Line terminators
    @Test
    public void testEscapeLineTerminators() {
        String script =
                "RegExp.escape('\\u2028') === '\\\\u2028' && "
                        + "RegExp.escape('\\u2029') === '\\\\u2029'";
        Utils.assertWithAllModes(true, script);
    }

    // Lone surrogates are written as four-digit unicode escapes
    @Test
    public void testEscapeLoneSurrogates() {
        String script =
                "RegExp.escape('\\uD800') === '\\\\ud800' && "
                        + "RegExp.escape('\\uDC00') === '\\\\udc00'";
        Utils.assertWithAllModes(true, script);
    }

    // Initial digits/letters are hex-escaped; non-initial ones and underscore are left alone
    @Test
    public void testNotEscaped() {
        String script =
                "RegExp.escape('abc') === '\\\\x61bc' && "
                        + "RegExp.escape('123') === '\\\\x3123' && "
                        + "RegExp.escape('_') === '_' && "
                        + "RegExp.escape('ABC') === '\\\\x41BC' && "
                        + "RegExp.escape('_abc') === '_abc'";
        Utils.assertWithAllModes(true, script);
    }

    // Empty string
    @Test
    public void testEmptyString() {
        String script = "RegExp.escape('') === ''";
        Utils.assertWithAllModes(true, script);
    }

    // Mixed string (spaces are escaped, initial H is escaped)
    @Test
    public void testMixedString() {
        String script =
                "RegExp.escape('Hello. How are you?') === '\\\\x48ello\\\\.\\\\x20How\\\\x20are\\\\x20you\\\\?'";
        Utils.assertWithAllModes(true, script);
    }

    // Complex example
    @Test
    public void testComplexExample() {
        String script = "RegExp.escape('(*.*)') === '\\\\(\\\\*\\\\.\\\\*\\\\)'";
        Utils.assertWithAllModes(true, script);
    }

    // TypeError for non-string inputs (no coercion is performed)
    @Test
    public void testTypeErrorNumber() {
        String script = "try { RegExp.escape(123); false; } catch(e) { e instanceof TypeError; }";
        Utils.assertWithAllModes(true, script);
    }

    @Test
    public void testTypeErrorObject() {
        String script = "try { RegExp.escape({}); false; } catch(e) { e instanceof TypeError; }";
        Utils.assertWithAllModes(true, script);
    }

    @Test
    public void testTypeErrorNull() {
        String script = "try { RegExp.escape(null); false; } catch(e) { e instanceof TypeError; }";
        Utils.assertWithAllModes(true, script);
    }

    @Test
    public void testTypeErrorUndefined() {
        String script =
                "try { RegExp.escape(undefined); false; } catch(e) { e instanceof TypeError; }";
        Utils.assertWithAllModes(true, script);
    }

    // A call without any argument is rejected like any other non-string input
    @Test
    public void testTypeErrorNoArgument() {
        String script = "try { RegExp.escape(); false; } catch(e) { e instanceof TypeError; }";
        Utils.assertWithAllModes(true, script);
    }

    // Practical use case: user input escaping
    @Test
    public void testPracticalUseCase() {
        String script =
                "var userInput = 'example.com';"
                        + "var pattern = new RegExp(RegExp.escape(userInput));"
                        + "pattern.test('example.com') === true && "
                        + "pattern.test('exampleXcom') === false";
        Utils.assertWithAllModes(true, script);
    }
}
4 changes: 2 additions & 2 deletions tests/testsrc/test262.properties
Original file line number Diff line number Diff line change
Expand Up @@ -2101,10 +2101,10 @@ built-ins/Reflect 12/153 (7.84%)
set/return-false-if-receiver-is-not-writable.js
set/return-false-if-target-is-not-writable.js

built-ins/RegExp 974/1868 (52.14%)
built-ins/RegExp 955/1868 (51.12%)
CharacterClassEscapes 12/12 (100.0%)
dotall 4/4 (100.0%)
escape 20/20 (100.0%)
escape/escaped-otherpunctuators.js
match-indices/indices-array.js
match-indices/indices-array-element.js
match-indices/indices-array-matched.js
Expand Down