Skip to content

Commit 198c2b7

Browse files
authored
Merge pull request #265 from openzim/wabac_changes
Backport wabac changes
2 parents 517143e + 7855070 commit 198c2b7

File tree

2 files changed

+54
-8
lines changed

2 files changed

+54
-8
lines changed

src/zimscraperlib/rewriting/js.py

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
it appropriately
1212
- a specific JS file (provided in `statics` folder) for JS modules is included in the
1313
ZIM at `_zim_static/__wb_module_decl.js`
14+
15+
This code is based on https://github.com/webrecorder/wabac.js/blob/main/src/rewrite/jsrewriter.ts
16+
Last backport of upstream changes is from Sept 13, 2025
17+
Commit 6dd2d9ae664cfcd2ea8637d7d6c7ed7a0ca332a0
1418
"""
1519

1620
import re
@@ -29,13 +33,13 @@
2933
from zimscraperlib.rewriting.url_rewriting import ArticleUrlRewriter, ZimPath
3034

3135
# The regex used to rewrite `import ...` / `export ...` statements in module code.
32-
IMPORT_MATCH_RX = re.compile(
33-
r"""^\s*?import(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s])(?:.*?)['"\s]""",
36+
IMPORT_EXPORT_MATCH_RX = re.compile(
37+
r"""(^|;)\s*?(?:im|ex)port(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s])(?:.*?)['"\s]""",
3438
)
3539

3640
# A sub-regex used inside the `import ...` / `export ...` rewrite to rewrite the imported URL (http(s), absolute or relative)
37-
IMPORT_HTTP_RX = re.compile(
38-
r"""(import(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s]))((?:https?|[./]).*?)(['"\s])""",
41+
IMPORT_EXPORT_HTTP_RX = re.compile(
42+
r"""((?:im|ex)port(?:['"\s]*(?:[\w*${}\s,]+from\s*)?['"\s]?['"\s]))((?:https?|[./]).*?)(['"\s])""",
3943
)
4044

4145
# The list of global variables we want to wrap.
@@ -153,7 +157,10 @@ def create_js_rules() -> list[TransformationRule]:
153157

154158
return [
155159
# rewriting `eval(...)` - invocation
156-
(re.compile(r"(?:^|\s)\beval\s*\("), replace_prefix_from(eval_str, "eval")),
160+
(
161+
re.compile(r"(?<!static)(?<!function)(?<!})(?:^|\s)\beval\s*\("),
162+
replace_prefix_from(eval_str, "eval"),
163+
),
157164
(re.compile(r"\([\w]+,\s*eval\)\("), m2str(lambda _: f" {eval_str}")),
158165
# rewriting `x = eval` - no invocation
159166
(re.compile(r"[=]\s*\beval\b(?![(:.$])"), replace("eval", "self.eval")),
@@ -162,7 +169,7 @@ def create_js_rules() -> list[TransformationRule]:
162169
(re.compile(r"\.postMessage\b\("), add_prefix(".__WB_pmw(self)")),
163170
# rewriting `location = ` to custom expression `(...).href =` assignment
164171
(
165-
re.compile(r"(?:^|[^$.+*/%^-])\s?\blocation\b\s*[=]\s*(?![\s\d=])"),
172+
re.compile(r"(?:^|[^$.+*/%^-])\s?\blocation\b\s*[=]\s*(?![\s\d=>])"),
166173
add_suffix_non_prop(check_loc),
167174
),
168175
# rewriting `return this`
@@ -312,8 +319,8 @@ def sub_funct(match: re.Match[str]) -> str:
312319
f"{match.group(3)}"
313320
)
314321

315-
return IMPORT_HTTP_RX.sub(sub_funct, m_object[0])
322+
return IMPORT_EXPORT_HTTP_RX.sub(sub_funct, m_object[0])
316323

317324
return func
318325

319-
return (IMPORT_MATCH_RX, rewrite_import())
326+
return (IMPORT_EXPORT_MATCH_RX, rewrite_import())

tests/rewriting/test_js_rewriting.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,10 @@ def wrap_script(text: str) -> str:
186186
"self.__WB_check_loc(location, argument"
187187
"s)) || {}).href = http://example.com/",
188188
),
189+
WrappedTestContent(
190+
input_='location => "http://example.com/"',
191+
expected='location => "http://example.com/"',
192+
),
189193
WrappedTestContent(
190194
input_=" location = http://example.com/2",
191195
expected=" location = ((self.__WB_check_loc && "
@@ -220,6 +224,24 @@ def wrap_script(text: str) -> str:
220224
expected="if (self.foo) { console.log('blah') }",
221225
),
222226
WrappedTestContent(input_="window.x = 5", expected="window.x = 5"),
227+
WrappedTestContent(
228+
input_="""
229+
class A {}
230+
const B = 5;
231+
let C = 4;
232+
var D = 3;
233+
234+
location = "http://example.com/2" """,
235+
expected="""
236+
class A {}
237+
const B = 5;
238+
let C = 4;
239+
var D = 3;
240+
241+
location = ((self.__WB_check_loc && """
242+
"self.__WB_check_loc(location, arguments)) || {}).href "
243+
"""= "http://example.com/2" """,
244+
),
223245
WrappedTestContent(input_=" var self ", expected=" let self "),
224246
]
225247
)
@@ -288,6 +310,20 @@ def wrap_import(text: str) -> str:
288310
export { a };
289311
""",
290312
),
313+
# rewrite import same line
314+
ImportTestContent(
315+
input_='import{A, B} from "https://example.com/";'
316+
'import{C, D} from "https://example.org"',
317+
expected='import{A, B} from "../../../example.com/";'
318+
'import{C, D} from "../../../example.org/"',
319+
),
320+
# rewrite import / export same line
321+
ImportTestContent(
322+
input_='import{A, B} from "https://example.com/";'
323+
'export{C, D} from "/another/path/to/file"',
324+
expected='import{A, B} from "../../../example.com/";'
325+
'export{C, D} from "../../another/path/to/file"',
326+
),
291327
# rewrite ESM module import
292328
ImportTestContent(
293329
input_='import "https://example.com/file.js"',
@@ -387,6 +423,9 @@ def test_import_rewrite(rewrite_import_content: ImportTestContent):
387423
",eval(a)",
388424
"this.$eval(a)",
389425
"x = $eval; x(a);",
426+
"static eval(a,b){ }",
427+
"function eval(a,b){ }",
428+
"} eval(a,b){ }",
390429
"obj = { eval : 1 }",
391430
"x = obj.eval",
392431
"x = obj.eval(a)",

0 commit comments

Comments
 (0)