Skip to content

Commit e1f7f15

Browse files
committed
[IMP] util.explode_query{,_range}: only format the {parallel_filter} placeholder
The usage of `str.format` to inject the parallel filter used to explode queries is not robust to the presence of other curly braces. Examples: 1. `JSON` strings (typically to leverage their mapping capabilities): See 79f3d71, where a query had to be modified to accommodate that. 2. Hardcoded sets of curly braces: ```python >>> "UPDATE t SET c = '{usage as literal characters}' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> KeyError: 'usage as literal characters' ``` Which can be (unelegantly) solved adding even more braces, leveraging one side effect of `str.format`: ```python >>> "UPDATE t SET c = '{{usage as literal characters}}' WHERE {parallel_filter}".format(parallel_filter="…") "UPDATE t SET c = '{usage as literal characters}' WHERE …" ``` 3. Hardcoded single unpaired curly braces (AFAICT no way to solve this): ```python >>> "UPDATE t SET c = 'this is an open curly brace = {' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> ValueError: unexpected '{' in field name ``` ```python >>> "UPDATE t SET c = 'this is a close brace = }' WHERE {parallel_filter}".format(parallel_filter="…") Traceback (most recent call last): File "<stdin>", line 1, in <module> ValueError: Single '}' encountered in format string ``` To circumvent this, we now use a dedicated Formatter that only handle the `{parallel_filter}` placeholder. This has the advantage of still deduplicate the doubled curly braces (see point 2 above) and thus being retro-compatible. This doesn't solve the single unpaired curly braces case, but this is rare enough to be handled by other means.
1 parent fbd0b4f commit e1f7f15

2 files changed

Lines changed: 48 additions & 4 deletions

File tree

src/base/tests/test_util.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,30 @@ def test_pg_text2html(self, value, expected):
768768
result = cr.fetchone()[0]
769769
self.assertEqual(result, expected)
770770

771+
@parametrize(
772+
[
773+
("{parallel_filter}", "…"),
774+
("{{parallel_filter}}", "{parallel_filter}"),
775+
("{}", "{}"),
776+
("{0}", "{0}"),
777+
("{{}}", "{}"),
778+
("{{", "{"),
779+
("test", "test"),
780+
("", ""),
781+
("WHERE {parallel_filter} AND true", "WHERE … AND true"),
782+
("WHERE {parallel_filter} AND {other}", "WHERE … AND {other}"),
783+
("WHERE {parallel_filter} AND {other!r}", "WHERE … AND {other!r}"),
784+
("WHERE {parallel_filter} AND {{other}}", "WHERE … AND {other}"),
785+
("WHERE {parallel_filter} AND {}", "WHERE … AND {}"),
786+
("WHERE {parallel_filter} AND {{}}", "WHERE … AND {}"),
787+
("WHERE {parallel_filter} AND {parallel_filter}", "WHERE … AND …"),
788+
("using { with other things inside } and {parallel_filter}", "using { with other things inside } and …"),
789+
]
790+
)
791+
def test_ExplodeFormatter(self, value, expected):
792+
formatted = util.pg._ExplodeFormatter().format(value, parallel_filter="…")
793+
self.assertEqual(formatted, expected)
794+
771795
def _get_cr(self):
772796
cr = self.registry.cursor()
773797
self.addCleanup(cr.close)

src/util/pg.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import logging
66
import os
77
import re
8+
import string
89
import threading
910
import time
1011
import uuid
@@ -220,6 +221,22 @@ def wrap(arg):
220221
return SQLStr(sql.SQL(query).format(*args, **kwargs).as_string(cr._cnx))
221222

222223

224+
class _ExplodeFormatter(string.Formatter):
225+
def parse(self, format_string):
226+
for literal_text, field_name, format_spec, conversion in super(_ExplodeFormatter, self).parse(format_string):
227+
if field_name is not None and field_name != "parallel_filter":
228+
yield literal_text + "{", None, None, None
229+
composed = (
230+
field_name
231+
+ (("!" + conversion) if conversion else "")
232+
+ ((":" + format_spec) if format_spec else "")
233+
+ "}"
234+
)
235+
yield composed, None, None, None
236+
else:
237+
yield literal_text, field_name, format_spec, conversion
238+
239+
223240
def explode_query(cr, query, alias=None, num_buckets=8, prefix=None):
224241
"""
225242
Explode a query to multiple queries that can be executed in parallel.
@@ -256,7 +273,7 @@ def explode_query(cr, query, alias=None, num_buckets=8, prefix=None):
256273
if num_buckets < 1:
257274
raise ValueError("num_buckets should be greater than zero")
258275
parallel_filter = "mod(abs({prefix}id), %s) = %s".format(prefix=prefix)
259-
query = query.replace("%", "%%").format(parallel_filter=parallel_filter)
276+
query = _ExplodeFormatter().format(query.replace("%", "%%"), parallel_filter=parallel_filter)
260277
return [cr.mogrify(query, [num_buckets, index]).decode() for index in range(num_buckets)]
261278

262279

@@ -286,6 +303,8 @@ def explode_query_range(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET
286303
sep_kw = " AND " if re.search(r"\sWHERE\s", query, re.M | re.I) else " WHERE "
287304
query += sep_kw + "{parallel_filter}"
288305

306+
fmt = _ExplodeFormatter().format
307+
289308
cr.execute(format_query(cr, "SELECT min(id), max(id) FROM {}", table))
290309
min_id, max_id = cr.fetchone()
291310
if min_id is None:
@@ -294,7 +313,7 @@ def explode_query_range(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET
294313
# Even if there are any records, return one query to be executed to validate its correctness and avoid
295314
# scripts that pass the CI but fail in production.
296315
parallel_filter = "{alias}.id IS NOT NULL".format(alias=alias)
297-
return [query.format(parallel_filter=parallel_filter)]
316+
return [fmt(query, parallel_filter=parallel_filter)]
298317
else:
299318
return []
300319

@@ -335,10 +354,11 @@ def explode_query_range(cr, query, table, alias=None, bucket_size=DEFAULT_BUCKET
335354
# Still, since the query may only be valid if there is no split, we force the usage of `prefix` in the query to
336355
# validate its correctness and avoid scripts that pass the CI but fail in production.
337356
parallel_filter = "{alias}.id IS NOT NULL".format(alias=alias)
338-
return [query.format(parallel_filter=parallel_filter)]
357+
return [fmt(query, parallel_filter=parallel_filter)]
339358

340359
parallel_filter = "{alias}.id BETWEEN %(lower-bound)s AND %(upper-bound)s".format(alias=alias)
341-
query = query.replace("%", "%%").format(parallel_filter=parallel_filter)
360+
query = fmt(query.replace("%", "%%"), parallel_filter=parallel_filter)
361+
342362
return [
343363
cr.mogrify(query, {"lower-bound": ids[i], "upper-bound": ids[i + 1] - 1}).decode() for i in range(len(ids) - 1)
344364
]

0 commit comments

Comments
 (0)