diff --git a/Lib/test/test_tools/i18n_data/ascii-escapes.pot b/Lib/test/test_tools/i18n_data/ascii-escapes.pot index f8e0f53b256934..cc5a9f6ba619db 100644 --- a/Lib/test/test_tools/i18n_data/ascii-escapes.pot +++ b/Lib/test/test_tools/i18n_data/ascii-escapes.pot @@ -41,7 +41,7 @@ msgstr "" #. some characters in the 128-255 range #: escapes.py:20 -msgid "€   ÿ" +msgid "\302\200 \302\240 ÿ" msgstr "" #. some characters >= 256 encoded as 2, 3 and 4 bytes, respectively diff --git a/Misc/NEWS.d/next/Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst b/Misc/NEWS.d/next/Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst new file mode 100644 index 00000000000000..1235360f9c6fad --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2025-04-05-14-52-36.gh-issue-132121.QNoDih.rst @@ -0,0 +1 @@ +Always escape non-printable Unicode characters in :program:`pygettext`. diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index f0ee2ea386f18f..a4af1d2be82914 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -190,12 +190,10 @@ def make_escapes(pass_nonascii): # Allow non-ascii characters to pass through so that e.g. 'msgid # "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we # escape any character outside the 32..126 range. - mod = 128 escape = escape_ascii else: - mod = 256 escape = escape_nonascii - escapes = [r"\%03o" % i for i in range(mod)] + escapes = [r"\%03o" % i for i in range(256)] for i in range(32, 127): escapes[i] = chr(i) escapes[ord('\\')] = r'\\' @@ -206,7 +204,9 @@ def make_escapes(pass_nonascii): def escape_ascii(s, encoding): - return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s) + return ''.join(escapes[ord(c)] if ord(c) < 128 else c + if c.isprintable() else escape_nonascii(c, encoding) + for c in s) def escape_nonascii(s, encoding):