Skip to content

Commit ef66fb5

Browse files
authored
gh-135148: Correctly handle f/t strings with comments and debug expressions (#135198)
1 parent e89923d commit ef66fb5

File tree

3 files changed

+83
-20
lines changed

3 files changed

+83
-20
lines changed

Lib/test/test_fstring.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,6 +1651,18 @@ def __repr__(self):
16511651
self.assertEqual(f"{1+2 = # my comment
16521652
}", '1+2 = \n 3')
16531653

1654+
self.assertEqual(f'{""" # booo
1655+
"""=}', '""" # booo\n """=\' # booo\\n \'')
1656+
1657+
self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
1658+
self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')
1659+
1660+
self.assertEqual(f'{ # some comment goes here
1661+
"""hello"""=}', ' \n """hello"""=\'hello\'')
1662+
self.assertEqual(f'{"""# this is not a comment
1663+
a""" # this is a comment
1664+
}', '# this is not a comment\n a')
1665+
16541666
# These next lines contain tabs. Backslash escapes don't
16551667
# work in f-strings.
16561668
# patchcheck doesn't like these tabs. So the only way to test
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fixed a bug where f-string debug expressions (using =) would incorrectly
2+
strip out parts of strings containing escaped quotes and # characters. Patch
3+
by Pablo Galindo.

Parser/lexer/lexer.c

Lines changed: 68 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -121,38 +121,88 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
121121
}
122122
PyObject *res = NULL;
123123

124-
// Check if there is a # character in the expression
124+
// Look for a # character outside of string literals
125125
int hash_detected = 0;
126+
int in_string = 0;
127+
char quote_char = 0;
128+
126129
for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
127-
if (tok_mode->last_expr_buffer[i] == '#') {
130+
char ch = tok_mode->last_expr_buffer[i];
131+
132+
// Skip escaped characters
133+
if (ch == '\\') {
134+
i++;
135+
continue;
136+
}
137+
138+
// Handle quotes
139+
if (ch == '"' || ch == '\'') {
140+
// The following if/else block works because there is an odd number
141+
// of quotes in STRING tokens and the lexer only ever reaches this
142+
// function with valid STRING tokens.
143+
// For example: """hello"""
144+
// First quote: in_string = 1
145+
// Second quote: in_string = 0
146+
// Third quote: in_string = 1
147+
if (!in_string) {
148+
in_string = 1;
149+
quote_char = ch;
150+
}
151+
else if (ch == quote_char) {
152+
in_string = 0;
153+
}
154+
continue;
155+
}
156+
157+
// Check for # outside strings
158+
if (ch == '#' && !in_string) {
128159
hash_detected = 1;
129160
break;
130161
}
131162
}
132-
163+
// If we found a # character in the expression, we need to handle comments
133164
if (hash_detected) {
134-
Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
135-
char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
165+
// Allocate buffer for processed result
166+
char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
136167
if (!result) {
137168
return -1;
138169
}
139170

140-
Py_ssize_t i = 0;
141-
Py_ssize_t j = 0;
171+
Py_ssize_t i = 0; // Input position
172+
Py_ssize_t j = 0; // Output position
173+
in_string = 0; // Whether we're in a string
174+
quote_char = 0; // Current string quote char
142175

143-
for (i = 0, j = 0; i < input_length; i++) {
144-
if (tok_mode->last_expr_buffer[i] == '#') {
145-
// Skip characters until newline or end of string
146-
while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
147-
if (tok_mode->last_expr_buffer[i] == '\n') {
148-
result[j++] = tok_mode->last_expr_buffer[i];
149-
break;
150-
}
176+
// Process each character
177+
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
178+
char ch = tok_mode->last_expr_buffer[i];
179+
180+
// Handle string quotes
181+
if (ch == '"' || ch == '\'') {
182+
// See comment above to understand this part
183+
if (!in_string) {
184+
in_string = 1;
185+
quote_char = ch;
186+
} else if (ch == quote_char) {
187+
in_string = 0;
188+
}
189+
result[j++] = ch;
190+
}
191+
// Skip comments
192+
else if (ch == '#' && !in_string) {
193+
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
194+
tok_mode->last_expr_buffer[i] != '\n') {
151195
i++;
152196
}
153-
} else {
154-
result[j++] = tok_mode->last_expr_buffer[i];
197+
if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
198+
result[j++] = '\n';
199+
}
200+
}
201+
// Copy other chars
202+
else {
203+
result[j++] = ch;
155204
}
205+
i++;
156206
}
157207

158208
result[j] = '\0'; // Null-terminate the result string
@@ -164,11 +214,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
164214
tok_mode->last_expr_size - tok_mode->last_expr_end,
165215
NULL
166216
);
167-
168217
}
169218

170-
171-
if (!res) {
219+
if (!res) {
172220
return -1;
173221
}
174222
token->metadata = res;

0 commit comments

Comments
 (0)