@@ -121,38 +121,88 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
121
121
}
122
122
PyObject * res = NULL ;
123
123
124
- // Check if there is a # character in the expression
124
+ // Look for a # character outside of string literals
125
125
int hash_detected = 0 ;
126
+ int in_string = 0 ;
127
+ char quote_char = 0 ;
128
+
126
129
for (Py_ssize_t i = 0 ; i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ; i ++ ) {
127
- if (tok_mode -> last_expr_buffer [i ] == '#' ) {
130
+ char ch = tok_mode -> last_expr_buffer [i ];
131
+
132
+ // Skip escaped characters
133
+ if (ch == '\\' ) {
134
+ i ++ ;
135
+ continue ;
136
+ }
137
+
138
+ // Handle quotes
139
+ if (ch == '"' || ch == '\'' ) {
140
+ // The following if/else block works because there is an odd number
141
+ // of quotes in STRING tokens and the lexer only ever reaches this
142
+ // function with valid STRING tokens.
143
+ // For example: """hello"""
144
+ // First quote: in_string = 1
145
+ // Second quote: in_string = 0
146
+ // Third quote: in_string = 1
147
+ if (!in_string ) {
148
+ in_string = 1 ;
149
+ quote_char = ch ;
150
+ }
151
+ else if (ch == quote_char ) {
152
+ in_string = 0 ;
153
+ }
154
+ continue ;
155
+ }
156
+
157
+ // Check for # outside strings
158
+ if (ch == '#' && !in_string ) {
128
159
hash_detected = 1 ;
129
160
break ;
130
161
}
131
162
}
132
-
163
+ // If we found a # character in the expression, we need to handle comments
133
164
if (hash_detected ) {
134
- Py_ssize_t input_length = tok_mode -> last_expr_size - tok_mode -> last_expr_end ;
135
- char * result = (char * )PyMem_Malloc ((input_length + 1 ) * sizeof (char ));
165
+ // Allocate buffer for processed result
166
+ char * result = (char * )PyMem_Malloc ((tok_mode -> last_expr_size - tok_mode -> last_expr_end + 1 ) * sizeof (char ));
136
167
if (!result ) {
137
168
return -1 ;
138
169
}
139
170
140
- Py_ssize_t i = 0 ;
141
- Py_ssize_t j = 0 ;
171
+ Py_ssize_t i = 0 ; // Input position
172
+ Py_ssize_t j = 0 ; // Output position
173
+ in_string = 0 ; // Whether we're in a string
174
+ quote_char = 0 ; // Current string quote char
142
175
143
- for (i = 0 , j = 0 ; i < input_length ; i ++ ) {
144
- if (tok_mode -> last_expr_buffer [i ] == '#' ) {
145
- // Skip characters until newline or end of string
146
- while (i < input_length && tok_mode -> last_expr_buffer [i ] != '\0' ) {
147
- if (tok_mode -> last_expr_buffer [i ] == '\n' ) {
148
- result [j ++ ] = tok_mode -> last_expr_buffer [i ];
149
- break ;
150
- }
176
+ // Process each character
177
+ while (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ) {
178
+ char ch = tok_mode -> last_expr_buffer [i ];
179
+
180
+ // Handle string quotes
181
+ if (ch == '"' || ch == '\'' ) {
182
+ // See comment above to understand this part
183
+ if (!in_string ) {
184
+ in_string = 1 ;
185
+ quote_char = ch ;
186
+ } else if (ch == quote_char ) {
187
+ in_string = 0 ;
188
+ }
189
+ result [j ++ ] = ch ;
190
+ }
191
+ // Skip comments
192
+ else if (ch == '#' && !in_string ) {
193
+ while (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end &&
194
+ tok_mode -> last_expr_buffer [i ] != '\n' ) {
151
195
i ++ ;
152
196
}
153
- } else {
154
- result [j ++ ] = tok_mode -> last_expr_buffer [i ];
197
+ if (i < tok_mode -> last_expr_size - tok_mode -> last_expr_end ) {
198
+ result [j ++ ] = '\n' ;
199
+ }
200
+ }
201
+ // Copy other chars
202
+ else {
203
+ result [j ++ ] = ch ;
155
204
}
205
+ i ++ ;
156
206
}
157
207
158
208
result [j ] = '\0' ; // Null-terminate the result string
@@ -164,11 +214,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
164
214
tok_mode -> last_expr_size - tok_mode -> last_expr_end ,
165
215
NULL
166
216
);
167
-
168
217
}
169
218
170
-
171
- if (!res ) {
219
+ if (!res ) {
172
220
return -1 ;
173
221
}
174
222
token -> metadata = res ;
0 commit comments