Skip to content

Commit 33a7a90

Browse files
authored
Enhance and cleanup lexer-parser interface (#107)
This commit separates the lex operations so that developers can control preprocessor aliasing on a per-call basis. It replaces the global 'preproc_aliasing' flag with an explicit 'aliasing' parameter on the new '*_internal' lexer functions. Additionally, the commit refines the control flow functions related to preprocessor directives: it consolidates if_elif_skip_lines and ifdef_else_skip_lines into a single function. The two had similar functionality, and the merged function now covers all possible combinations of control flow. In the previous design, the former did not cover #else, and the latter overlooked #elif. Furthermore, this commit improves the naming of lex-related functions.
1 parent 4b83bb2 commit 33a7a90

File tree

2 files changed

+80
-97
lines changed

2 files changed

+80
-97
lines changed

src/lexer.c

+56-54
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,6 @@ int skip_newline = 1;
8787

8888
int preproc_match;
8989

90-
/* Allow replacing identifiers with alias value if alias exists. This is
91-
* disabled in certain cases, e.g. #undef.
92-
*/
93-
int preproc_aliasing = 1;
94-
9590
/* Point to the first character after where the macro has been called. It is
9691
* needed when returning from the macro body.
9792
*/
@@ -174,44 +169,15 @@ char read_char(int is_skip_space)
174169
return next_char;
175170
}
176171

177-
/* get alias name from defined() directive
178-
* i.e., get __arm__ from defined(__arm__)
179-
*/
180-
void read_alias_name_from_defined(char *alias_name, char *src)
181-
{
182-
int i;
183-
184-
src = src + 8; /* skip defined( */
185-
i = 0;
186-
while (src[i] != ')') {
187-
alias_name[i] = src[i];
188-
i++;
189-
}
190-
alias_name[i] = 0;
191-
}
192-
193172
char peek_char(int offset)
194173
{
195174
return SOURCE[source_idx + offset];
196175
}
197176

198-
/* check alias defined or not */
199-
void chk_def(int defined)
200-
{
201-
char *alias = NULL;
202-
char alias_name[MAX_TOKEN_LEN];
203-
204-
if (defined) {
205-
read_alias_name_from_defined(alias_name, token_str);
206-
alias = find_alias(alias_name);
207-
} else
208-
alias = find_alias(token_str);
209-
210-
if (alias)
211-
preproc_match = 1;
212-
}
213-
214-
token_t get_next_token()
177+
/* Lex the next token and return its token type. When `aliasing` is zero,
178+
* preprocessor aliasing of identifier tokens is disabled.
179+
*/
180+
token_t lex_token_internal(int aliasing)
215181
{
216182
token_str[0] = 0;
217183

@@ -257,7 +223,7 @@ token_t get_next_token()
257223
read_char(0);
258224
if (next_char == '/') {
259225
read_char(1);
260-
return get_next_token();
226+
return lex_token_internal(aliasing);
261227
}
262228
}
263229
} while (next_char);
@@ -549,7 +515,7 @@ token_t get_next_token()
549515
if (!strcmp(token_str, "continue"))
550516
return T_continue;
551517

552-
if (preproc_aliasing) {
518+
if (aliasing) {
553519
alias = find_alias(token_str);
554520
if (alias) {
555521
token_t t = is_numeric(alias) ? T_numeric : T_string;
@@ -570,7 +536,7 @@ token_t get_next_token()
570536
next_char = SOURCE[source_idx];
571537
} else
572538
next_char = read_char(1);
573-
return get_next_token();
539+
return lex_token_internal(aliasing);
574540
}
575541

576542
if (next_char == 0)
@@ -582,30 +548,45 @@ token_t get_next_token()
582548
return T_eof;
583549
}
584550

551+
/* Lex the next token and return its token type. To disable aliasing on the next
552+
* token, use `lex_token_internal`. */
553+
token_t lex_token()
554+
{
555+
return lex_token_internal(1);
556+
}
557+
585558
/* Skip the content. We only need the index where the macro body begins. */
586559
void skip_macro_body()
587560
{
588561
while (!is_newline(next_char))
589-
next_token = get_next_token();
562+
next_token = lex_token();
590563

591564
skip_newline = 1;
592-
next_token = get_next_token();
565+
next_token = lex_token();
593566
}
594567

595-
int lex_accept(token_t token)
568+
/* Accepts next token if token types are matched. */
569+
int lex_accept_internal(token_t token, int aliasing)
596570
{
597571
if (next_token == token) {
598-
/* FIXME: this is a hack, fix aggressive aliasing first */
599-
if (token == T_cppd_ifdef)
600-
preproc_aliasing = 0;
601-
next_token = get_next_token();
602-
if (token == T_cppd_ifdef)
603-
preproc_aliasing = 1;
572+
next_token = lex_token_internal(aliasing);
604573
return 1;
605574
}
575+
606576
return 0;
607577
}
608578

579+
/* Accepts next token if token types are matched. To disable aliasing
580+
* on next token, use `lex_accept_internal`.
581+
*/
582+
int lex_accept(token_t token)
583+
{
584+
return lex_accept_internal(token, 1);
585+
}
586+
587+
/* Peek at the next token and copy its literal to `value` if the token type
588+
* matches.
589+
*/
609590
int lex_peek(token_t token, char *value)
610591
{
611592
if (next_token == token) {
@@ -617,17 +598,38 @@ int lex_peek(token_t token, char *value)
617598
return 0;
618599
}
619600

620-
void lex_ident(token_t token, char *value)
601+
/* Strictly match next token with given token type and copy token's
602+
* literal to value.
603+
*/
604+
void lex_ident_internal(token_t token, char *value, int aliasing)
621605
{
622606
if (next_token != token)
623607
error("Unexpected token");
624608
strcpy(value, token_str);
625-
next_token = get_next_token();
609+
next_token = lex_token_internal(aliasing);
626610
}
627611

628-
void lex_expect(token_t token)
612+
/* Strictly match next token with given token type and copy token's
613+
* literal to value. To disable aliasing on next token, use
614+
* `lex_ident_internal`.
615+
*/
616+
void lex_ident(token_t token, char *value)
617+
{
618+
lex_ident_internal(token, value, 1);
619+
}
620+
621+
/* Strictly match next token with given token type. */
622+
void lex_expect_internal(token_t token, int aliasing)
629623
{
630624
if (next_token != token)
631625
error("Unexpected token");
632-
next_token = get_next_token();
626+
next_token = lex_token_internal(aliasing);
627+
}
628+
629+
/* Strictly match next token with given token type. To disable aliasing
630+
* on next token, use `lex_expect_internal`.
631+
*/
632+
void lex_expect(token_t token)
633+
{
634+
lex_expect_internal(token, 1);
633635
}

src/parser.c

+24-43
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ int get_size(var_t *var, type_t *type)
6666
* whitespace */
6767
void skip_line(int invalidate)
6868
{
69+
/* FIXME: Comments will cause the current validation to fail. */
6970
skip_whitespace();
7071
do {
7172
if (invalidate && !is_whitespace(peek_char(0)) &&
@@ -75,28 +76,14 @@ void skip_line(int invalidate)
7576
} while (read_char(0) != '\n');
7677
}
7778

78-
void if_elif_skip_lines()
79-
{
80-
char peek_c;
81-
int i;
82-
83-
do {
84-
skip_whitespace();
85-
i = 0;
86-
do {
87-
token_str[i++] = next_char;
88-
} while (read_char(0) != '\n');
89-
token_str[i] = 0;
90-
read_char(1);
91-
peek_c = peek_char(1);
92-
} while (next_char != '#' || (next_char == '#' && peek_c == 'd'));
93-
skip_whitespace();
94-
}
95-
96-
void ifdef_else_skip_lines()
79+
/* Skip lines while the preprocessor match is false. Skipping stops once the next
80+
* token is `T_cppd_elif`, `T_cppd_else` or `T_cppd_endif`.
81+
*/
82+
void cppd_control_flow_skip_lines()
9783
{
98-
while (!lex_peek(T_cppd_else, NULL) && !lex_peek(T_cppd_endif, NULL)) {
99-
next_token = get_next_token();
84+
while (!lex_peek(T_cppd_elif, NULL) && !lex_peek(T_cppd_else, NULL) &&
85+
!lex_peek(T_cppd_endif, NULL)) {
86+
next_token = lex_token();
10087
}
10188
skip_whitespace();
10289
}
@@ -111,12 +98,10 @@ void read_defined_macro()
11198
{
11299
char lookup_alias[MAX_TOKEN_LEN];
113100

114-
preproc_aliasing = 0; /* to prevent aggressive aliasing */
115101
lex_expect(T_identifier); /* defined */
116-
lex_expect(T_open_bracket);
102+
lex_expect_internal(T_open_bracket, 0);
117103
lex_ident(T_identifier, lookup_alias);
118104
lex_expect(T_close_bracket);
119-
preproc_aliasing = 1;
120105

121106
check_def(lookup_alias);
122107
}
@@ -169,10 +154,8 @@ int read_preproc_directive()
169154
if (lex_peek(T_cppd_undef, token)) {
170155
char alias[MAX_VAR_LEN];
171156

172-
preproc_aliasing = 0;
173-
lex_expect(T_cppd_undef);
157+
lex_expect_internal(T_cppd_undef, 0);
174158
lex_peek(T_identifier, alias);
175-
preproc_aliasing = 1;
176159
lex_expect(T_identifier);
177160

178161
remove_alias(alias);
@@ -201,7 +184,7 @@ int read_preproc_directive()
201184
return 1;
202185
}
203186

204-
if_elif_skip_lines();
187+
cppd_control_flow_skip_lines();
205188
} else {
206189
/* TODO: parse and evaluate constant expression here */
207190
}
@@ -210,7 +193,7 @@ int read_preproc_directive()
210193
if (lex_accept(T_cppd_elif)) {
211194
if (preproc_match) {
212195
while (!lex_peek(T_cppd_endif, NULL)) {
213-
next_token = get_next_token();
196+
next_token = lex_token();
214197
}
215198
return 1;
216199
}
@@ -223,7 +206,7 @@ int read_preproc_directive()
223206
return 1;
224207
}
225208

226-
if_elif_skip_lines();
209+
cppd_control_flow_skip_lines();
227210
} else {
228211
/* TODO: parse and evaluate constant expression here */
229212
}
@@ -240,16 +223,15 @@ int read_preproc_directive()
240223
return 1;
241224
}
242225

243-
/* skip lines until #else or #endif */
244-
ifdef_else_skip_lines();
226+
cppd_control_flow_skip_lines();
245227
return 1;
246228
}
247229
if (lex_accept(T_cppd_endif)) {
248230
preproc_match = 0;
249231
skip_whitespace();
250232
return 1;
251233
}
252-
if (lex_accept(T_cppd_ifdef)) {
234+
if (lex_accept_internal(T_cppd_ifdef, 0)) {
253235
preproc_match = 0;
254236
lex_ident(T_identifier, token);
255237
check_def(token);
@@ -259,8 +241,7 @@ int read_preproc_directive()
259241
return 1;
260242
}
261243

262-
/* skip lines until #else or #endif */
263-
ifdef_else_skip_lines();
244+
cppd_control_flow_skip_lines();
264245
return 1;
265246
}
266247

@@ -677,12 +658,12 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
677658
for (i = 0; i < remainder; i++) {
678659
source_idx = macro->params[macro->num_params - remainder + i];
679660
next_char = SOURCE[source_idx];
680-
next_token = get_next_token();
661+
next_token = lex_token();
681662
read_expr(parent, bb);
682663
}
683664
source_idx = t;
684665
next_char = SOURCE[source_idx];
685-
next_token = get_next_token();
666+
next_token = lex_token();
686667
} else if (mac) {
687668
if (parent->macro)
688669
error("Nested macro is not yet supported");
@@ -695,7 +676,7 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
695676
while (!lex_peek(T_close_bracket, NULL)) {
696677
mac->params[mac->num_params++] = source_idx;
697678
do {
698-
next_token = get_next_token();
679+
next_token = lex_token();
699680
} while (next_token != T_comma &&
700681
next_token != T_close_bracket);
701682
}
@@ -717,11 +698,11 @@ void read_expr_operand(block_t *parent, basic_block_t **bb)
717698
int t = source_idx;
718699
source_idx = macro_param_idx;
719700
next_char = SOURCE[source_idx];
720-
next_token = get_next_token();
701+
next_token = lex_token();
721702
read_expr(parent, bb);
722703
source_idx = t;
723704
next_char = SOURCE[source_idx];
724-
next_token = get_next_token();
705+
next_token = lex_token();
725706
} else if (con) {
726707
ph1_ir = add_ph1_ir(OP_load_constant);
727708
vd = require_var(parent);
@@ -1599,15 +1580,15 @@ void eval_ternary_imm(int cond, char *token)
15991580
{
16001581
if (cond == 0) {
16011582
while (next_token != T_colon) {
1602-
next_token = get_next_token();
1583+
next_token = lex_token();
16031584
}
16041585
lex_accept(T_colon);
16051586
read_global_assignment(token);
16061587
} else {
16071588
read_global_assignment(token);
16081589
lex_expect(T_colon);
16091590
while (!lex_peek(T_semicolon, NULL)) {
1610-
next_token = get_next_token();
1591+
next_token = lex_token();
16111592
}
16121593
}
16131594
}
@@ -2453,7 +2434,7 @@ basic_block_t *read_body_statement(block_t *parent, basic_block_t *bb)
24532434
while (!lex_peek(T_close_bracket, NULL)) {
24542435
mac->params[mac->num_params++] = source_idx;
24552436
do {
2456-
next_token = get_next_token();
2437+
next_token = lex_token();
24572438
} while (next_token != T_comma && next_token != T_close_bracket);
24582439
}
24592440
/* move `source_idx` to the macro body */

0 commit comments

Comments
 (0)