
Commit 1ce6666
Parser: improve error message handling
* use a single `on_error` handler with error level and message arguments
* remove the `Warning` token type; it was never handled anyway
* improve `#error` and `#warning` message parsing consistency
* make `num_error` messages non-fatal
* fix `#warning` behavior, add tests
1 parent e80b1d4 · commit 1ce6666

File tree (7 files changed, +81 -64 lines changed):

ast_utils/constants.c2
parser/c2_parser.c2
parser/c2_tokenizer.c2
parser/token.c2
test/parser/preprocessor_directives.c2
tools/c2cat.c2
tools/tester/test_db.c2

ast_utils/constants.c2

Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@ public const u32 MaxScopes = 32;
 public const u32 MaxIdentifierLen = 31;
 //public const u32 MaxFeatureName = 31;
 public const u32 MaxFeatureDepth = 6;
-public const u32 MaxErrorMsgLen = 31;  // for #error "msg"
+public const u32 MaxErrorMsgLen = 127; // for #error "msg"

 public const u32 MaxMultiString = 64*1024;
 public const u32 MaxMultiDeclBits = 4;
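With the limit raised from 31 to 127 bytes, longer directive messages now fit. A minimal sketch (message text invented, wrapped in `#if 0` so it stays benign):

#if 0
#error this message is well past the old 31-byte cap but fits comfortably inside the new 127-byte maximum
#endif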

parser/c2_parser.c2

Lines changed: 15 additions & 15 deletions

@@ -142,8 +142,7 @@ public fn void Parser.parse(Parser* p, i32 file_id, bool is_interface, bool is_g
         p.sm.get_offset(p.file_id),
         p.kwinfo,
         p.features,
-        on_tokenizer_error,
-        on_tokenizer_warning,
+        Parser.on_tokenizer_error,
         p,
         false);
     p.tok.init();

@@ -159,17 +158,22 @@ public fn void Parser.parse(Parser* p, i32 file_id, bool is_interface, bool is_g
     buf.free();
 }

-fn void on_tokenizer_error(void* arg, SrcLoc loc) {
+fn void Parser.on_tokenizer_error(void* arg, c2_tokenizer.ErrorLevel level, SrcLoc loc, const char* msg) {
     Parser* p = arg;
-    // NOTE: cannot use p.tok.error_msg, because of possible lookahead (changes token)
-    p.tok.loc = loc;
-    // will longjmp
-    p.error("%s", p.tokenizer.error_msg);
-}

-fn void on_tokenizer_warning(void* arg, SrcLoc loc) {
-    Parser* p = arg;
-    p.diags.error(loc, "%s", p.tokenizer.error_msg);
+    switch (level) {
+    case Note:
+        p.diags.note(loc, "%s", msg);
+        break;
+    case Warning:
+        p.diags.warn(loc, "%s", msg);
+        break;
+    default:
+        p.diags.error(loc, "%s", msg);
+        break;
+    }
+    if (level == c2_tokenizer.ErrorLevel.FatalError)
+        longjmp(&p.jmpbuf, 1);
 }

 fn void Parser.consumeToken(Parser* p) {

@@ -872,10 +876,6 @@ fn void Parser.dump_token(Parser* p, const Token* tok) @(unused) {
         out.add(p.pool.idx2str(tok.text_idx));
         out.add("*/");
         break;
-    case Warning:
-        out.color(color.Yellow);
-        out.add(tok.error_msg);
-        break;
     case Error:
         out.color(color.Red);
         out.add(p.tokenizer.error_msg);
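For illustration, a minimal standalone sketch (not part of the commit) of how another client could supply the single unified callback. The `example` module, the `report` function, and the `src_loc` import are assumptions; the signature follows the `ErrorFn` type added in c2_tokenizer.c2 below:

module example;

import c2_tokenizer local;  // for ErrorLevel and ErrorFn
import src_loc local;       // assumed module providing SrcLoc (u32)
import stdio local;

// matches ErrorFn: fn void (void* arg, ErrorLevel level, SrcLoc loc, const char* msg)
fn void report(void* arg, ErrorLevel level, SrcLoc loc, const char* msg) {
    // arg could carry tool state; unused in this sketch
    switch (level) {
    case Note:
        printf("loc %u: note: %s\n", loc, msg);
        break;
    case Warning:
        printf("loc %u: warning: %s\n", loc, msg);
        break;
    default:    // Error and FatalError
        printf("loc %u: error: %s\n", loc, msg);
        break;
    }
}

Registration would mirror the updated c2cat call site: `tokenizer.init(pool, buf, input, 0, &kwinfo, &features, report, nil, true)`.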

parser/c2_tokenizer.c2

Lines changed: 41 additions & 38 deletions

@@ -253,7 +253,8 @@ public type Feature struct {
     bool is_else; // inside the #else block
 }

-public type HandlerFn fn void (void* arg, SrcLoc loc);
+public type ErrorLevel enum u8 { Note, Warning, Error, FatalError }
+public type ErrorFn fn void (void* arg, ErrorLevel level, SrcLoc loc, const char* msg);

 public type Tokenizer struct {
     const char* cur;

@@ -265,9 +266,8 @@ public type Tokenizer struct {

     string_pool.Pool* pool; // no ownership
     string_buffer.Buf* buf; // no ownership, used for strings and character constants
-    HandlerFn on_error;
-    HandlerFn on_warning;
-    void* fn_arg;
+    ErrorFn on_error;
+    void* on_error_arg;

     // Feature handling
     Feature[constants.MaxFeatureDepth+1] feature_stack;

@@ -278,7 +278,7 @@ public type Tokenizer struct {

     char[256] error_msg;
 }
-static_assert(416, sizeof(Tokenizer));
+static_assert(408, sizeof(Tokenizer));

 public fn void Tokenizer.init(Tokenizer* t,
     string_pool.Pool* pool,

@@ -287,9 +287,8 @@ public fn void Tokenizer.init(Tokenizer* t,
     SrcLoc loc_start,
     const keywords.Info* kwinfo,
     const string_list.List* features,
-    HandlerFn on_error,
-    HandlerFn on_warning,
-    void* fn_arg,
+    ErrorFn on_error,
+    void* on_error_arg,
     bool raw_mode)
 {
     string.memset(t, 0, sizeof(Tokenizer));

@@ -302,8 +301,7 @@ public fn void Tokenizer.init(Tokenizer* t,
     t.pool = pool;
     t.buf = buf;
     t.on_error = on_error;
-    t.on_warning = on_warning;
-    t.fn_arg = fn_arg;
+    t.on_error_arg = on_error_arg;

     t.features = features;
     t.raw_mode = raw_mode;

@@ -684,7 +682,7 @@ fn void Tokenizer.error(Tokenizer* t, Token* result, const char* format @(printf
     result.kind = Kind.Error;
     result.error_msg = t.error_msg;
     result.done = true;
-    if (t.on_error) t.on_error(t.fn_arg, result.loc);
+    if (t.on_error) t.on_error(t.on_error_arg, FatalError, result.loc, t.error_msg);
 }

 // generate an error but keep parsing

@@ -694,8 +692,7 @@ fn void Tokenizer.num_error(Tokenizer* t, Token* result, const char* p, const ch
     vsnprintf(t.error_msg, sizeof(t.error_msg), format, args);
     va_end(args);

-    // XXX: error position should be passed separately from token start
-    result.loc = t.loc_start + (SrcLoc)(p - t.input_start);
+    SrcLoc err_loc = t.loc_start + (SrcLoc)(p - t.input_start);
     // read the rest of the pp-number token
     for (;;) {
         if ((*p == 'e' || *p == 'E' || *p == 'p' || *p == 'P') && (p[1] == '+' || p[1] == '-')) {

@@ -712,7 +709,8 @@
     }
     t.cur = p;
     result.len = (u16)((p - t.input_start) - (result.loc - t.loc_start));
-    if (t.on_warning) t.on_warning(t.fn_arg, result.loc);
+    // This is a non fatal error: keep parsing but do not analyse
+    if (t.on_error) t.on_error(t.on_error_arg, Error, err_loc, t.error_msg);
 }

 fn void Tokenizer.lex_identifier(Tokenizer* t, Token* result) {
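The effect of the `num_error` change: the diagnostic now points at the offending character (`err_loc`) instead of overwriting `result.loc`, it is reported at level `Error` rather than through the removed warning callback, and tokenizing continues. A hypothetical input (exact diagnostic wording not taken from the compiler):

fn u32 example() {
    u32 x = 0x12g4;  // error reported at the 'g'; the rest of the
                     // pp-number is consumed and lexing continues,
                     // so diagnostics after this line still appear
    return x;
}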
@@ -1435,14 +1433,11 @@ fn bool Tokenizer.lex_feature_cmd(Tokenizer* t, Token* result) {
     case Feat_ifdef:
     case Feat_ifndef:
     case Feat_elif:
-        if (t.handle_if(result, kind)) return true;
-        break;
+        return t.handle_if(result, kind);
     case Feat_else:
-        if (t.handle_else(result)) return true;
-        break;
+        return t.handle_else(result);
     case Feat_endif:
-        if (t.handle_endif(result)) return true;
-        break;
+        return t.handle_endif(result);
     case Feat_error:
     case Feat_warning:
         if (!t.is_enabled()) return false; // if disabled, dont care if anything else

@@ -1466,29 +1461,37 @@ fn bool Tokenizer.at_bol(Tokenizer* t) {
 }

 fn bool Tokenizer.parse_error_warn(Tokenizer* t, Token* result, Kind kind) {
-    const char* start = t.cur;
-    while (*t.cur != '\0' && *t.cur != '\r' && *t.cur != '\n')
-        t.cur++;
-    usize len = (usize)(t.cur - start);
-    if (len > constants.MaxErrorMsgLen) {
-        t.error(result, "error msg too long (max %d bytes)", constants.MaxErrorMsgLen);
-        return true;
+    Token tok;
+
+    // parse pptokens instead of raw text
+    string_buffer.Buf msg.init(t.error_msg, elemsof(t.error_msg), false, false, 0);
+    SrcLoc last_loc = 0;
+    while (t.lex_preproc(&tok) != Kind.Eof) {
+        // replace blanks with a single space
+        if (last_loc && last_loc < tok.loc) msg.add1(' ');
+        // copy string text or token source
+        if (tok.kind == Kind.StringLiteral) {
+            msg.add2(t.pool.idx2str(tok.text_idx), tok.text_len);
+        } else {
+            msg.add2(t.input_start + (tok.loc - t.loc_start), tok.len);
+        }
+        last_loc = tok.loc + tok.len;
     }
-    char[constants.MaxErrorMsgLen+1] msg;
-    string.memcpy(msg, start, len);
-    msg[len] = 0;
+    msg.size(); // ensure null terminator

     if (kind == Kind.Feat_error) {
-        t.cur = t.line_start;
-        t.error(result, "%s", msg);
-    } else {
-        // TODO: output diagnostic synchronously
-        string.strcpy(t.error_msg, msg);
-        result.kind = Kind.Warning;
-        result.len = (u16)((t.cur - t.input_start) - (result.loc - t.loc_start));
+        const char* start = t.input_start + (result.loc - t.loc_start);
+        result.kind = Kind.Error;
+        result.done = true;
+        result.len = (u16)(t.cur - start);
         result.error_msg = t.error_msg;
+        t.cur = start; // make #error sticky
+        if (t.on_error) t.on_error(t.on_error_arg, FatalError, result.loc, t.error_msg);
+        return true; // return error token with result.done set
+    } else {
+        if (t.on_error) t.on_error(t.on_error_arg, Warning, result.loc, t.error_msg);
+        return false; // continue reading tokens
     }
-    return true;
 }

 fn bool Tokenizer.is_enabled(const Tokenizer* t) {
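Because the message is now rebuilt from pptokens, comments disappear and whitespace runs collapse to single spaces (and, per the `StringLiteral` branch, string contents are presumably copied without their quotes). A sketch of the expected normalization:

// hypothetical input:
#warning /* comment */ this     is   a warning
// collected message: "this is a warning"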

parser/token.c2

Lines changed: 0 additions & 2 deletions

@@ -146,7 +146,6 @@ public type Kind enum u8 {
     BlockComment,
     // Special Tokens
     Eof,
-    Warning,
     Error,
 }

@@ -285,7 +284,6 @@ const char*[] token_names = {
     [Kind.LineComment] = "l-comment",
     [Kind.BlockComment] = "b-comment",
     [Kind.Eof] = "eof",
-    [Kind.Warning] = "warning",
     [Kind.Error] = "error",
 }

test/parser/preprocessor_directives.c2

Lines changed: 6 additions & 1 deletion

@@ -25,9 +25,14 @@ const u32 Z = 2;
 static_assert(Z, 1);

 #if 0
-#warning /* comment */ this is a warning
+#warning /* comment */ this is a disabled warning
 #endif

+/**/ // @warning{this is a warning} +1
+#warning this is a warning
+/**/ // @warning{this is a warning} +1
+#warning /* comment */ this is a warning
+
 public fn i32 main() {
     return 0;
 }
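The `+1` suffix on each `@warning{...}` tag tells the tester to expect the diagnostic one line below the tag comment, i.e. on the `#warning` line itself; the suffix parsing lives in tools/tester/test_db.c2 below. A hypothetical variant, assuming offsets other than 1 behave the same way:

/**/ // @warning{two lines down} +2
// filler comment line
#warning two lines down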

tools/c2cat.c2

Lines changed: 1 addition & 1 deletion

@@ -306,7 +306,7 @@ public fn i32 c2cat(const char* filename)
     keywords.Info kwinfo;
     kwinfo.init(ctx.pool);
     c2_tokenizer.Tokenizer tokenizer;
-    tokenizer.init(ctx.pool, buf, ctx.input, 0, &kwinfo, &features, nil, nil, nil, true);
+    tokenizer.init(ctx.pool, buf, ctx.input, 0, &kwinfo, &features, nil, nil, true);
     ctx.tokenizer = &tokenizer;

     Token tok;

tools/tester/test_db.c2

Lines changed: 17 additions & 6 deletions

@@ -15,6 +15,7 @@

 module test_db;

+import ctype local;
 import c_errno local;
 import stdio local;
 import stdarg local;

@@ -491,24 +492,34 @@ fn void Db.parseTags(Db* db, const char* start, const char* end) {
     char[128] msg;
     if (!db.readUntil(msg, elemsof(msg), cp, '}', "message"))
         return;
+
+    // adjust line number for some special cases
+    u32 line = db.line_nr - db.line_offset;
+    for (const char* suff = cp + strlen(msg) + 1; suff < end; suff++) {
+        if (!isspace(*suff)) {
+            line += atoi(suff);
+            break;
+        }
+    }
+
     switch (kind) {
     case ERROR:
 #if TesterDebug
-        color_print(color.Blue, " expecting error '%s' at %d", msg, db.line_nr - db.line_offset);
+        color_print(color.Blue, " expecting error '%s' at %d", msg, line);
 #endif
-        db.errors.add(db.current_file, db.line_nr - db.line_offset, msg);
+        db.errors.add(db.current_file, line, msg);
         break;
     case WARNING:
 #if TesterDebug
-        color_print(color.Blue, " expecting warning '%s' at %d", msg, db.line_nr - db.line_offset);
+        color_print(color.Blue, " expecting warning '%s' at %d", msg, line);
 #endif
-        db.warnings.add(db.current_file, db.line_nr - db.line_offset, msg);
+        db.warnings.add(db.current_file, line, msg);
         break;
     case NOTE:
 #if TesterDebug
-        color_print(color.Blue, " expecting note '%s' at %d", msg, db.line_nr - db.line_offset);
+        color_print(color.Blue, " expecting note '%s' at %d", msg, line);
 #endif
-        db.notes.add(db.current_file, db.line_nr - db.line_offset, msg);
+        db.notes.add(db.current_file, line, msg);
         break;
     }
 }
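A worked example of the new offset handling (line numbers invented): after `readUntil` copies the message, `cp + strlen(msg) + 1` points just past the closing `}`; the loop skips whitespace, and `atoi` converts the first non-space run into an offset added to the expected line.

// tag comment sits on line 30 of a test file:
//   /**/ // @warning{this is a warning} +1
// msg    = "this is a warning"
// suffix = "+1" -> atoi() == 1
// expected line = 30 + 1 = 31, the line holding the #warning directive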
