Skip to content

Commit dd292af

Browse files
committed
Parser: improve error message handling
* use a single `on_error` handler with error level and message arguments * remove the `Warning` token type, which was never handled anyway * improve `#error` and `#warning` message parsing consistency * make `num_error` messages non-fatal * fix `#warning` behavior, add tests
1 parent b5b537c commit dd292af

File tree

7 files changed

+73
-58
lines changed

7 files changed

+73
-58
lines changed

ast_utils/constants.c2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ public const u32 MaxScopes = 32;
2121
public const u32 MaxIdentifierLen = 31;
2222
//public const u32 MaxFeatureName = 31;
2323
public const u32 MaxFeatureDepth = 6;
24-
public const u32 MaxErrorMsgLen = 31; // for #error "msg"
24+
public const u32 MaxErrorMsgLen = 127; // for #error "msg"
2525

2626
public const u32 MaxMultiString = 64*1024;
2727
public const u32 MaxMultiDeclBits = 4;

parser/c2_parser.c2

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -142,8 +142,7 @@ public fn void Parser.parse(Parser* p, i32 file_id, bool is_interface, bool is_g
142142
p.sm.get_offset(p.file_id),
143143
p.kwinfo,
144144
p.features,
145-
on_tokenizer_error,
146-
on_tokenizer_warning,
145+
Parser.on_tokenizer_error,
147146
p,
148147
false);
149148
p.tok.init();
@@ -159,16 +158,22 @@ public fn void Parser.parse(Parser* p, i32 file_id, bool is_interface, bool is_g
159158
buf.free();
160159
}
161160

162-
fn void on_tokenizer_error(void* arg, SrcLoc loc) {
161+
fn void Parser.on_tokenizer_error(void* arg, c2_tokenizer.ErrorLevel level, SrcLoc loc, const char* msg) {
163162
Parser* p = arg;
164-
// NOTE: cannot use p.tok.error_msg, because of possible lookahead (changes token)
165-
// will longjmp
166-
p.errorAt(loc, "%s", p.tokenizer.error_msg);
167-
}
168163

169-
fn void on_tokenizer_warning(void* arg, SrcLoc loc) {
170-
Parser* p = arg;
171-
p.diags.error(loc, "%s", p.tokenizer.error_msg);
164+
switch (level) {
165+
case Note:
166+
p.diags.note(loc, "%s", msg);
167+
break;
168+
case Warning:
169+
p.diags.warn(loc, "%s", msg);
170+
break;
171+
default:
172+
p.diags.error(loc, "%s", msg);
173+
break;
174+
}
175+
if (level == c2_tokenizer.ErrorLevel.FatalError)
176+
longjmp(&p.jmpbuf, 1);
172177
}
173178

174179
fn void Parser.consumeToken(Parser* p) {
@@ -891,10 +896,6 @@ fn void Parser.dump_token(Parser* p, const Token* tok) @(unused) {
891896
out.add(p.pool.idx2str(tok.text_idx));
892897
out.add("*/");
893898
break;
894-
case Warning:
895-
out.color(color.Yellow);
896-
out.add(tok.error_msg);
897-
break;
898899
case Error:
899900
out.color(color.Red);
900901
out.add(p.tokenizer.error_msg);

parser/c2_tokenizer.c2

Lines changed: 41 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,8 @@ public type Feature struct {
253253
bool is_else; // inside the #else block
254254
}
255255

256-
public type HandlerFn fn void (void* arg, SrcLoc loc);
256+
public type ErrorLevel enum u8 { Note, Warning, Error, FatalError }
257+
public type ErrorFn fn void (void* arg, ErrorLevel level, SrcLoc loc, const char* msg);
257258

258259
public type Tokenizer struct {
259260
const char* cur;
@@ -265,9 +266,8 @@ public type Tokenizer struct {
265266

266267
string_pool.Pool* pool; // no ownership
267268
string_buffer.Buf* buf; // no ownership, used for strings and character constants
268-
HandlerFn on_error;
269-
HandlerFn on_warning;
270-
void* fn_arg;
269+
ErrorFn on_error;
270+
void* on_error_arg;
271271

272272
// Feature handling
273273
Feature[constants.MaxFeatureDepth+1] feature_stack;
@@ -278,7 +278,7 @@ public type Tokenizer struct {
278278

279279
char[256] error_msg;
280280
}
281-
static_assert(416, sizeof(Tokenizer));
281+
static_assert(408, sizeof(Tokenizer));
282282

283283
public fn void Tokenizer.init(Tokenizer* t,
284284
string_pool.Pool* pool,
@@ -287,9 +287,8 @@ public fn void Tokenizer.init(Tokenizer* t,
287287
SrcLoc loc_start,
288288
const keywords.Info* kwinfo,
289289
const string_list.List* features,
290-
HandlerFn on_error,
291-
HandlerFn on_warning,
292-
void* fn_arg,
290+
ErrorFn on_error,
291+
void* on_error_arg,
293292
bool raw_mode)
294293
{
295294
string.memset(t, 0, sizeof(Tokenizer));
@@ -302,8 +301,7 @@ public fn void Tokenizer.init(Tokenizer* t,
302301
t.pool = pool;
303302
t.buf = buf;
304303
t.on_error = on_error;
305-
t.on_warning = on_warning;
306-
t.fn_arg = fn_arg;
304+
t.on_error_arg = on_error_arg;
307305

308306
t.features = features;
309307
t.raw_mode = raw_mode;
@@ -684,7 +682,7 @@ fn void Tokenizer.error(Tokenizer* t, Token* result, const char* format @(printf
684682
result.kind = Kind.Error;
685683
result.error_msg = t.error_msg;
686684
result.done = true;
687-
if (t.on_error) t.on_error(t.fn_arg, result.loc);
685+
if (t.on_error) t.on_error(t.on_error_arg, FatalError, result.loc, t.error_msg);
688686
}
689687

690688
// generate an error but keep parsing
@@ -694,8 +692,7 @@ fn void Tokenizer.num_error(Tokenizer* t, Token* result, const char* p, const ch
694692
vsnprintf(t.error_msg, sizeof(t.error_msg), format, args);
695693
va_end(args);
696694

697-
// XXX: error position should be passed separately from token start
698-
result.loc = t.loc_start + (SrcLoc)(p - t.input_start);
695+
SrcLoc err_loc = t.loc_start + (SrcLoc)(p - t.input_start);
699696
// read the rest of the pp-number token
700697
for (;;) {
701698
if ((*p == 'e' || *p == 'E' || *p == 'p' || *p == 'P') && (p[1] == '+' || p[1] == '-')) {
@@ -712,7 +709,8 @@ fn void Tokenizer.num_error(Tokenizer* t, Token* result, const char* p, const ch
712709
}
713710
t.cur = p;
714711
result.len = (u16)((p - t.input_start) - (result.loc - t.loc_start));
715-
if (t.on_warning) t.on_warning(t.fn_arg, result.loc);
712+
// This is a non fatal error: keep parsing but do not analyse
713+
if (t.on_error) t.on_error(t.on_error_arg, Error, err_loc, t.error_msg);
716714
}
717715

718716
fn void Tokenizer.lex_identifier(Tokenizer* t, Token* result) {
@@ -1433,14 +1431,11 @@ fn bool Tokenizer.lex_feature_cmd(Tokenizer* t, Token* result) {
14331431
case Feat_ifdef:
14341432
case Feat_ifndef:
14351433
case Feat_elif:
1436-
if (t.handle_if(result, kind)) return true;
1437-
break;
1434+
return t.handle_if(result, kind);
14381435
case Feat_else:
1439-
if (t.handle_else(result)) return true;
1440-
break;
1436+
return t.handle_else(result);
14411437
case Feat_endif:
1442-
if (t.handle_endif(result)) return true;
1443-
break;
1438+
return t.handle_endif(result);
14441439
case Feat_error:
14451440
case Feat_warning:
14461441
if (!t.is_enabled()) return false; // if disabled, dont care if anything else
@@ -1451,7 +1446,6 @@ fn bool Tokenizer.lex_feature_cmd(Tokenizer* t, Token* result) {
14511446
t.error(result, "unknown feature-selection command");
14521447
return true;
14531448
}
1454-
return false;
14551449
}
14561450

14571451
fn bool Tokenizer.at_bol(Tokenizer* t) {
@@ -1464,29 +1458,37 @@ fn bool Tokenizer.at_bol(Tokenizer* t) {
14641458
}
14651459

14661460
fn bool Tokenizer.parse_error_warn(Tokenizer* t, Token* result, Kind kind) {
1467-
const char* start = t.cur;
1468-
while (*t.cur != '\0' && *t.cur != '\r' && *t.cur != '\n')
1469-
t.cur++;
1470-
usize len = (usize)(t.cur - start);
1471-
if (len > constants.MaxErrorMsgLen) {
1472-
t.error(result, "error msg too long (max %d bytes)", constants.MaxErrorMsgLen);
1473-
return true;
1461+
Token tok;
1462+
1463+
// parse pptokens instead of raw text
1464+
string_buffer.Buf msg.init(t.error_msg, elemsof(t.error_msg), false, false, 0);
1465+
SrcLoc last_loc = 0;
1466+
while (t.lex_preproc(&tok) != Kind.Eof) {
1467+
// replace blanks with a single space
1468+
if (last_loc && last_loc < tok.loc) msg.add1(' ');
1469+
// copy string text or token source
1470+
if (tok.kind == Kind.StringLiteral) {
1471+
msg.add2(t.pool.idx2str(tok.text_idx), tok.text_len);
1472+
} else {
1473+
msg.add2(t.input_start + (tok.loc - t.loc_start), tok.len);
1474+
}
1475+
last_loc = tok.loc + tok.len;
14741476
}
1475-
char[constants.MaxErrorMsgLen+1] msg;
1476-
string.memcpy(msg, start, len);
1477-
msg[len] = 0;
1477+
msg.size(); // ensure null terminator
14781478

14791479
if (kind == Kind.Feat_error) {
1480-
t.cur = t.line_start;
1481-
t.error(result, "%s", msg);
1482-
} else {
1483-
// TODO: output diagnostic synchronously
1484-
string.strcpy(t.error_msg, msg);
1485-
result.kind = Kind.Warning;
1486-
result.len = (u16)((t.cur - t.input_start) - (result.loc - t.loc_start));
1480+
const char* start = t.input_start + (result.loc - t.loc_start);
1481+
result.kind = Kind.Error;
1482+
result.done = true;
1483+
result.len = (u16)(t.cur - start);
14871484
result.error_msg = t.error_msg;
1485+
t.cur = start; // make #error sticky
1486+
if (t.on_error) t.on_error(t.on_error_arg, FatalError, result.loc, t.error_msg);
1487+
return true; // return error token with result.done set
1488+
} else {
1489+
if (t.on_error) t.on_error(t.on_error_arg, Warning, result.loc, t.error_msg);
1490+
return false; // continue reading tokens
14881491
}
1489-
return true;
14901492
}
14911493

14921494
fn bool Tokenizer.is_enabled(const Tokenizer* t) {

parser/token.c2

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,6 @@ public type Kind enum u8 {
146146
BlockComment,
147147
// Special Tokens
148148
Eof,
149-
Warning,
150149
Error,
151150
}
152151

@@ -285,7 +284,6 @@ const char*[] token_names = {
285284
[Kind.LineComment] = "l-comment",
286285
[Kind.BlockComment] = "b-comment",
287286
[Kind.Eof] = "eof",
288-
[Kind.Warning] = "warning",
289287
[Kind.Error] = "error",
290288
}
291289

test/parser/preprocessor_directives.c2

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,14 @@ const u32 Z = 2;
2525
static_assert(Z, 1);
2626

2727
#if 0
28-
#warning /* comment */ this is a warning
28+
#warning /* comment */ this is a disabled warning
2929
#endif
3030

31+
/**/ // @warning{this is a warning} +1
32+
#warning this is a warning
33+
/**/ // @warning{this is a warning} +1
34+
#warning /* comment */ this is a warning
35+
3136
public fn i32 main() {
3237
return 0;
3338
}

tools/c2cat.c2

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ public fn i32 c2cat(const char* filename, bool use_color)
287287
keywords.Info kwinfo;
288288
kwinfo.init(ctx.pool);
289289
c2_tokenizer.Tokenizer tokenizer;
290-
tokenizer.init(ctx.pool, buf, ctx.input, 0, &kwinfo, &features, nil, nil, nil, true);
290+
tokenizer.init(ctx.pool, buf, ctx.input, 0, &kwinfo, &features, nil, nil, true);
291291
ctx.tokenizer = &tokenizer;
292292

293293
Token tok;

tools/tester/test_db.c2

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
module test_db;
1717

18+
import ctype local;
1819
import c_errno local;
1920
import stdio local;
2021
import stdarg local;
@@ -504,7 +505,15 @@ fn void Db.parseTags(Db* db, const char* start, const char* end) {
504505
char[128] msg;
505506
if (!db.readUntil(msg, elemsof(msg), cp, '}', "message"))
506507
return;
508+
507509
u32 line_nr = db.line_nr - db.line_offset;
510+
// adjust line number for some special cases
511+
for (const char* suff = cp + strlen(msg) + 1; suff < end; suff++) {
512+
if (!isspace(*suff)) {
513+
line_nr += atoi(suff);
514+
break;
515+
}
516+
}
508517
switch (kind) {
509518
case ERROR:
510519
#if TesterDebug

0 commit comments

Comments
 (0)