forked from riversideresearch/Hammer
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhammer.h
More file actions
1273 lines (1120 loc) · 42.4 KB
/
hammer.h
File metadata and controls
1273 lines (1120 loc) · 42.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* Parser combinators for binary formats.
* Copyright (c) 2025 Riverside Research
* Copyright (C) 2012 Meredith L. Patterson, Dan "TQ" Hirsch
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef HAMMER_HAMMER__H
#define HAMMER_HAMMER__H
#include "compiler_specifics.h"
#ifndef HAMMER_INTERNAL__NO_STDARG_H
#include <stdarg.h>
#endif // HAMMER_INTERNAL__NO_STDARG_H
#include "allocator.h"
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
/*
* Endianness flags. Per the h_with_endianness() documentation, its 'endianness' argument is a
* bit-wise OR of one BYTE_* constant and one BIT_* constant. Both *_LITTLE_ENDIAN constants are 0,
* so little-endian is simply the absence of the corresponding *_BIG_ENDIAN bit.
*/
#define BYTE_BIG_ENDIAN 0x1
#define BIT_BIG_ENDIAN 0x2
#define BIT_LITTLE_ENDIAN 0x0
#define BYTE_LITTLE_ENDIAN 0x0
#ifdef __cplusplus
extern "C" {
#endif
/** Forward declaration only; the layout of HParseState_ is not given in this part of the header. */
typedef struct HParseState_ HParseState;
/**
* @enum HParserBackend
* @brief Available parser backend implementations
*
* PB_MIN aliases the first enumerator (PB_INVALID, value 0) and PB_MAX aliases the last one
* (PB_PACKRAT). PB_PACKRAT is the only non-invalid backend in this enum.
*/
typedef enum HParserBackend_ {
PB_MIN = 0,
PB_INVALID = PB_MIN, /**< Have a backend that always fails to pass around "no such backend"
indications */
PB_PACKRAT,
PB_MAX = PB_PACKRAT
} HParserBackend;
typedef struct HParserBackendVTable_ HParserBackendVTable;
/**
* @struct HParserBackendWithParams
* @brief Backend configuration with parameters
*/
typedef struct HParserBackendWithParams_ {
/** Name of backend extracted from a string if the choice of backend was specified in a call
* using a string */
char *requested_name;
/** The backend (if backend is to be loaded from an external module set to invalid (?)) */
HParserBackend backend;
/** Backend vtable (TODO: use this instead of the enum so we can get rid of that) */
HParserBackendVTable *backend_vtable;
/**
* Backend-specific parameters - if this needs to be freed, the backend should provide a
* free_params method in its vtable; currently no backends do this - PB_PACKRAT takes no params
*/
void *params;
/** Allocator to use to free this (and the params if necessary) */
HAllocator *mm__;
} HParserBackendWithParams;
/**
* @enum HTokenType
* @brief Token types for parsed results
*
* Stored in HParsedToken::token_type. Enumerators without an explicit value take the previous
* value plus one, so TT_RESERVED_1 is 17 and TT_MAX is 65. Note that TT_MAX is therefore
* TT_USER + 1 and is not an upper bound on user-defined token types (BackendTokenType_ already
* defines values from TT_USER up to TT_USER + 5).
*/
typedef enum HTokenType_ {
TT_INVALID = 0,
TT_NONE = 1, /**< empty result, e.g. what h_optional reports when its parser did not match */
TT_BYTES = 2,
TT_SINT = 4,
TT_UINT = 8,
TT_DOUBLE = 12,
TT_FLOAT = 13,
TT_SEQUENCE = 16,
TT_RESERVED_1, /**< reserved for backend-specific internal use */
TT_ERR = 32,
TT_USER = 64, /**< base value for user-defined token types (see BackendTokenType_) */
TT_MAX
} HTokenType;
/**
* @struct HCountedArray
* @brief Dynamic array of parsed tokens
*
* This is the type behind HParsedToken::seq, i.e. the payload of sequence results.
*/
typedef struct HCountedArray_ {
size_t capacity; /**< NOTE(review): presumably the number of slots allocated in elements */
size_t used; /**< NOTE(review): presumably the number of slots currently filled */
HArena *arena; /**< NOTE(review): presumably the arena that elements is allocated from */
struct HParsedToken_ **elements; /**< array of pointers to the contained tokens */
} HCountedArray;
/**
* @struct HBytes
* @brief Byte array representation
*
* The data is only pointed to, not owned by value; the HParseResult documentation notes that a
* result may reference the input string.
*/
typedef struct HBytes_ {
const uint8_t *token; /**< pointer to the first byte */
size_t len; /**< NOTE(review): presumably the number of bytes at token — confirm */
} HBytes;
#ifdef SWIG
/**
* Named equivalent of the anonymous value union inside HParsedToken. Only declared when SWIG is
* defined, where HParsedToken carries it as the member token_data. It has the same members, in
* the same order, as the anonymous union.
*/
typedef union {
HBytes bytes;
int64_t sint;
uint64_t uint;
double dbl;
float flt;
HCountedArray *seq;
void *user;
} HTokenData;
#endif
/**
* @struct HParsedToken
* @brief Parsed token with type and value
*
* A tagged union: token_type is the tag and the union holds the value. Without SWIG the union is
* anonymous, so its members are accessed directly on the token; with SWIG defined the same
* members live in the named member token_data.
*/
typedef struct HParsedToken_ {
HTokenType token_type; /**< tag selecting the meaningful union member */
#ifndef SWIG
union {
HBytes bytes;
int64_t sint;
uint64_t uint;
double dbl;
float flt;
HCountedArray *seq; /**< a sequence of HParsedToken's */
void *user; /**< payload for user-defined token types */
};
#else
HTokenData token_data;
#endif
size_t index; /**< NOTE(review): presumably the token's position in the input — confirm unit */
size_t bit_length; /**< NOTE(review): presumably the token's length in bits — confirm */
char bit_offset; /**< NOTE(review): presumably the bit offset at which the token starts */
} HParsedToken;
/**
* @struct HParseResult
* @brief The result of a successful parse. Note that this may reference the input string. If a
* parse fails, the parse result will be NULL. If a parse is successful but there's nothing there
* (i.e., if end_p succeeds) then there's a parse result but its ast is NULL.
*/
typedef struct HParseResult_ {
const HParsedToken *ast; /**< Abstract syntax tree; NULL for a successful but empty result */
int64_t bit_length; /**< NOTE(review): presumably the number of input bits consumed — confirm */
HArena *arena; /**< Memory arena for the parse result */
} HParseResult;
/**
* @brief Bit-writer handle, forward-declared here (the struct body is not shown in this part of
* the header).
* TODO: document me.
* Relevant functions: h_bit_writer_new, h_bit_writer_put, h_bit_writer_get_buffer,
* h_bit_writer_free
*/
typedef struct HBitWriter_ HBitWriter;
/** Forward declaration; referenced by HParser::desugared. */
typedef struct HCFChoice_ HCFChoice;
/** Forward declaration; the struct body is not shown in this part of the header. */
typedef struct HRVMProg_ HRVMProg;
/** Forward declaration; referenced by HParser::vtable. */
typedef struct HParserVtable_ HParserVtable;
// TODO: Make this internal
/**
* Parser object. Every combinator declared in this header returns a pointer to one of these, and
* h_parse() / h_parse_start() take one as input.
*/
typedef struct HParser_ {
const HParserVtable *vtable;
HParserBackend backend;
HParserBackendVTable *backend_vtable;
void *backend_data; /**< NOTE(review): presumably owned and interpreted by the backend */
void *env; /**< NOTE(review): presumably the combinator-specific state — confirm */
HCFChoice *desugared; /**< if the parser can be desugared, its desugared form */
} HParser;
/** State of an iterative (chunked) parse; see h_parse_start, h_parse_chunk and h_parse_finish. */
typedef struct HSuspendedParser_ HSuspendedParser;
/**
* @typedef HAction
* @brief Type of an action to apply to an AST, used in the action() parser. It can be any
* (user-defined) function that takes a HParseResult* and returns a HParsedToken*. (This is so that
* the user doesn't have to worry about memory allocation; action() does that for you.) Note that
* the tagged union in HParsedToken* supports user-defined types, so you can create your own token
* types (corresponding to, say, structs) and stuff values for them into the void* in the tagged
* union in HParsedToken.
*
* @param p The parse result to apply the action to.
* @param user_data Arbitrary user data pointer passed through from the action() parser.
* @return The token produced by the action. NOTE(review): the meaning of a NULL return is not
* specified here — confirm against the h_action implementation.
*/
typedef HParsedToken *(*HAction)(const HParseResult *p, void *user_data);
/**
* @typedef HPredicate
* @brief Type of a boolean attribute-checking function, used in the attr_bool() parser. It can be
* any (user-defined) function that takes a HParseResult* and returns true or false.
*
* @param p The parse result to check. Per the h_attr_bool() documentation, the result and its AST
* have already been checked for existence before the predicate is called.
* @param user_data Arbitrary user data pointer passed through from the attr_bool() parser.
* @return true to accept the parse, false to make it fail.
*/
typedef bool (*HPredicate)(HParseResult *p, void *user_data);
/**
* @typedef HContinuation
* @brief Type of a parser that depends on the result of a previous parser, used in h_bind(). The
* void* argument is passed through from h_bind() and can be used to arbitrarily parameterize the
* function further. The HAllocator* argument gives access to temporary memory and is to be used for
* any allocations inside the function. Specifically, construction of any HParsers should use the
* '__m' combinator variants with the given allocator. Anything allocated thus will be freed by
* 'h_bind'.
*
* @param mm__ Allocator to use for any allocations needed to construct the returned parser.
* @param x Token produced by the previous parser.
* @param env User pointer passed through from h_bind().
* @return The parser to apply next.
*/
typedef HParser *(*HContinuation)(HAllocator *mm__, const HParsedToken *x, void *env);
/**
* User-defined token types, numbered consecutively from TT_USER (64 through 69). Each name matches
* one of the backend_*_t structs declared in this header.
* NOTE(review): presumably each value tags a token whose 'user' pointer refers to the struct of
* the same name — confirm against the code that builds these tokens.
*/
enum BackendTokenType_ {
TT_backend_with_params_t = TT_USER,
TT_backend_name_t,
TT_backend_param_t,
TT_backend_param_name_t,
TT_backend_param_with_name_t,
TT_backend_params_t
};
/*
* Plain-data records describing a backend specification: a backend name plus a list of
* (optionally named) parameters. The struct names mirror the TT_backend_*_t token types one to one.
* NOTE(review): which buffer each 'len' field measures, and whether the uint8_t strings are
* NUL-terminated, is not visible in this header — confirm before relying on either.
*/
/** A single parameter value. */
typedef struct backend_param {
size_t len;
uint8_t *param;
uint8_t *param_name;
} backend_param_t;
/** The name of a parameter, together with a numeric id. */
typedef struct backend_param_name {
size_t len;
uint8_t *param_name;
size_t param_id;
} backend_param_name_t;
/** A parameter paired with its name record. */
typedef struct backend_param_with_name {
backend_param_name_t param_name;
backend_param_t param;
} backend_param_with_name_t;
/** The name of a backend. */
typedef struct {
uint8_t *name;
size_t len;
} backend_name_t;
/** A list of parameters; NOTE(review): len is presumably the element count of params. */
typedef struct backend_params {
backend_param_with_name_t *params;
size_t len;
} backend_params_t;
/** A backend name together with its parameter list. */
typedef struct backend_with_params {
backend_name_t name;
backend_params_t params;
} backend_with_params_t;
/**
* @defgroup benchmarking Benchmarking and Test Case Management
* @{
*/
/** One benchmark/test input. */
typedef struct HParserTestcase_ {
unsigned char *input; /**< input bytes to parse */
size_t length; /**< NOTE(review): presumably the number of bytes at input — confirm */
char *output_unambiguous; /**< NOTE(review): presumably the expected result text, in the same
format that h_write_result_unamb produces — confirm */
} HParserTestcase;
#ifdef SWIG
/**
* Named equivalent of the anonymous union inside HCaseResult. Only declared when SWIG is defined,
* where HCaseResult carries it as the member 'timestamp'.
*/
typedef union {
const char* actual_results;
size_t parse_time;
} HResultTiming;
#endif
/**
* Outcome of running one test case. 'success' tells which union member is filled in:
* actual_results on failure, parse_time on success. With SWIG defined the union is the named
* member 'timestamp' instead of being anonymous.
*/
typedef struct HCaseResult_ {
bool success;
#ifndef SWIG
union {
const char
*actual_results; /**< on failure, filled in with the results of h_write_result_unamb */
size_t parse_time; /**< on success, filled in with time for a single parse, in nsec */
};
#else
HResultTiming timestamp;
#endif
size_t length; /**< NOTE(review): meaning not documented; presumably a length tied to this case */
} HCaseResult;
/** Results of running a set of test cases against one backend. */
typedef struct HBackendResults_ {
HParserBackend backend; /**< backend these results belong to */
bool compile_success;
size_t n_testcases; /**< NOTE(review): presumably the number of entries in cases — confirm */
size_t failed_testcases; /**< actually a count... */
HCaseResult *cases; /**< per-test-case results */
} HBackendResults;
/** Collection of per-backend benchmark results. */
typedef struct HBenchmarkResults_ {
size_t len; /**< NOTE(review): presumably the number of entries in results — confirm */
HBackendResults *results;
} HBenchmarkResults;
/** @} */
/**
* @defgroup backend_functions Backend Management
* @{
*/
/**
* @brief Check if backend is available
* @param backend Backend to check
* @return 1 if available, 0 otherwise
*/
int h_is_backend_available(HParserBackend backend);
/**
* @brief Get default backend (currently PB_PACKRAT)
* @return Default backend
*/
HParserBackend h_get_default_backend(void);
/**
* @brief Get default backend vtable
* @return Backend function table
*/
HParserBackendVTable *h_get_default_backend_vtable(void);
/**
* @brief Copy backend configuration with parameters
* @param be_with_params Backend configuration to copy
* @return New backend configuration
*/
HParserBackendWithParams *h_copy_backend_with_params(HParserBackendWithParams *be_with_params);
/** Variant of h_copy_backend_with_params() taking an explicit allocator, mm__, as first argument. */
HParserBackendWithParams *h_copy_backend_with_params__m(HAllocator *mm__,
HParserBackendWithParams *be_with_params);
/**
* @brief Free backend configuration
* @param be_with_params Backend configuration to free
*/
void h_free_backend_with_params(HParserBackendWithParams *be_with_params);
/**
* @brief Get backend name string
* @param be Backend type
* @return Constant name string (do not free)
*/
const char *h_get_name_for_backend(HParserBackend be);
/**
* @brief Get backend configuration name string
* @param be_with_params Backend configuration
* @return Allocated name string (caller must free)
*/
char *h_get_name_for_backend_with_params(HParserBackendWithParams *be_with_params);
/**
* Variant of h_get_name_for_backend_with_params() taking an explicit allocator, mm__, as first
* argument. NOTE(review): presumably the returned string must then be released through mm__.
*/
char *h_get_name_for_backend_with_params__m(HAllocator *mm__,
HParserBackendWithParams *be_with_params);
/**
* @brief Get backend descriptive text
* @param be Backend type
* @return const char* (do not free)
*/
const char *h_get_descriptive_text_for_backend(HParserBackend be);
/**
* @brief Get backend configuration descriptive text
* @param be_with_params Backend configuration
* @return Allocated descriptive text (caller must free)
*/
char *h_get_descriptive_text_for_backend_with_params(HParserBackendWithParams *be_with_params);
/**
* Variant of h_get_descriptive_text_for_backend_with_params() taking an explicit allocator, mm__,
* as first argument. NOTE(review): presumably the returned string must then be released through
* mm__.
*/
char *h_get_descriptive_text_for_backend_with_params__m(HAllocator *mm__,
HParserBackendWithParams *be_with_params);
/**
* @brief Look up backend by name
* @param name Backend name
* @return Backend type or PB_INVALID
*/
HParserBackend h_query_backend_by_name(const char *name);
/**
* @brief Parse backend specification string
* @param name_with_params String like "lalr(1)", i.e. a backend name optionally followed by
* parenthesized parameters. NOTE(review): the example names a backend that is not in the
* HParserBackend enum of this header (only PB_PACKRAT is) — the example may be stale.
* @return Backend configuration
*/
HParserBackendWithParams *h_get_backend_with_params_by_name(const char *name_with_params);
/** Variant of h_get_backend_with_params_by_name() taking an explicit allocator, mm__, first. */
HParserBackendWithParams *h_get_backend_with_params_by_name__m(HAllocator *mm__,
const char *name_with_params);
/** @} */
/**
* @defgroup parsing Parsing Functions
* @{
*/
/**
* @brief Top-level function to call a parser that has been built over some piece of input (of known
* size).
*
* @param parser Parser to use
* @param input Input data
* @param length Length of input data
* @return Parse result, or NULL on failure
*/
HParseResult *h_parse(const HParser *parser, const uint8_t *input, size_t length);
/** Variant of h_parse() taking an explicit allocator, mm__, as its first argument. */
HParseResult *h_parse__m(HAllocator *mm__, const HParser *parser, const uint8_t *input,
size_t length);
/**
* @brief Initialize a parser for iteratively consuming an input stream in chunks. This is only
* supported by some backends.
*
* @param parser Parser to use
* @return Result is NULL if not supported by the backend.
*/
HSuspendedParser *h_parse_start(const HParser *parser);
/** Variant of h_parse_start() taking an explicit allocator, mm__, as its first argument. */
HSuspendedParser *h_parse_start__m(HAllocator *mm__, const HParser *parser);
/**
* @brief Run a suspended parser (as returned by h_parse_start) on a chunk of input.
* @param s Suspended parser state
* @param input Input data chunk
* @param length Length of input data chunk
* @return Returns true if the parser is done (needs no more input).
*/
bool h_parse_chunk(HSuspendedParser *s, const uint8_t *input, size_t length);
/**
* @brief Finish an iterative parse. Signals the end of input to the backend and returns the parse
* result.
*
* @param s Suspended parser state
* @return Parse result, or NULL on failure
*/
HParseResult *h_parse_finish(HSuspendedParser *s);
/** @} */
/**
* @defgroup basic_parsers Basic Parser Combinators
* @{
*/
/**
* @brief Given a string, returns a parser that parses that string value.
* @param str String to parse
* @param len Length of string to parse
* @return Result token type: TT_BYTES
*/
HParser *h_token(const uint8_t *str, const size_t len);
/** Variant of h_token() taking an explicit allocator, mm__, as its first argument. */
HParser *h_token__m(HAllocator *mm__, const uint8_t *str, const size_t len);
/**
* @brief Parse literal string (macro convenience)
* @param s String literal
* @note Expands to h_token() with a length of sizeof(s) - 1, i.e. the literal without its
* terminating NUL. s must therefore be a string literal or a char array; for a char pointer,
* sizeof would yield the pointer size rather than the string length.
*/
#define h_literal(s) h_token(((const uint8_t *)(s)), sizeof(s) - 1)
/**
* @brief Given a single character, returns a parser that parses that character.
* @param c Character to parse
* @return Result token type: TT_UINT
* @note Consumes 8 bits from the input stream
*/
HParser *h_ch(const uint8_t c);
/** Variant of h_ch() taking an explicit allocator, mm__, as its first argument. */
HParser *h_ch__m(HAllocator *mm__, const uint8_t c);
/**
* @brief Given two single-character bounds, lower and upper, returns a parser that parses a single
* character within the range
*
* @param lower Lower bound (inclusive)
* @param upper Upper bound (inclusive)
* @return Result token type: TT_UINT
* @note Consumes 8 bits from the input stream
*/
HParser *h_ch_range(const uint8_t lower, const uint8_t upper);
/** Variant of h_ch_range() taking an explicit allocator, mm__, as its first argument. */
HParser *h_ch_range__m(HAllocator *mm__, const uint8_t lower, const uint8_t upper);
/**
* @brief Given an integer parser, p, and two integer bounds, lower and upper, returns a parser that
* parses an integral value within the range
*
* @param p Integer parser (e.g., h_int8(), h_uint32(), etc.)
* @param lower Lower bound (inclusive)
* @param upper Upper bound (inclusive)
* @return Result token type: Same as p's result type
* @note Consumes the same number of bits as p
*/
HParser *h_int_range(const HParser *p, const int64_t lower, const int64_t upper);
/** Variant of h_int_range() taking an explicit allocator, mm__, as its first argument. */
HParser *h_int_range__m(HAllocator *mm__, const HParser *p, const int64_t lower,
const int64_t upper);
/**
* @brief Returns a parser that parses the specified number of bits. sign == true if signed, false
* if unsigned.
*
* @param len Number of bits
* @param sign true for signed, false for unsigned
* @return Result token type: TT_SINT if sign == true, TT_UINT if sign == false
* @note Consumes 'len' bits from the input stream
*/
HParser *h_bits(size_t len, _Bool sign);
/** Variant of h_bits() taking an explicit allocator, mm__, as its first argument. */
HParser *h_bits__m(HAllocator *mm__, size_t len, _Bool sign);
/**
* @brief Returns a parser that parses the specified number of octets. The input does not have to be
* aligned to a byte boundary.
*
* @param len Number of bytes
* @return Result token type: TT_BYTES
* @note Consumes 'len * 8' bits from the input stream
*/
HParser *h_bytes(size_t len);
/** Variant of h_bytes() taking an explicit allocator, mm__, as its first argument. */
HParser *h_bytes__m(HAllocator *mm__, size_t len);
/** @} */
/**
* @defgroup integer_parsers Integer Parsers
* Fixed-width integer parsers. Each h_*__m function is the variant of the parser declared just
* above it that takes an explicit allocator, mm__.
* @{
*/
/**
* @brief Parse a signed 64-bit integer
* @return Result token type: TT_SINT
* @note Consumes 64 bits from the input stream
*/
HParser *h_int64(void);
/** Variant of h_int64() taking an explicit allocator, mm__. */
HParser *h_int64__m(HAllocator *mm__);
/**
* @brief Parse a signed 32-bit integer
* @return Result token type: TT_SINT
* @note Consumes 32 bits from the input stream
*/
HParser *h_int32(void);
/** Variant of h_int32() taking an explicit allocator, mm__. */
HParser *h_int32__m(HAllocator *mm__);
/**
* @brief Parse a signed 16-bit integer
* @return Result token type: TT_SINT
* @note Consumes 16 bits from the input stream
*/
HParser *h_int16(void);
/** Variant of h_int16() taking an explicit allocator, mm__. */
HParser *h_int16__m(HAllocator *mm__);
/**
* @brief Parse a signed 8-bit integer
* @return Result token type: TT_SINT
* @note Consumes 8 bits from the input stream
*/
HParser *h_int8(void);
/** Variant of h_int8() taking an explicit allocator, mm__. */
HParser *h_int8__m(HAllocator *mm__);
/**
* @brief Parse an unsigned 64-bit integer
* @return Result token type: TT_UINT
* @note Consumes 64 bits from the input stream
*/
HParser *h_uint64(void);
/** Variant of h_uint64() taking an explicit allocator, mm__. */
HParser *h_uint64__m(HAllocator *mm__);
/**
* @brief Parse an unsigned 32-bit integer
* @return Result token type: TT_UINT
* @note Consumes 32 bits from the input stream
*/
HParser *h_uint32(void);
/** Variant of h_uint32() taking an explicit allocator, mm__. */
HParser *h_uint32__m(HAllocator *mm__);
/**
* @brief Parse an unsigned 16-bit integer
* @return Result token type: TT_UINT
* @note Consumes 16 bits from the input stream
*/
HParser *h_uint16(void);
/** Variant of h_uint16() taking an explicit allocator, mm__. */
HParser *h_uint16__m(HAllocator *mm__);
/**
* @brief Parse an unsigned 8-bit integer
* @return Result token type: TT_UINT
* @note Consumes 8 bits from the input stream
*/
HParser *h_uint8(void);
/** Variant of h_uint8() taking an explicit allocator, mm__. */
HParser *h_uint8__m(HAllocator *mm__);
/** @} */
/** @defgroup combinators Parser Combinators
* @{
*/
/**
* @brief Given another parser, p, returns a parser that skips any whitespace and then applies p.
* @param p Parser to apply after whitespace
* @return Result token type: p's result type
*/
HParser *h_whitespace(const HParser *p);
/** Variant of h_whitespace() taking an explicit allocator, mm__, as its first argument. */
HParser *h_whitespace__m(HAllocator *mm__, const HParser *p);
/**
* @brief Given two parsers, p and q, returns a parser that parses them in sequence but only returns
* p's result.
*
* @param p First parser
* @param q Second parser
* @return Result token type: p's result type
*/
HParser *h_left(const HParser *p, const HParser *q);
/** Variant of h_left() taking an explicit allocator, mm__, as its first argument. */
HParser *h_left__m(HAllocator *mm__, const HParser *p, const HParser *q);
/**
* @brief Given two parsers, p and q, returns a parser that parses them in sequence but only returns
* q's result.
*
* @param p First parser
* @param q Second parser
* @return Result token type: q's result type
*/
HParser *h_right(const HParser *p, const HParser *q);
/** Variant of h_right() taking an explicit allocator, mm__, as its first argument. */
HParser *h_right__m(HAllocator *mm__, const HParser *p, const HParser *q);
/**
* @brief Given three parsers, p, x, and q, returns a parser that parses them in sequence but only
* returns x's result
*
* @param p First parser
* @param x Middle parser
* @param q Last parser
* @return Result token type: x's result type
*/
HParser *h_middle(const HParser *p, const HParser *x, const HParser *q);
/** Variant of h_middle() taking an explicit allocator, mm__, as its first argument. */
HParser *h_middle__m(HAllocator *mm__, const HParser *p, const HParser *x, const HParser *q);
/**
* @brief Given another parser, p, and an action function, a, returns a parser that applies p, then
* applies a to everything in the AST of p's result
*
* @param p Parser to wrap
* @param a Action function (see HAction)
* @param user_data Context for action; passed to a as its user_data argument
* @return Result token type: any
*/
HParser *h_action(const HParser *p, const HAction a, void *user_data);
/** Variant of h_action() taking an explicit allocator, mm__, as its first argument. */
HParser *h_action__m(HAllocator *mm__, const HParser *p, const HAction a, void *user_data);
/**
* @brief Parse a single character in the given charset
* @param charset Character set
* @param length Charset length
* @return Result token type: TT_UINT
*/
HParser *h_in(const uint8_t *charset, size_t length);
/** Variant of h_in() taking an explicit allocator, mm__, as its first argument. */
HParser *h_in__m(HAllocator *mm__, const uint8_t *charset, size_t length);
/**
* @brief Parse a single character *NOT* in the given charset
* @param charset Character set to exclude
* @param length Charset length
* @return Result token type: TT_UINT
*/
HParser *h_not_in(const uint8_t *charset, size_t length);
/** Variant of h_not_in() taking an explicit allocator, mm__, as its first argument. */
HParser *h_not_in__m(HAllocator *mm__, const uint8_t *charset, size_t length);
/**
* @brief A no-argument parser that succeeds if there is no more input to parse.
* @return Result token type: None. The HParseResult exists but its AST is NULL.
*/
HParser *h_end_p(void);
/** Variant of h_end_p() taking an explicit allocator, mm__. */
HParser *h_end_p__m(HAllocator *mm__);
/**
* @brief This parser always fails.
* @return Result token type: NULL. Always.
*/
HParser *h_nothing_p(void);
/** Variant of h_nothing_p() taking an explicit allocator, mm__. */
HParser *h_nothing_p__m(HAllocator *mm__);
/**
* @brief Given a null-terminated list of parsers, apply each parser in order. The parse succeeds
* only if all parsers succeed.
*
* @param p First parser of the list; the variadic arguments are the remaining parsers, followed by
* a terminating NULL (enforced at compile time by the sentinel attribute).
* @return Result token type: TT_SEQUENCE
*/
HParser *h_sequence(HParser *p, ...) __attribute__((sentinel));
/** Variant of h_sequence() taking an explicit allocator, mm__, as its first argument. */
HParser *h_sequence__m(HAllocator *mm__, HParser *p, ...) __attribute__((sentinel));
/** va_list variant of h_sequence__m(): the parsers after p are read from ap. */
HParser *h_sequence__mv(HAllocator *mm__, HParser *p, va_list ap);
/** va_list variant of h_sequence(): the parsers after p are read from ap. */
HParser *h_sequence__v(HParser *p, va_list ap);
/**
* Array variant of h_sequence(). NOTE(review): args is presumably a NULL-terminated array of
* HParser pointers — confirm against the implementation.
*/
HParser *h_sequence__a(void *args[]);
/** Array variant of h_sequence__m(); see h_sequence__a() for the (assumed) layout of args. */
HParser *h_sequence__ma(HAllocator *mm__, void *args[]);
/**
* @brief Convenience wrapper around h_drop_from_() that appends the terminating -1 to the list of
* indices, so callers only list the indices to drop.
*/
#define h_drop_from(p, ...) h_drop_from_(p, __VA_ARGS__, -1)
/**
* @brief Given an `h_sequence` and a list of indices, returns a parser that parses the sequence but
* returns it without the results at the dropped indices. If a negative integer appears in the
* middle of the list, this combinator will silently ignore the rest of the list.
*
* When calling this function directly rather than through the h_drop_from() macro, the caller has
* to end the index list with a negative integer itself (the macro supplies -1).
*
* @param p Sequence parser
* @param ... Indices of elements to drop
* @return Result token type: TT_SEQUENCE
*/
HParser *h_drop_from_(HParser *p, ...);
/** Variant of h_drop_from_() taking an explicit allocator, mm__, as its first argument. */
HParser *h_drop_from___m(HAllocator *mm__, HParser *p, ...);
/** va_list variant of h_drop_from___m(): the indices are read from ap. */
HParser *h_drop_from___mv(HAllocator *mm__, HParser *p, va_list ap);
/** va_list variant of h_drop_from_(): the indices are read from ap. */
HParser *h_drop_from___v(HParser *p, va_list ap);
/** Array variant of h_drop_from_(). NOTE(review): the layout of args is not documented — confirm. */
HParser *h_drop_from___a(void *args[]);
/** Array variant of h_drop_from___m(). NOTE(review): the layout of args is not documented. */
HParser *h_drop_from___ma(HAllocator *mm__, void *args[]);
/**
* @brief Given a null-terminated list of parsers, apply each parser in order. The first parser to
* succeed is the result; if no parsers succeed, the parse fails.
*
* @param p First parser of the list; the variadic arguments are the remaining parsers, followed by
* a terminating NULL (enforced at compile time by the sentinel attribute).
* @return Result token type: The type of the first successful parser's result.
*/
HParser *h_choice(HParser *p, ...) __attribute__((sentinel));
/** Variant of h_choice() taking an explicit allocator, mm__, as its first argument. */
HParser *h_choice__m(HAllocator *mm__, HParser *p, ...) __attribute__((sentinel));
/** va_list variant of h_choice__m(): the parsers after p are read from ap. */
HParser *h_choice__mv(HAllocator *mm__, HParser *p, va_list ap);
/** va_list variant of h_choice(): the parsers after p are read from ap. */
HParser *h_choice__v(HParser *p, va_list ap);
/**
* Array variant of h_choice(). NOTE(review): args is presumably a NULL-terminated array of HParser
* pointers — confirm against the implementation.
*/
HParser *h_choice__a(void *args[]);
/** Array variant of h_choice__m(); see h_choice__a() for the (assumed) layout of args. */
HParser *h_choice__ma(HAllocator *mm__, void *args[]);
/**
* @brief Given a null-terminated list of parsers, match a permutation phrase of these parsers, i.e.
* match all parsers exactly once in any order.
*
* If multiple orders would match, the lexically smallest permutation is used; in other words, at
* any step the remaining available parsers are tried in the order in which they appear in the
* arguments.
*
* As an exception, 'h_optional' parsers (actually those that return a result of token type
* TT_NONE) are detected and the algorithm will try to match them with a non-empty result.
* Specifically, a result of TT_NONE is treated as a non-match as long as any other argument
* matches.
*
* Other parsers that succeed on any input (e.g. h_many), that match the same input as others, or
* that match input which is a prefix of another match can lead to unexpected results and should
* probably not be used as arguments.
*
* The result is a sequence of the same length as the argument list. Each parser's result is placed
* at that parser's index in the arguments. The permutation itself (the order in which the
* arguments were matched) is not returned.
*
* @param p First parser of the null-terminated list of parsers
* @return Result token type: TT_SEQUENCE
*/
HParser *h_permutation(HParser *p, ...) __attribute__((sentinel));
/** Variant of h_permutation() taking an explicit allocator, mm__, as its first argument. */
HParser *h_permutation__m(HAllocator *mm__, HParser *p, ...) __attribute__((sentinel));
/** va_list variant of h_permutation__m(): the parsers after p are read from ap. */
HParser *h_permutation__mv(HAllocator *mm__, HParser *p, va_list ap);
/** va_list variant of h_permutation(): the parsers after p are read from ap. */
HParser *h_permutation__v(HParser *p, va_list ap);
/**
* Array variant of h_permutation(). NOTE(review): args is presumably a NULL-terminated array of
* HParser pointers — confirm against the implementation.
*/
HParser *h_permutation__a(void *args[]);
/** Array variant of h_permutation__m(); see h_permutation__a() for the (assumed) args layout. */
HParser *h_permutation__ma(HAllocator *mm__, void *args[]);
/**
* @brief Given two parsers, p1 and p2, this parser succeeds in the following cases:
* - if p1 succeeds and p2 fails
* - if both succeed but p1's result is as long as or longer than p2's
*
* @param p1 First parser
* @param p2 Second parser
* @return Result token type: p1's result type.
*/
HParser *h_butnot(const HParser *p1, const HParser *p2);
/** Variant of h_butnot() taking an explicit allocator, mm__, as its first argument. */
HParser *h_butnot__m(HAllocator *mm__, const HParser *p1, const HParser *p2);
/**
* @brief Given two parsers, p1 and p2, this parser succeeds in the following cases:
* - if p1 succeeds and p2 fails
* - if both succeed but p2's result is shorter than p1's
*
* @param p1 First parser
* @param p2 Second parser
* @return Result token type: p1's result type.
*/
HParser *h_difference(const HParser *p1, const HParser *p2);
/** Variant of h_difference() taking an explicit allocator, mm__, as its first argument. */
HParser *h_difference__m(HAllocator *mm__, const HParser *p1, const HParser *p2);
/**
* @brief Given two parsers, p1 and p2, this parser succeeds if *either* p1 or p2 succeed, but not
* if they both do.
*
* @param p1 First parser
* @param p2 Second parser
* @return Result token type: The type of the result of whichever parser succeeded.
*/
HParser *h_xor(const HParser *p1, const HParser *p2);
/** Variant of h_xor() taking an explicit allocator, mm__, as its first argument. */
HParser *h_xor__m(HAllocator *mm__, const HParser *p1, const HParser *p2);
/**
* @brief Given a parser, p, this parser succeeds for zero or more repetitions of p.
* @param p Parser to repeat
* @return Result token type: TT_SEQUENCE
*/
HParser *h_many(const HParser *p);
/** Variant of h_many() taking an explicit allocator, mm__, as its first argument. */
HParser *h_many__m(HAllocator *mm__, const HParser *p);
/**
* @brief Given a parser, p, this parser succeeds for one or more repetitions of p.
* @param p Parser to repeat
* @return Result token type: TT_SEQUENCE
*/
HParser *h_many1(const HParser *p);
/** Variant of h_many1() taking an explicit allocator, mm__, as its first argument. */
HParser *h_many1__m(HAllocator *mm__, const HParser *p);
/**
* @brief Given a parser, p, this parser succeeds for exactly N repetitions of p.
* @param p Parser to repeat
* @param n Number of repetitions (the N above)
* @return Result token type: TT_SEQUENCE
*/
HParser *h_repeat_n(const HParser *p, const size_t n);
/** Variant of h_repeat_n() taking an explicit allocator, mm__, as its first argument. */
HParser *h_repeat_n__m(HAllocator *mm__, const HParser *p, const size_t n);
/**
* @brief Given a parser, p, this parser succeeds with the value p parsed or with an empty result.
* @param p Parser to apply optionally
* @return Result token type: If p succeeded, the type of its result; if not, TT_NONE.
*/
HParser *h_optional(const HParser *p);
/** Variant of h_optional() taking an explicit allocator, mm__, as its first argument. */
HParser *h_optional__m(HAllocator *mm__, const HParser *p);
/**
* @brief Given a parser, p, this parser succeeds if p succeeds, but doesn't include p's result in
* the result.
*
* @param p Parser to ignore
* @return Result token type: None. The HParseResult exists but its AST is NULL.
*/
HParser *h_ignore(const HParser *p);
/** Variant of h_ignore() taking an explicit allocator, mm__, as its first argument. */
HParser *h_ignore__m(HAllocator *mm__, const HParser *p);
/**
* @brief Given a parser, p, and a parser for a separator, sep, this parser matches a (possibly
* empty) list of things that p can parse, separated by sep. For example, if p is
* many1(ch_range('0','9')) and sep is ch(','), sepBy(p, sep) will match a comma-separated list of
* integers.
*
* @param p Parser for list elements
* @param sep Parser for separator
* @return Result token type: TT_SEQUENCE
*/
HParser *h_sepBy(const HParser *p, const HParser *sep);
/** Variant of h_sepBy() taking an explicit allocator, mm__, as its first argument. */
HParser *h_sepBy__m(HAllocator *mm__, const HParser *p, const HParser *sep);
/**
* @brief Given a parser, p, and a parser for a separator, sep, this parser matches a list of things
* that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one
* element. For example, if p is many1(ch_range('0','9')) and sep is ch(','), sepBy1(p, sep) will
* match a non-empty comma-separated list of integers.
*
* @param p Parser for list elements
* @param sep Parser for separator
* @return Result token type: TT_SEQUENCE
*/
HParser *h_sepBy1(const HParser *p, const HParser *sep);
/** Variant of h_sepBy1() taking an explicit allocator, mm__, as its first argument. */
HParser *h_sepBy1__m(HAllocator *mm__, const HParser *p, const HParser *sep);
/**
* @brief This parser always returns a zero length match, i.e., empty string.
* @return Result token type: None. The HParseResult exists but its AST is NULL.
*/
HParser *h_epsilon_p(void);
/** Variant of h_epsilon_p() taking an explicit allocator, mm__. */
HParser *h_epsilon_p__m(HAllocator *mm__);
/**
* @brief This parser applies its first argument to read an unsigned integer value, then applies its
* second argument that many times. length should parse an unsigned integer value; this is checked
* at runtime. Specifically, the token_type of the returned token must be TT_UINT. In future we
* might relax this to include TT_USER but don't count on it.
*
* @param length Parser to read length
* @param value Parser to apply length times
* @return Result token type: TT_SEQUENCE
*/
HParser *h_length_value(const HParser *length, const HParser *value);
/** Variant of h_length_value() taking an explicit allocator, mm__, as its first argument. */
HParser *h_length_value__m(HAllocator *mm__, const HParser *length, const HParser *value);
/**
* @brief This parser attaches a predicate function, which returns true or false, to a parser. The
* function is evaluated over the parser's result.
*
* The parse only succeeds if the attribute function returns true.
*
* attr_bool will check whether p's result exists and whether p's result AST exists; you do not need
* to check for this in your predicate function.
*
* @param p Parser to wrap
* @param pred Predicate function (see HPredicate)
* @param user_data Context for predicate; passed to pred as its user_data argument
* @return Result token type: p's result type if pred succeeded, NULL otherwise.
*/
HParser *h_attr_bool(const HParser *p, HPredicate pred, void *user_data);
/** Variant of h_attr_bool() taking an explicit allocator, mm__, as its first argument. */
HParser *h_attr_bool__m(HAllocator *mm__, const HParser *p, HPredicate pred, void *user_data);
/**
* @brief The 'and' parser asserts that a conditional syntax is satisfied, but doesn't consume that
* conditional syntax. This is useful for lookahead. As an example:
*
* Suppose you already have a parser, hex_p, that parses numbers in hexadecimal format (including
* the leading '0x'). Then `sequence(and(token((const uint8_t*)"0x", 2)), hex_p, NULL)` checks to
* see whether there is a leading "0x", *does not* consume the "0x", and then applies hex_p to parse
* the hex-formatted number.
*
* 'and' succeeds if p succeeds, and fails if p fails.
*
* @param p Parser to apply
* @return Result token type: None. The HParseResult exists but its AST is NULL.
*/
HParser *h_and(const HParser *p);
/** Variant of h_and() taking an explicit allocator, mm__, as its first argument. */
HParser *h_and__m(HAllocator *mm__, const HParser *p);
/**
* @brief The 'not' parser asserts that a conditional syntax is *not* satisfied, but doesn't consume
* that conditional syntax. As a somewhat contrived example:
*
* Since 'choice' applies its arguments in order, the following parser: `sequence(ch('a'),
* choice(ch('+'), token((const uint8_t*)"++", 2), NULL), ch('b'), NULL)` will not parse "a++b",
* because once choice() has succeeded, it will not backtrack and try other alternatives if a later
* parser in the sequence fails. Instead, you can force the use of the second alternative by turning
* the ch('+') alternative into a sequence with not:
* `sequence(ch('a'), choice(sequence(ch('+'), not(ch('+')), NULL),
* token((const uint8_t*)"++", 2), NULL), ch('b'), NULL)`
* If the input string is "a+b", the first alternative is applied; if the input string is "a++b",
* the second alternative is applied.
*
* @param p Parser to apply
* @return Result token type: None. The HParseResult exists but its AST is NULL.
*/
HParser *h_not(const HParser *p);
/** Variant of h_not() taking an explicit allocator, mm__, as its first argument. */
HParser *h_not__m(HAllocator *mm__, const HParser *p);
/**
* @brief Create a parser that just calls out to another, as yet unknown, parser.
* Note that the inner parser gets bound later, with h_bind_indirect(). This can be used to create
* recursive parsers.
*
* @return Result token type: the type of whatever parser is bound to it with h_bind_indirect().
*/
HParser *h_indirect(void);
/** Variant of h_indirect() taking an explicit allocator, mm__. */
HParser *h_indirect__m(HAllocator *mm__);
/**
* @brief Set the inner parser of an indirect. See comments on h_indirect() for details.
* @param indirect Parser created with h_indirect()
* @param inner Parser to bind to indirect
*/
void h_bind_indirect(HParser *indirect, const HParser *inner);
/** Variant of h_bind_indirect() taking an explicit allocator, mm__, as its first argument. */
void h_bind_indirect__m(HAllocator *mm__, HParser *indirect, const HParser *inner);
/**
* @brief This parser runs its argument parser with the given endianness setting.
*
* The value of 'endianness' should be a bit-wise or of the constants
* BYTE_BIG_ENDIAN/BYTE_LITTLE_ENDIAN and BIT_BIG_ENDIAN/BIT_LITTLE_ENDIAN, for example
* `BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN`.
*
* @param endianness Endianness setting
* @param p Parser to run with given endianness
* @return Result token type: p's result type.
*/
HParser *h_with_endianness(char endianness, const HParser *p);
/** Variant of h_with_endianness() taking an explicit allocator, mm__, as its first argument. */
HParser *h_with_endianness__m(HAllocator *mm__, char endianness, const HParser *p);
/**
* @brief The 'h_put_value' combinator stashes the result of the parser it wraps in a symbol table
* in the parse state, so that non-local actions and predicates can access this value.
*
* Attempting to use h_put_value with a name that was already in the symbol table will return NULL
* (and thus parse failure).
*
* @param p Parser whose result to stash
* @param name Name to stash the result under (must be unique)
* @return Result token type: p's token type if name was not already in the symbol table.
*/
HParser *h_put_value(const HParser *p, const char *name);
/** Variant of h_put_value() taking an explicit allocator, mm__, as its first argument. */
HParser *h_put_value__m(HAllocator *mm__, const HParser *p, const char *name);
/**
* @brief The 'h_get_value' combinator retrieves a named HParseResult that was previously stashed in
* the parse state (see h_put_value).
*
* @param name Name to retrieve
* @return Result token type: whatever the stashed HParseResult is, if present. If absent, NULL (and
* thus parse failure).
*/
HParser *h_get_value(const char *name);
/** Variant of h_get_value() taking an explicit allocator, mm__, as its first argument. */
HParser *h_get_value__m(HAllocator *mm__, const char *name);
/**