# Patc (Patsy) is a pattern -> table compiler.
from typing import List, Optional, Tuple, Union
| 3 | + |
| 4 | + |
class Pattern:
    """Base class for the nodes of a pattern tree.

    It carries no state of its own; subclasses add their own attributes.
    """

    def __init__(self):
        """Initialise the base node (there is nothing to set up)."""
| 8 | + |
| 9 | + |
class Choice(Pattern):
    """Pattern node that holds a list of variants."""

    def __init__(self, variants: List[Union[Pattern, str]]):
        """Store the variants of this choice.

        Args:
            variants: The variants; each one is either a nested pattern or
                a literal string. The list is stored as given, not copied.
        """
        super().__init__()
        self.variants = variants
| 14 | + |
| 15 | + |
class Sequence(Pattern):
    """Pattern node that holds an ordered list of elements."""

    def __init__(self, elements: List[Union[Pattern, str]]):
        """Store the elements of this sequence.

        Args:
            elements: The elements; each one is either a nested pattern or
                a literal string. The list is stored as given, not copied.
        """
        super().__init__()
        self.elements = elements
| 20 | + |
| 21 | + |
# Sample pattern: two literal variants that share the prefix "ca".
example = Choice(variants=["cat", "car"])
| 23 | + |
| 24 | + |
def eliminate_left_recursion(pattern: Pattern) -> Pattern:
    """Placeholder pass that currently hands ``pattern`` back untouched.

    TODO: Eliminate cyclical left recursion.

    Args:
        pattern: The pattern to process.

    Returns:
        The very same ``pattern`` object.
    """
    return pattern
| 28 | + |
| 29 | + |
# We'll keep the original choice structure for now since it might have semantic meaning for precedence.
# def consolidate_choice_of_choices(pattern: Pattern) -> Pattern:
#     if isinstance(pattern, Choice):
#         variants = []
#
| 35 | + |
def get_prefix(
    pattern: Union[Pattern, str],
) -> Optional[Tuple[str, Union[Pattern, str]]]:
    """Split ``pattern`` into its first letter and whatever follows it.

    Args:
        pattern: A literal string or a pattern node.

    Returns:
        ``(letter, remainder)`` when the pattern starts with a single
        literal letter, otherwise ``None``. For a string the remainder is
        the rest of the string (possibly empty). For a ``Sequence`` it is a
        new ``Sequence`` whose first element is the remainder of the
        element that supplied the letter, followed by the later elements.
        ``None`` is returned for an empty string, for a sequence that has
        no leading letter, and for every other pattern type (for example
        ``Choice``, whose variants need not share a first letter).
    """
    if isinstance(pattern, str):
        # Guard the empty literal: indexing it would raise IndexError.
        if not pattern:
            return None
        return pattern[0], pattern[1:]

    if isinstance(pattern, Sequence):
        for position, element in enumerate(pattern.elements):
            if isinstance(element, str) and not element:
                # An empty literal contributes no letter; keep looking.
                continue
            split = get_prefix(element)
            if split is None:
                # The leading element has no single first letter.
                return None
            letter, rest = split
            following = pattern.elements[position + 1:]
            return letter, Sequence([rest, *following])

    return None
| 41 | + |
| 42 | + |
| 43 | + |
def merge_common_prefix(pattern: Pattern) -> Pattern:
    """Placeholder pass that currently hands ``pattern`` back untouched.

    TODO: Merge common prefixes. Eliminates the need for backtracking.

    Args:
        pattern: The pattern to process.

    Returns:
        The very same ``pattern`` object.
    """
    if not isinstance(pattern, Choice):
        return pattern

    # Sketch of the intended walk over a Choice (not implemented yet):
    #
    # for variant in pattern.variants:
    #     if isinstance(variant, Choice):
    #         for subvariant in variant.variants:
    #
    #     elif isinstance(variant, Sequence):
    #         prefixes[variant.element[0]]
    #     elif isinstance(variant, str):
    #
    #     else:
    #         raise TypeError("Unknown pattern type.")
    return pattern
| 61 | + |
| 62 | + |
class TableEntry:
    """Placeholder for one entry of the compiled table; holds no data yet."""

    def __init__(self):
        """Create an empty entry (there is nothing to set up)."""
| 66 | + |
| 67 | + |
def compile(pattern: Pattern):
    """Run ``pattern`` through the preprocessing passes.

    Table generation is not implemented yet, so nothing is returned.

    NOTE: this name shadows the built-in ``compile`` inside this module.

    Args:
        pattern: The pattern to compile.
    """
    # Left-recursion elimination first, then common-prefix merging.
    linear = merge_common_prefix(eliminate_left_recursion(pattern))
    # Now you have an acyclic graph. Traverse it to generate table states.
    # State ID
    #   input letter -> next TableEntry.
    # Operations
    #   - Call. Add to stack.
    #   - Emit left terminal (precedence). Return.
    #   - Emit right terminal (precedence). Return.
    #   - Match input. Transition to next state.
    states = {}
| 80 | + |
| 81 | + |
class TblOperation:
    """Base class for the operations stored in a table state."""

    def __init__(self, next: Optional[int]):
        """Record where the operation leads.

        Args:
            next: Index into the table of the state to move to, or
                ``None`` to stay in the same state.
        """
        # None = stay in the same state. Else, index into tbl.
        self.next = next
| 86 | + |
class TblMatch(TblOperation):
    """Operation for an input letter that is matched before moving on."""
| 89 | + |
class TblEmitLeft(TblOperation):
    """Operation presumably meant to emit a left terminal.

    NOTE(review): ``match`` does not handle this operation yet.
    """
| 92 | + |
class TblEmitRight(TblOperation):
    """Operation presumably meant to emit a right terminal.

    NOTE(review): precedence handling for it is still a TODO in ``match``.
    """
| 95 | + |
class TblCall(TblOperation):
    """Operation presumably meant to call into another part of the table.

    NOTE(review): ``match`` does not handle this operation yet.
    """
| 98 | + |
| 99 | + |
# a + b + c
example = Choice(["cat", "car"])

# Hand-written table for ``example``; the list index is the state ID.
table = [
    # 0
    {"c": TblMatch(1)},
    # 1
    {"a": TblMatch(2)},
    # 2
    {
        "t": TblEmitRight(-1),
        "r": TblEmitRight(-1),
    },
]
| 115 | + |
def match(tbl: List[dict], input: str):
    """Run ``input`` through the state table ``tbl`` one letter at a time.

    Matching starts in state 0. Each state is a dict that maps a letter to
    an operation; the operation's ``next`` attribute names the following
    state, ``None`` keeps the current state, and ``-1`` marks the end of
    the match. Every matched letter is printed, followed by a space.

    Args:
        tbl: The state table, indexed by state ID.
        input: The text to match.

    Raises:
        ValueError: If a letter has no entry in the current state, if
            letters remain after the final state was reached, or if the
            input ends before the final state is reached.
        NotImplementedError: If the table contains an operation other
            than ``TblMatch`` or ``TblEmitRight``.
    """
    state = 0

    for idx, letter in enumerate(input):
        entry = tbl[state].get(letter)
        if entry is None:
            raise ValueError(
                f"Mismatch at {idx}: {input[:idx]}\033[4m{input[idx]}\033[0m{input[idx+1:]}"
            )
        if isinstance(entry, (TblMatch, TblEmitRight)):
            # TODO: TblEmitRight needs precedence handling; for now it is
            # traced exactly like a plain match.
            print(f"{letter}", end=" ")
        else:
            # NotImplemented is a comparison sentinel, not an exception;
            # NotImplementedError is the exception to raise here.
            raise NotImplementedError("tbd...")

        # next=None means "stay in the same state".
        if entry.next is not None:
            state = entry.next

        if state == -1:
            if idx < len(input) - 1:
                raise ValueError(
                    f"Input not fully consumed: {idx}: {input[:idx]}, {input[idx]}, {input[idx+1:]}"
                )
            return

    # The loop ran out of letters without reaching the final state.
    raise ValueError(f"Input ended before the match was complete: {input}")
| 141 | + |
| 142 | + |
if __name__ == "__main__":
    # Demo run, guarded so that importing this module does not raise:
    # state 0 of ``table`` has no entry for "b", so this call exercises
    # the mismatch error path.
    match(table, "bat")
0 commit comments