Skip to content

Commit aa929b7

Browse files
committed
GH8: Scaffolding for table parser.
1 parent 4acfc58 commit aa929b7

File tree

2 files changed

+148
-0
lines changed

2 files changed

+148
-0
lines changed

.gitignore

+5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
.DS_Store
22
.env/
33
node_modules/
4+
__pycache__/
5+
*.log
6+
*.py[cod]
7+
dist/
8+
.idea

Sources/Former/bootstrap/patc.py

+143
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
# Patc (Patsy) is a pattern -> table compiler.
2+
from typing import List, Optional, Tuple, Union
3+
4+
5+
class Pattern:
    """Common base class of the pattern node types defined in this module."""

    def __init__(self) -> None:
        """Initialise the node; the base class itself stores nothing."""
8+
9+
10+
class Choice(Pattern):
    """Pattern node that stores a list of variants."""

    def __init__(self, variants: List[Union[Pattern, str]]):
        """Keep a reference to ``variants`` (the list is not copied).

        Args:
            variants: Items that are each either a nested pattern or a
                literal string.
        """
        super().__init__()
        self.variants = variants
14+
15+
16+
class Sequence(Pattern):
    """Pattern node that stores an ordered list of elements."""

    def __init__(self, elements: List[Union[Pattern, str]]):
        """Keep a reference to ``elements`` (the list is not copied).

        Args:
            elements: Items that are each either a nested pattern or a
                literal string.
        """
        super().__init__()
        self.elements = elements
20+
21+
22+
# Sample pattern built from two literal strings. The same assignment is
# repeated further down the file, next to the hand-written `table`.
example = Choice(["cat", "car"])
23+
24+
25+
def eliminate_left_recursion(pattern: Pattern) -> Pattern:
    """Placeholder pass: currently hands ``pattern`` back unchanged.

    TODO: Eliminate cyclical left recursion.

    Args:
        pattern: The pattern to process.

    Returns:
        The very same object that was passed in.
    """
    return pattern
28+
29+
30+
# We'll keep the original choice structure for now since it might have semantic meaning for precedence.
31+
# def consolidate_choice_of_choices(pattern: Pattern) -> Pattern:
32+
# if isinstance(pattern, Choice):
33+
# variants = []
34+
#
35+
36+
def get_prefix(
    pattern: Union[Pattern, str],
) -> Optional[Tuple[str, Union[Pattern, str]]]:
    """Split ``pattern`` into its first character and whatever follows it.

    Args:
        pattern: A literal string or a pattern node.

    Returns:
        A ``(head, rest)`` pair, or ``None`` when no leading character can be
        taken: an empty string, a ``Sequence`` without elements, a
        ``Sequence`` whose first element yields no prefix, or a pattern type
        that is not handled here (e.g. ``Choice``).

        For a string, ``rest`` is the remaining string (possibly ``""``).
        For a ``Sequence``, ``rest`` is a new ``Sequence`` holding the
        remainder of the first element followed by the other elements.
    """
    if isinstance(pattern, str):
        # Guard the empty literal: indexing "" would raise IndexError.
        if not pattern:
            return None
        return pattern[0], pattern[1:]

    if isinstance(pattern, Sequence):
        # NOTE(review): the original stub called ``Sequence()`` with no
        # arguments, which always raised TypeError. Recursing into the first
        # element is a best-guess completion; confirm the intended semantics.
        if not pattern.elements:
            return None
        first, *others = pattern.elements
        split = get_prefix(first)
        if split is None:
            return None
        head, rest = split
        # Drop an exhausted literal so the remainder holds no empty strings.
        remaining = others if rest == "" else [rest, *others]
        return head, Sequence(remaining)

    return None
41+
42+
43+
44+
def merge_common_prefix(pattern: Pattern) -> Pattern:
    """Placeholder for the common-prefix merging pass.

    TODO: Merge common prefixes. Eliminates the need for backtracking.

    Args:
        pattern: The pattern to process.

    Returns:
        ``pattern`` itself; no merging is performed yet.
    """
    if not isinstance(pattern, Choice):
        return pattern

    # Created but not filled in yet.
    by_prefix = {}

    # Sketch of the intended traversal (not implemented):
    #   for variant in pattern.variants:
    #       if isinstance(variant, Choice):
    #           for subvariant in variant.variants: ...
    #       elif isinstance(variant, Sequence):
    #           prefixes[variant.element[0]]
    #       elif isinstance(variant, str): ...
    #       else:
    #           raise TypeError("Unknown pattern type.")
    return pattern
61+
62+
63+
class TableEntry:
    """Empty placeholder class; it defines no attributes or behaviour yet."""

    def __init__(self) -> None:
        """Create the entry; nothing is stored on the instance."""
66+
67+
68+
def compile(pattern: Pattern):
    """Run the preprocessing passes over ``pattern``.

    Table generation is not written yet, so nothing is returned.
    NOTE: within this module the name shadows the built-in ``compile``.

    Args:
        pattern: The pattern to compile.
    """
    without_left_recursion = eliminate_left_recursion(pattern)
    prefix_merged = merge_common_prefix(without_left_recursion)

    # Now you have an acyclic graph. Traverse it to generate table states.
    #   State ID
    #   input letter -> next TableEntry.
    # Operations
    #   - Call. Add to stack.
    #   - Emit left terminal (precedence). Return.
    #   - Emit right terminal (precedence). Return.
    #   - Match input. Transition to next state.
    state_table = {}
80+
81+
82+
class TblOperation:
    """Base class of the table operations; records which state comes next."""

    def __init__(self, next: Optional[int]):
        """Store the follow-up state.

        Args:
            next: ``None`` means stay in the same state; any other value is
                an index into the table.
        """
        self.next = next
86+
87+
class TblMatch(TblOperation):
    """Operation type that match() handles by printing the letter and moving on.

    Presumably the "Match input. Transition to next state." operation from
    the notes in compile().
    """
89+
90+
class TblEmitLeft(TblOperation):
    """Operation type with no dedicated handling in match() yet.

    Presumably the "Emit left terminal (precedence). Return." operation from
    the notes in compile(); match() sends it to its catch-all error branch.
    """
92+
93+
class TblEmitRight(TblOperation):
    """Operation type that match() handles by printing the letter.

    Presumably the "Emit right terminal (precedence). Return." operation from
    the notes in compile(); its precedence handling is still a TODO there.
    """
95+
96+
class TblCall(TblOperation):
    """Operation type with no dedicated handling in match() yet.

    Presumably the "Call. Add to stack." operation from the notes in
    compile(); match() sends it to its catch-all error branch.
    """
98+
99+
100+
# a + b + c
example = Choice(["cat", "car"])

# Hand-written transition table: one dict per state, keyed by input letter.
# The letters spell out "cat" / "car"; a `next` of -1 is the value match()
# checks for as the end state.
table = [
    # 0
    {"c": TblMatch(1)},
    # 1
    {"a": TblMatch(2)},
    # 2
    {
        "t": TblEmitRight(-1),
        "r": TblEmitRight(-1),
    },
]
115+
116+
def match(tbl: List[dict], input: str):
    """Walk ``input`` through the transition table ``tbl``, letter by letter.

    Matching starts in state 0. Each letter is looked up in the current
    state's dict; handled letters are printed to stdout separated by spaces
    (no trailing newline), then the state advances to the operation's
    ``next`` value.

    Args:
        tbl: List of state dicts, each mapping an input letter to a
            ``TblOperation`` instance.
        input: The text to run through the table.

    Raises:
        ValueError: If a letter has no entry in the current state, or if the
            end state (``-1``) is reached before the last letter.
        NotImplementedError: If the entry is an operation other than
            ``TblMatch`` or ``TblEmitRight``.
    """
    state = 0
    # Reserved for the precedence handling sketched in the TODO below;
    # neither container is used yet.
    output_queue = []
    pending_stack = []

    for idx, letter in enumerate(input):
        entry = tbl[state].get(letter)
        if entry is None:
            raise ValueError(f"Mismatch at {idx}: {input[:idx]}\033[4m{input[idx]}\033[0m{input[idx+1:]}")
        elif isinstance(entry, TblMatch):
            print(f"{letter}", end=" ")
            # pending_stack.append(letter)
        elif isinstance(entry, TblEmitRight):
            print(f"{letter}", end=" ")
            # pending_stack.append(letter)
            # TODO.... This needs precedence handling.
            # while pending_stack:
            #     output_queue.append(pending_stack.pop())
        else:
            # NotImplemented is a non-callable singleton; the exception class
            # is NotImplementedError.
            raise NotImplementedError("tbd...")

        # A `next` of None means "stay in the same state" (see TblOperation),
        # so only overwrite the state when a target is given.
        if entry.next is not None:
            state = entry.next
        if state == -1 and idx < len(input) - 1:
            raise ValueError(f"Input not fully consumed: {idx}: {input[:idx]}, {input[idx]}, {input[idx+1:]}")
141+
142+
143+
# Demo run: state 0 of `table` has no entry for "b", so this call raises
# ValueError. It is guarded so that merely importing the module does not raise.
if __name__ == "__main__":
    match(table, "bat")

0 commit comments

Comments
 (0)