-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscanner.py
27 lines (23 loc) · 857 Bytes
/
scanner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import re
from typeDef import TypeDefinition
def parse(typeDef, src_code, filter_=None):
    """
    Tokenize src_code with the regular expression supplied by typeDef.

    Each match of typeDef.getRE() found in src_code becomes a
    (text, id) tuple: text is the matched string and id is whatever
    typeDef.getID() returns for the match's lastgroup name.

    filter_ is an optional container of group names to leave out of the
    result; when it is None every match is kept.
    eg:
        parse(typedef, src, ['block_comment', 'line_comment'])

    The returned list always ends with the pair ("$", "$")
    (presumably an end-of-input marker for the consumer -- confirm
    against the caller).
    """
    assert isinstance(typeDef, TypeDefinition)
    pattern = typeDef.getRE()
    # re.finditer only yields the spans that match; anything in src_code
    # that the pattern does not match produces no entry here.
    tokens = [
        (str(match[0]), typeDef.getID(match.lastgroup))
        for match in re.finditer(pattern, src_code)
        if filter_ is None or match.lastgroup not in filter_
    ]
    tokens.append(("$", "$"))
    return tokens
if __name__ == "__main__":
    # Manual run: load the simpleJava type definition, tokenize the sample
    # source file while leaving out 'space' tokens, and print the result.
    definition = TypeDefinition.load("simpleJava/typedef")
    with open("simpleJava/test.sjava", "r") as source_file:
        tokens = parse(definition, source_file.read(), ['space'])
    print(tokens)