-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
1,134 additions
and
141 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
# Nif library | ||
# (c) Copyright 2024 Andreas Rumpf | ||
# | ||
# See the file "license.txt", included in this | ||
# distribution, for details about the copyright. | ||
|
||
## Parse NIF into a packed tree representation. | ||
|
||
import bitabs, lineinfos, stringviews, packedtrees, nifreader | ||
|
||
type
  NifKind* = enum ## Node kinds stored in the packed tree built by `parse`.
    Empty,    ## atom emitted for a `DotToken`
    Ident,    ## identifier atom (also carries an overflowing tag id)
    Sym,      ## symbol use
    Symdef,   ## symbol definition
    IntLit,   ## payload is an id into `Literals.integers`
    UIntLit,  ## payload is an id into `Literals.uintegers`
    FloatLit, ## payload is an id into `Literals.floats`
    CharLit,  ## payload is the decoded character itself
    StrLit,   ## payload is an id into `Literals.strings`
    Err,      # must not be an atom!
    Compound  ## first value used for tagged nodes; `parse` adds the tag's
              ## `KindId` on top of `ord(Compound)`
|
||
type
  StrId* = distinct uint32   ## key into `Literals.strings`
  IntId* = distinct uint32   ## key into `Literals.integers`
  UIntId* = distinct uint32  ## key into `Literals.uintegers`
  FloatId* = distinct uint32 ## key into `Literals.floats`
  KindId* = distinct uint32  ## key into `Literals.kinds`

  Literals* = object
    ## Side tables that the atoms of a `PackedTree[NifKind]` refer to.
    man*: LineInfoManager
      ## packs and unpacks the line information of the nodes
    kinds*: BiTable[KindId, string]
      ## tags of compound nodes
    files*: BiTable[FileId, string]
      ## we cannot use StringView here as it may have unexpanded backslashes!
    strings*: BiTable[StrId, string]
      ## decoded text of identifiers, symbols, string literals and
      ## unknown tokens
    integers*: BiTable[IntId, int64]
    uintegers*: BiTable[UIntId, uint64]
    floats*: BiTable[FloatId, float64]
|
||
proc addAtom*[L](dest: var PackedTree[NifKind]; kind: NifKind; lit: L;
                 info: PackedLineInfo) =
  ## Appends an atom of `kind` to `dest`. `lit` (typically one of the
  ## distinct id types) is converted to `uint32` and forwarded together
  ## with `info` to `packedtrees.addAtom`.
  let payload = uint32(lit)
  packedtrees.addAtom(dest, kind, payload, info)
|
||
proc parse*(r: var Reader; dest: var PackedTree[NifKind]; lits: var Literals;
            parentInfo: PackedLineInfo): bool =
  ## Reads the next token from `r` and appends the node it starts to `dest`.
  ##
  ## * A token without a filename carries a position relative to
  ##   `parentInfo`; a token with a filename carries an absolute position.
  ## * `ParLe` opens a compound node whose children are parsed recursively
  ##   until a call reports no progress.
  ## * Literals are interned in `lits` and stored as atoms.
  ##
  ## Returns `false` for `EofToken` and `ParRi` (nothing is appended) and
  ## `true` for every other token.
  let tok = next(r)

  var info = parentInfo
  if tok.filename.len != 0:
    # absolute file position:
    let fileId = lits.files.getOrIncl(decodeFilename tok)
    info = pack(lits.man, fileId, tok.pos.line, tok.pos.col)
  elif tok.pos.line != 0 or tok.pos.col != 0:
    # relative file position: the deltas are applied to the parent's position
    let (file, line, col) = unpack(lits.man, parentInfo)
    info = pack(lits.man, file, line + tok.pos.line, col + tok.pos.col)

  template internStr(): untyped =
    # interns the decoded token text and yields its `StrId`
    lits.strings.getOrIncl(decodeStr tok)

  case tok.tk
  of EofToken, ParRi:
    return false
  of ParLe:
    # Tags are mapped onto the enum values from `Compound` upwards.
    let base = ord(Compound).uint32
    let wide = lits.kinds.getOrInclFromView(tok.s).uint32 + base
    let overflows = wide > 255'u32
    let stored = if overflows: base else: wide
    copyInto(dest, cast[NifKind](stored), info):
      if overflows:
        # handle overflow: keep the real value as a leading `Ident` atom
        dest.addAtom Ident, wide, info
      while parse(r, dest, lits, info):
        discard
  of UnknownToken:
    copyInto dest, Err, info:
      dest.addAtom StrLit, internStr(), info
  of DotToken:
    dest.addAtom Empty, 0'u32, info
  of Ident:
    dest.addAtom Ident, internStr(), info
  of Symbol:
    dest.addAtom Sym, internStr(), info
  of SymbolDef:
    dest.addAtom Symdef, internStr(), info
  of StringLit:
    dest.addAtom StrLit, internStr(), info
    # XXX handle suffixes
  of CharLit:
    dest.addAtom CharLit, uint32(decodeChar(tok)), info
  of IntLit:
    dest.addAtom IntLit, lits.integers.getOrIncl(decodeInt tok), info
    # XXX handle suffixes
  of UIntLit:
    dest.addAtom UIntLit, lits.uintegers.getOrIncl(decodeUInt tok), info
    # XXX handle suffixes
  of FloatLit:
    dest.addAtom FloatLit, lits.floats.getOrIncl(decodeFloat tok), info
    # XXX handle suffixes
  result = true
|
||
type
  Module* = object ## What `parse(r: var Reader)` produces.
    t*: PackedTree[NifKind] ## the packed tree
    lits*: Literals         ## the tables filled while building `t`
|
||
proc parse*(r: var Reader): Module =
  ## Parses one top-level node from `r` into a fresh `Module`.
  ## Whether that node could be read at all is not reported: the boolean
  ## result of the inner `parse` is discarded.
  # empirically, (size div 7) is a good estimate for the number of nodes
  # in the file:
  let estimatedNodes = r.fileSize div 7
  result = Module(t: createPackedTree[NifKind](estimatedNodes))
  discard parse(r, result.t, result.lits, NoLineInfo)
|
||
proc memSizes*(m: Module) =
  ## Prints the length of the tree followed by the `memSize` of the line
  ## info manager and of every literal table, one labelled line each.
  template report(label: string; size: untyped) =
    echo label, size

  report "Tree ", m.t.len # * sizeof(PackedNode[NifKind])
  report "Man ", m.lits.man.memSize
  report "Kinds ", m.lits.kinds.memSize
  report "Files ", m.lits.files.memSize
  report "Strings ", m.lits.strings.memSize
  report "Ints ", m.lits.integers.memSize
  report "UInts ", m.lits.uintegers.memSize
  report "Floats ", m.lits.floats.memSize
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,239 @@ | ||
# Nif library | ||
# (c) Copyright 2024 Andreas Rumpf | ||
# | ||
# See the file "license.txt", included in this | ||
# distribution, for details about the copyright. | ||
|
||
## We want to traverse the AST implied by NIF without materializing it,
## because building the AST is expensive. We especially want to be able to
## skip subtrees. The solution is to traverse the file once and compute a
## skip table.
|
||
import nifreader, stringviews | ||
import std / [strutils, algorithm, tables] | ||
|
||
type
  SkipEntry = object ## One subtree recorded by `computeSkipTable`.
    offset: int32 ## reader offset taken before the subtree's `(` was read
    span: int32   ## distance from `offset` to the offset taken before the
                  ## matching `)` was read
  SkipTable = seq[SkipEntry]
|
||
func `<`*(a, b: SkipEntry): bool =
  ## Orders entries by `offset` alone; `span` does not take part.
  result = a.offset < b.offset

func `==`*(a, b: SkipEntry): bool =
  ## Two entries are equal when their `offset`s match; `span` is ignored.
  result = a.offset == b.offset
|
||
proc myHex(x: int32): string =
  ## Returns the upper-case hexadecimal form of `x` without leading zeros.
  ##
  ## Zero is rendered as `"0"`; previously every digit was stripped and the
  ## result was the empty string. Negative values keep all eight digits of
  ## their two's complement form because `toHex` never pads them with zeros.
  let digits = toHex(x)
  var first = 0
  # Stop one position early so that at least one digit always survives.
  while first < digits.len - 1 and digits[first] == '0':
    inc first
  result = digits.substr(first)
|
||
const
  Base36 = "0123456789abcdefghijklmnopqrstuvwxyz"

proc add*(result: var string; id: int32) =
  ## Appends `id` to `result` in base 36 with the least significant digit
  ## first (the digits are reversed since the order does not matter here).
  ##
  ## Zero is written as `"0"` and negative values as `'-'` followed by the
  ## digits of their magnitude. Previously both cases appended nothing,
  ## which made e.g. an offset of 0 indistinguishable from a missing field.
  if id == 0'i32:
    result.add '0'
    return
  # Widen first so that negating `low(int32)` cannot overflow.
  var rest = int64(id)
  if rest < 0:
    result.add '-'
    rest = -rest
  while rest > 0:
    result.add Base36[int(rest mod 36)]
    rest = rest div 36
|
||
proc `$`(e: SkipEntry): string =
  ## Renders `e` as `<offset>:<span>`; both numbers are appended through
  ## this module's `add` overload for `int32`.
  var text = ""
  text.add e.offset
  text.add ':'
  text.add e.span
  text
|
||
proc `$`(s: seq[SkipEntry]): string =
  ## Renders the table as a run of `<delta>:<span>,` items in decimal, where
  ## `<delta>` is the entry's offset minus the previous entry's offset
  ## (minus 0 for the first entry).
  result = ""
  var previous = 0'i32
  for entry in s:
    let delta = entry.offset - previous
    result.add $delta
    result.add ':'
    result.add $entry.span
    result.add ','
    previous = entry.offset
|
||
proc computeSkipTable*(r: var Reader; s: var SkipTable): int =
  ## Reads one node from `r` and records every subtree inside it (and the
  ## node itself) whose span exceeds 128 in `s`.
  ##
  ## Returns the reader offset taken before the node's first token was read.
  ## When that token is a `ParRi` the offset is returned negated, which is
  ## how the recursion signals "end of the enclosing subtree".
  ##
  ## Raises `AssertionDefect` on `EofToken`. This used to be `assert false`,
  ## which vanishes when assertions are disabled; the loop below would then
  ## never receive a negative result and presumably spin forever on
  ## truncated input.
  # NOTE(review): a `ParRi` read at offset 0 yields `-0`, which the caller
  # does not treat as "end of subtree" - confirm this cannot happen.
  let start = offset(r)
  let tok = next(r)
  #echo "LOOKGIN AT ", tok
  case tok.tk
  of EofToken:
    raiseAssert "computeSkipTable: unexpected end of file"
  of ParRi:
    result = -start
  of ParLe:
    result = start
    var last = -1
    while true:
      last = computeSkipTable(r, s)
      if last < 0: break
    let span = (-last) - start
    if span > 128:
      # assumption: 2 cache lines is not worth storing
      s.add SkipEntry(offset: start.int32, span: span.int32)
  of UnknownToken, DotToken, Ident, Symbol, SymbolDef,
     StringLit, CharLit, IntLit, UIntLit, FloatLit:
    result = start
|
||
type
  LazyNode* = object ## A token of the file viewed as a tree node.
    tk: TokenKind    ## kind of the underlying token
    s: StringView    ## the token's text; for `ParLe` this is the tag
    file: StringView ## filename attached to the token, if any
    line, col: int32 ## position; relative tokens are resolved against the
                     ## parent node by `down` and `next`

  LazyTree* = object ## A reader plus the skip information learned so far.
    r: Reader
    skipTable: Table[int, StringView]
      ## maps the reader offset right after a `(` tag to the text of the
      ## token that follows the matching `)`; filled by `next`
|
||
proc down*(tree: var LazyTree; n: LazyNode): LazyNode =
  ## Returns the first child of the compound node `n`, i.e. the token that
  ## follows its tag. If that token is `ParRi` or `EofToken` the result only
  ## carries the token kind and text (no position).
  assert n.tk == ParLe
  setPosition(tree.r, n.s)
  let child = next(tree.r)
  if child.tk notin {ParRi, EofToken}:
    var line, col: int32
    if child.filename.len <= 0:
      # relative position: resolve against `n`
      line = n.line + child.pos.line
      col = n.col + child.pos.col
    else:
      line = child.pos.line
      col = child.pos.col
    result = LazyNode(tk: child.tk, s: child.s, file: child.filename,
                      line: line, col: col)
  else:
    result = LazyNode(tk: child.tk, s: child.s)
|
||
proc root*(tree: var LazyTree): LazyNode =
  ## Reads the next token from the tree's reader and returns it as the root
  ## node. Line and column are only filled in when the token names a file;
  ## otherwise they stay 0.
  let first = next(tree.r)
  assert first.tk notin {ParRi, EofToken}, "module has no root!"
  result = LazyNode(tk: first.tk, s: first.s, file: first.filename)
  if first.filename.len > 0:
    result.line = first.pos.line
    result.col = first.pos.col
|
||
proc next*(tree: var LazyTree; n, parent: LazyNode): LazyNode =
  ## Returns the sibling that follows `n` inside `parent`.
  ##
  ## * If `n` is an atom, the next token is read and its position is
  ##   resolved against `parent` unless the token names a file.
  ## * If `n` is a compound node, its whole subtree is skipped. While
  ##   skipping, every nested subtree with a span above 128 is remembered in
  ##   `tree.skipTable` so that later calls can jump over it.
  ##
  ## Raises `AssertionDefect` with "missing ')'" when the input ends inside
  ## the subtree. This check is a `doAssert` because with a plain `assert`
  ## and assertions disabled the skip loop would presumably never terminate
  ## on truncated input.
  assert parent.tk == ParLe
  setPosition(tree.r, n.s)
  if n.tk == ParLe:
    # skip to ParRi
    let start = offset(tree.r)
    if tree.skipTable.len > 0:
      let dest = tree.skipTable.getOrDefault(start)
      if dest.len > 0:
        # NOTE(review): `dest` is the text of the token *after* the matching
        # ')', yet the node is reported as `ParRi`; the slow path below
        # reports the real token kind instead - confirm which is intended.
        return LazyNode(tk: ParRi, s: dest)
    # Offsets (taken right after the tag) of the nested subtrees that are
    # currently open.
    var open: seq[int] = @[]
    var t = next(tree.r)
    while true:
      doAssert t.tk != EofToken, "missing ')'"
      if t.tk == ParRi:
        if open.len == 0:
          t = next(tree.r) # skip the ')' and return whatever comes after:
          # NOTE(review): no file/line/col is filled in on this path.
          return LazyNode(tk: t.tk, s: t.s)
        let finished = open.pop()
        t = next(tree.r)

        if span(tree.r, finished, t.s) > 128:
          # assumption: > 2 cache lines is worth storing
          tree.skipTable[finished] = t.s
      else:
        if t.tk == ParLe:
          open.add offset(tree.r)
        t = next(tree.r)
  else:
    let t = next(tree.r)
    var line, col: int32
    if t.filename.len > 0:
      line = t.pos.line
      col = t.pos.col
    else:
      line = parent.line + t.pos.line
      col = parent.col + t.pos.col
    result = LazyNode(tk: t.tk, s: t.s, file: t.filename, line: line, col: col)
|
||
iterator sons(tree: var LazyTree; n: LazyNode): LazyNode =
  ## Yields the children of `n` in order: starts with `down` and advances
  ## with `next` until a node of kind `ParRi` shows up.
  var current = down(tree, n)
  while true:
    if current.tk == ParRi: break
    yield current
    current = next(tree, current, n)
|
||
proc sons2*(tree: var LazyTree; n: LazyNode): (LazyNode, LazyNode) =
  ## Returns the first child of `n` together with the node that follows it.
  ## No check is made that `n` really has two children.
  let first = down(tree, n)
  let second = next(tree, first, n)
  result = (first, second)
|
||
proc traverse(tree: var LazyTree; n: LazyNode) =
  ## Demo walk over the tree: prints every compound head, prints both
  ## operands of each `asgn` node without descending into it, recurses into
  ## all other compound nodes and reports atoms as ignored.
  if n.tk != ParLe:
    echo "ignoring atom ", n.s
    return

  echo "current head ", n.s
  if n.s == "asgn":
    echo "Found an assignment!"
    let (lhs, rhs) = sons2(tree, n)
    echo lhs.s, " = ", rhs.s
  else:
    for child in sons(tree, n):
      traverse(tree, child)
|
||
|
||
when isMainModule:
  # Small self-test: load an in-memory NIF snippet and walk it twice; the
  # second walk runs against whatever skip information the first one left.
  const testData = """(.nif24)
(stmts
(asgn (at x i) (call f a b))
(call g x yz)
)
"""

  var tree = LazyTree(r: openFromBuffer(testData))
  let directives = processDirectives(tree.r)
  echo directives

  let top = root(tree)
  traverse(tree, top)
  echo "new round:"
  traverse(tree, top)
|
||
|
||
when false:
  # Disabled benchmark: loads a NIF file, computes its skip table and prints
  # memory usage and timings along the way.
  import std / [strutils, monotimes]

  template bench(task, body) =
    ## Runs `body` and prints how long it took, labelled with `task`.
    let t0 = getMonoTime()
    body
    echo task, " TOOK ", getMonoTime() - t0

  proc test*(filename: string) =
    ## Benchmarks directive processing and skip table computation
    ## for `filename`.
    echo "A MEM: ", formatSize getOccupiedMem()

    var r = nifreader.open(filename)

    echo "B MEM: ", formatSize getOccupiedMem()

    bench "initial load":
      let res = processDirectives(r)
      echo res

    echo "C MEM: ", formatSize getOccupiedMem()

    assert res == Success
    var s: SkipTable = @[]
    bench "SkipTable":
      discard computeSkipTable(r, s)
    echo "D MEM: ", formatSize getOccupiedMem()
    algorithm.sort s
    echo s
    echo "In bytes: ", ($s).len
    echo "as binary: ", s.len * sizeof(SkipEntry)
    echo "ENTRIES: ", s.len

    # `atoms` and `trees` are not defined anywhere in this module, so this
    # line would stop the block from compiling once it is re-enabled:
    #echo "AToms ", atoms, " TRees ", trees

  test "tests/data/ccgexprs.nif"
|
Oops, something went wrong.