Skip to content

Commit

Permalink
fixed merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
Araq committed Jul 12, 2024
2 parents 77c815f + 97fa70a commit ca00232
Show file tree
Hide file tree
Showing 11 changed files with 1,134 additions and 141 deletions.
109 changes: 109 additions & 0 deletions src/experiments/nif_packedtree.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Nif library
# (c) Copyright 2024 Andreas Rumpf
#
# See the file "license.txt", included in this
# distribution, for details about the copyright.

## Parse NIF into a packed tree representation.

import bitabs, lineinfos, stringviews, packedtrees, nifreader

type
  NifKind* = enum ## Node kinds of the packed NIF tree built by `parse`.
                  ## `Empty` .. `StrLit` are the kinds `parse` stores as
                  ## atoms. The tag of a compound node is encoded as
                  ## `ord(Compound)` plus the tag's id in `Literals.kinds`,
                  ## so a stored kind may lie beyond `Compound`.
    Empty, Ident, Sym, Symdef, IntLit, UIntLit, FloatLit, CharLit, StrLit,
    Err, # must not be an atom!
    Compound

type
  StrId* = distinct uint32 ## Key type of `Literals.strings`.
  IntId* = distinct uint32 ## Key type of `Literals.integers`.
  UIntId* = distinct uint32 ## Key type of `Literals.uintegers`.
  FloatId* = distinct uint32 ## Key type of `Literals.floats`.
  KindId* = distinct uint32 ## Key type of `Literals.kinds`.
  Literals* = object ## Tables that hold the values which the atoms of a
                     ## packed tree refer to by id.
    man*: LineInfoManager ## Used by `parse` to pack/unpack line information.
    kinds*: BiTable[KindId, string] ## Tag names of compound nodes.
    files*: BiTable[FileId, string] ## File names. We cannot use StringView
                                    ## here as it may have unexpanded
                                    ## backslashes!
    strings*: BiTable[StrId, string] ## Identifiers, symbols and string literals.
    integers*: BiTable[IntId, int64] ## Signed integer literals.
    uintegers*: BiTable[UIntId, uint64] ## Unsigned integer literals.
    floats*: BiTable[FloatId, float64] ## Floating point literals.

proc addAtom*[L](dest: var PackedTree[NifKind]; kind: NifKind; lit: L; info: PackedLineInfo) =
  ## Appends an atom of `kind` to `dest`. `lit` (typically one of the
  ## distinct id types) is converted to `uint32` and handed to
  ## `packedtrees.addAtom` together with `info`.
  let payload = uint32(lit)
  packedtrees.addAtom(dest, kind, payload, info)

proc parse*(r: var Reader; dest: var PackedTree[NifKind]; lits: var Literals; parentInfo: PackedLineInfo): bool =
  ## Reads one token from `r` and appends the node it starts to `dest`,
  ## calling itself for the children of a compound node.
  ##
  ## Returns `false` if the token was `ParRi` or `EofToken` (nothing is
  ## appended in that case) and `true` otherwise. `parentInfo` is the line
  ## information of the enclosing node: a token without a file name is
  ## positioned relative to it. Literal values are interned in `lits`.
  let tok = next(r)
  var curInfo = parentInfo
  if tok.filename.len > 0:
    # absolute file position:
    let fileId = lits.files.getOrIncl(decodeFilename tok)
    curInfo = pack(lits.man, fileId, tok.pos.line, tok.pos.col)
  elif tok.pos.line != 0 or tok.pos.col != 0:
    # relative file position: offset the parent's line and column
    let (file, line, col) = unpack(lits.man, parentInfo)
    curInfo = pack(lits.man, file, line + tok.pos.line, col + tok.pos.col)

  case tok.tk
  of EofToken, ParRi:
    return false
  of ParLe:
    # The tag id is shifted past the builtin kinds so that it can be stored
    # directly in the node's kind as long as it does not exceed 255.
    let tagValue = lits.kinds.getOrInclFromView(tok.s).uint32 + ord(Compound).uint32
    let fitsInKind = tagValue <= 255'u32
    let nodeKind = if fitsInKind: tagValue else: ord(Compound).uint32
    copyInto(dest, cast[NifKind](nodeKind), curInfo):
      if not fitsInKind:
        # handle overflow: plain `Compound` node whose first child carries
        # the tag value
        dest.addAtom Ident, tagValue, curInfo
      while parse(r, dest, lits, curInfo):
        discard
  of UnknownToken:
    copyInto dest, Err, curInfo:
      dest.addAtom StrLit, lits.strings.getOrIncl(decodeStr tok), curInfo
  of DotToken:
    dest.addAtom Empty, 0'u32, curInfo
  of Ident:
    dest.addAtom Ident, lits.strings.getOrIncl(decodeStr tok), curInfo
  of Symbol:
    dest.addAtom Sym, lits.strings.getOrIncl(decodeStr tok), curInfo
  of SymbolDef:
    dest.addAtom Symdef, lits.strings.getOrIncl(decodeStr tok), curInfo
  of StringLit:
    dest.addAtom StrLit, lits.strings.getOrIncl(decodeStr tok), curInfo
    # XXX handle suffixes
  of CharLit:
    dest.addAtom CharLit, uint32(decodeChar(tok)), curInfo
  of IntLit:
    dest.addAtom IntLit, lits.integers.getOrIncl(decodeInt tok), curInfo
    # XXX handle suffixes
  of UIntLit:
    dest.addAtom UIntLit, lits.uintegers.getOrIncl(decodeUInt tok), curInfo
    # XXX handle suffixes
  of FloatLit:
    dest.addAtom FloatLit, lits.floats.getOrIncl(decodeFloat tok), curInfo
    # XXX handle suffixes
  result = true

type
  Module* = object ## What `parse(r)` produces for one NIF input.
    t*: PackedTree[NifKind] ## The packed tree.
    lits*: Literals ## The literal tables filled while building `t`.

proc parse*(r: var Reader): Module =
  ## Creates a `Module` whose tree is pre-sized from the file size of `r`
  ## and runs the recursive `parse` once on it, starting without line
  ## information. The boolean result of that call is discarded.
  # empirically, (size div 7) is a good estimate for the number of nodes
  # in the file:
  let expectedNodes = r.fileSize div 7
  result = Module(t: createPackedTree[NifKind](expectedNodes))
  discard parse(r, result.t, result.lits, NoLineInfo)

proc memSizes*(m: Module) =
  ## Prints one line per component of `m`: the node count of the tree and
  ## the `memSize` of the line info manager and of every literal table.
  template report(label: string; value: untyped) =
    echo label, value

  report "Tree ", m.t.len # * sizeof(PackedNode[NifKind])
  report "Man ", m.lits.man.memSize
  report "Kinds ", m.lits.kinds.memSize
  report "Files ", m.lits.files.memSize
  report "Strings ", m.lits.strings.memSize
  report "Ints ", m.lits.integers.memSize
  report "UInts ", m.lits.uintegers.memSize
  report "Floats ", m.lits.floats.memSize
239 changes: 239 additions & 0 deletions src/experiments/skiptables.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
# Nif library
# (c) Copyright 2024 Andreas Rumpf
#
# See the file "license.txt", included in this
# distribution, for details about the copyright.

## We like to traverse the AST implied by NIF without creating the AST,
## as that is expensive. We especially want to be able to skip subtrees.
## The solution is to traverse the file once and compute a skip table.

import nifreader, stringviews
import std / [strutils, algorithm, tables]

type
  SkipEntry = object ## One compound node recorded by `computeSkipTable`.
    offset, span: int32 ## `offset`: reader offset taken before the node's
                        ## '(' was read. `span`: offset taken before the
                        ## matching ')' was read, minus `offset`.
  SkipTable = seq[SkipEntry] ## Entries in the order `computeSkipTable`
                             ## appended them.

proc `<`*(a, b: SkipEntry): bool =
  ## Orders entries by `offset` alone; `span` does not take part.
  result = a.offset < b.offset
proc `==`*(a, b: SkipEntry): bool =
  ## Two entries are equal when their `offset` matches; `span` is ignored.
  result = a.offset == b.offset

proc myHex(x: int32): string =
  ## Returns `x` as upper-case hexadecimal without leading zeros.
  ## At least one digit is always produced, so `0` yields `"0"`.
  let digits = toHex(x)
  var first = 0
  # Stop one short of the end: the last digit must survive, otherwise zero
  # would be rendered as the empty string.
  while first < digits.len - 1 and digits[first] == '0':
    inc first
  result = substr(digits, first)

const
  Base36 = "0123456789abcdefghijklmnopqrstuvwxyz"

proc add*(result: var string; id: int32) =
  ## Appends `id` to `result` in base 36. The digits are emitted least
  ## significant first (i.e. reversed), since the order does not matter
  ## for this encoding.
  ##
  ## Zero is written as a single `'0'` so that it stays visible in the
  ## output. Negative values append nothing.
  if id == 0'i32:
    result.add '0'
    return
  var rest = id
  # Convert decimal number to base 36, reversed since it does not matter:
  while rest > 0'i32:
    result.add Base36[int(rest mod 36'i32)]
    rest = rest div 36'i32

proc `$`(e: SkipEntry): string =
  ## Renders `e` as `offset:span`. Both numbers go through the
  ## `add(var string, int32)` overload of this module, i.e. they are
  ## written in its base-36 encoding.
  result.add e.offset
  result.add ':'
  result.add e.span

proc `$`(s: seq[SkipEntry]): string =
  ## Renders the table as a sequence of `delta:span,` items in decimal,
  ## where `delta` is the distance of an entry's offset from the offset of
  ## the entry before it (the first entry is relative to 0).
  var lastOffset = 0'i32
  for i in 0 ..< s.len:
    let delta = s[i].offset - lastOffset
    result.add $delta
    result.add ':'
    result.add $s[i].span
    result.add ','
    lastOffset = s[i].offset

proc computeSkipTable*(r: var Reader; s: var SkipTable): int =
  ## Reads one token from `r` and, for a compound node, all of its children
  ## recursively. Every compound node whose span exceeds 128 is appended to
  ## `s` as a `SkipEntry`.
  ##
  ## Returns the reader offset taken before the token was read. For a ')'
  ## this offset is negated, which is how the enclosing call detects the
  ## end of its child list.
  ##
  ## Fails with an assertion defect if the input ends unexpectedly.
  const minSpanWorthStoring = 128
  result = offset(r)
  let t = next(r)
  case t.tk
  of EofToken:
    # `doAssert` instead of `assert`: with assertions switched off this
    # branch would return a non-negative offset, and the `while true` loop
    # of an enclosing ParLe would have no way to stop on a missing ')'.
    doAssert false, "unexpected end of file while computing the skip table"
  of ParRi:
    result = -result
  of ParLe:
    let start = result
    var last = -1
    while true:
      last = computeSkipTable(r, s)
      if last < 0: break
    # `last` is the negated offset of the closing ')'
    let span = (-last) - start
    if span > minSpanWorthStoring:
      # assumption: 2 cache lines is not worth storing
      s.add SkipEntry(offset: start.int32, span: span.int32)
  of UnknownToken, DotToken, Ident, Symbol, SymbolDef,
     StringLit, CharLit, IntLit, UIntLit, FloatLit:
    discard

type
  LazyNode* = object ## A token of the NIF text, handed out as a tree node.
    tk: TokenKind ## Kind of the token the node was created from.
    s: StringView ## The token's text; `down` and `next` pass it to
                  ## `setPosition` to resume reading at this node.
    file: StringView ## The token's `filename` (may be empty).
    line, col: int32 ## Position; when the token has no file name, `down`
                     ## and `next` add the parent's line/col to the
                     ## token's own values.

  LazyTree* = object ## Reader plus a lazily filled skip table.
    r: Reader ## Source of the tokens.
    skipTable: Table[int, StringView] ## Filled by `next`: reader offset
                                      ## recorded for a nested '(' mapped
                                      ## to the text of the token that
                                      ## followed its ')'.

proc down*(tree: var LazyTree; n: LazyNode): LazyNode =
  ## Returns the first child of the compound node `n`, i.e. the token that
  ## is read after repositioning the reader with `n.s`.
  ##
  ## If that token is `ParRi` or `EofToken` the result only carries the
  ## token kind and text. Otherwise the position is filled in: taken as is
  ## when the token names a file, else added to `n`'s line and column.
  assert n.tk == ParLe
  setPosition(tree.r, n.s)
  let tok = next(tree.r)
  if tok.tk == ParRi or tok.tk == EofToken:
    result = LazyNode(tk: tok.tk, s: tok.s)
  else:
    result = LazyNode(tk: tok.tk, s: tok.s, file: tok.filename,
                      line: tok.pos.line, col: tok.pos.col)
    if tok.filename.len == 0:
      # relative position: offset by the parent's line and column
      result.line = n.line + tok.pos.line
      result.col = n.col + tok.pos.col

proc root*(tree: var LazyTree): LazyNode =
  ## Reads the next token from the tree's reader and returns it as the root
  ## node. Asserts that the token is neither `ParRi` nor `EofToken`.
  ## Line and column are only set when the token names a file; otherwise
  ## they stay 0.
  let tok = next(tree.r)
  assert tok.tk notin {ParRi, EofToken}, "module has no root!"
  result = LazyNode(tk: tok.tk, s: tok.s, file: tok.filename)
  if tok.filename.len > 0:
    result.line = tok.pos.line
    result.col = tok.pos.col

proc next*(tree: var LazyTree; n, parent: LazyNode): LazyNode =
  ## Returns the node that follows `n`. `parent` must be a compound node
  ## (`ParLe`); its line/col are the base for relative positions.
  ##
  ## * `n` is not a compound node: the reader is repositioned with `n.s`
  ##   and the token read next is returned with file, line and col set.
  ## * `n` is a compound node: tokens are read until the ')' that closes
  ##   `n` is found; the token read after that ')' is returned. While
  ##   scanning, nested subtrees whose span exceeds 128 are recorded in
  ##   `tree.skipTable`, and a table hit for the start offset returns
  ##   immediately.
  ##
  ## NOTE(review): both compound-node exits return a node with only `tk`
  ## and `s` set; file, line and col stay empty/0 — confirm this is intended.
  ## NOTE(review): the table hit returns `tk: ParRi` together with the
  ## stored view, whereas the scanning path returns the kind of the token
  ## actually read after ')' — confirm these are meant to differ.
  ## NOTE(review): table keys are offsets taken after a nested '(' token
  ## was read, the lookup key is the offset right after `setPosition` —
  ## confirm that these can coincide.
  assert parent.tk == ParLe
  setPosition(tree.r, n.s)
  if n.tk == ParLe:
    # skip to ParRi
    let start = offset(tree.r)
    if tree.skipTable.len > 0:
      # an empty view is what `getOrDefault` yields for a missing key
      let dest = tree.skipTable.getOrDefault(start)
      if dest.len > 0:
        return LazyNode(tk: ParRi, s: dest)
    # One entry per currently open nested '('. Only the offset (first
    # element) is read back; the second element is always an empty view.
    var stack: seq[(int, StringView)] = @[]
    var t = next(tree.r)
    while true:
      assert t.tk != EofToken, "missing ')'"
      if t.tk == ParRi:
        if stack.len == 0:
          # this ')' closes `n` itself
          t = next(tree.r) # skip the ')' and return whatever comes after:
          return LazyNode(tk: t.tk, s: t.s)
        # this ')' closes a nested subtree
        let finished = stack.pop()
        t = next(tree.r)

        if span(tree.r, finished[0], t.s) > 128:
          # assumption: > 2 cache lines is worth storing
          tree.skipTable[finished[0]] = t.s
      else:
        if t.tk == ParLe:
          stack.add (offset(tree.r), StringView(p: nil, len: 0))
        t = next(tree.r)
  else:
    let t = next(tree.r)
    var line, col: int32
    if t.filename.len > 0:
      # absolute position
      line = t.pos.line
      col = t.pos.col
    else:
      # relative position: offset by the parent's line and column
      line = parent.line + t.pos.line
      col = parent.col + t.pos.col
    result = LazyNode(tk: t.tk, s: t.s, file: t.filename, line: line, col: col)

iterator sons(tree: var LazyTree; n: LazyNode): LazyNode =
  ## Yields the children of the compound node `n` in order: the first one
  ## comes from `down`, each following one from `next`.
  ##
  ## Iteration ends at the closing ')' and also at `EofToken`. `down` can
  ## hand back an `EofToken` node for truncated input; stopping only on
  ## `ParRi` would then keep yielding nodes with no way to terminate.
  var child = down(tree, n)
  while child.tk notin {ParRi, EofToken}:
    yield child
    child = next(tree, child, n)

proc sons2*(tree: var LazyTree; n: LazyNode): (LazyNode, LazyNode) =
  ## Returns the first child of `n` (via `down`) and the node following it
  ## (via `next`) as a pair. No check is made that two children exist.
  let first = down(tree, n)
  result = (first, next(tree, first, n))

proc traverse(tree: var LazyTree; n: LazyNode) =
  ## Demo walk over the lazy tree that prints what it sees. For a compound
  ## node tagged `asgn` the first two children are printed and the node is
  ## not descended into further; any other compound node is traversed
  ## child by child.
  if n.tk != ParLe:
    echo "ignoring atom ", n.s
    return

  echo "current head ", n.s
  if n.s == "asgn":
    echo "Found an assignment!"
    let (lhs, rhs) = sons2(tree, n)
    echo lhs.s, " = ", rhs.s
  else:
    for kid in sons(tree, n):
      traverse tree, kid


when isMainModule:
  # Small self-test: open a tiny NIF text from memory and walk it twice
  # with `traverse`, starting from the same root node both times.

  const testData = """(.nif24)
(stmts
(asgn (at x i) (call f a b))
(call g x yz)
)
"""

  var tree = LazyTree(r: openFromBuffer(testData))
  echo processDirectives(tree.r)
  let rootNode = root(tree)
  traverse tree, rootNode
  echo "new round:"
  traverse tree, rootNode


when false:
  # Disabled benchmark driver: loads a NIF file, computes its skip table
  # and prints memory usage and timings along the way.
  # NOTE(review): `atoms` and `trees` used near the end are not declared
  # anywhere in the visible source; this block would not compile if enabled.
  import std / [strutils, monotimes]

  template bench(task, body) =
    # Runs `body` and prints the elapsed monotonic time, prefixed by `task`.
    let t0 = getMonoTime()
    body
    echo task, " TOOK ", getMonoTime() - t0

  proc test*(filename: string) =
    # Each "X MEM" line reports `getOccupiedMem()` at that stage.
    echo "A MEM: ", formatSize getOccupiedMem()

    var r = nifreader.open(filename)

    echo "B MEM: ", formatSize getOccupiedMem()

    bench "initial load":
      let res = processDirectives(r)
      echo res

    echo "C MEM: ", formatSize getOccupiedMem()

    assert res == Success
    var s: SkipTable = @[]
    bench "SkipTable":
      discard computeSkipTable(r, s)
    echo "D MEM: ", formatSize getOccupiedMem()
    # entries are ordered by offset (see `<` on SkipEntry)
    algorithm.sort s
    echo s
    # compare the size of the textual rendering with the raw in-memory size
    echo "In bytes: ", ($s).len
    echo "as binary: ", s.len * sizeof(SkipEntry)
    echo "ENTRIES: ", s.len

    echo "AToms ", atoms, " TRees ", trees

  test "tests/data/ccgexprs.nif"

Loading

0 comments on commit ca00232

Please sign in to comment.