fixed merge conflict

nim-lang · Jul 12, 2024 · ca00232 · ca00232
2 parents 77c815f + 97fa70a
commit ca00232
Show file tree

Hide file tree

Showing 11 changed files with 1,134 additions and 141 deletions.
diff --git a/src/experiments/nif_packedtree.nim b/src/experiments/nif_packedtree.nim
@@ -0,0 +1,109 @@
+#       Nif library
+# (c) Copyright 2024 Andreas Rumpf
+#
+# See the file "license.txt", included in this
+# distribution, for details about the copyright.
+
+## Parse NIF into a packed tree representation.
+
+import bitabs, lineinfos, stringviews, packedtrees, nifreader
+
+type
+  # Node kinds of the packed tree produced by `parse`.
+  # `parse` encodes the tag of a compound node as `ord(Compound)` plus the
+  # tag's id in `Literals.kinds`, so kinds at or above `Compound` denote
+  # tagged compound nodes. `Err` is emitted by `parse` as a compound node
+  # that wraps a `StrLit` atom.
+  NifKind* = enum
+    Empty, Ident, Sym, Symdef, IntLit, UIntLit, FloatLit, CharLit, StrLit,
+    Err, # must not be an atom!
+    Compound
+
+type
+  # Distinct 32-bit ids; each one is the key type of the matching `BiTable`
+  # field in `Literals`.
+  StrId* = distinct uint32
+  IntId* = distinct uint32
+  UIntId* = distinct uint32
+  FloatId* = distinct uint32
+  KindId* = distinct uint32
+  # Side tables that `parse` fills while it builds the packed tree.
+  Literals* = object
+    man*: LineInfoManager # used by `parse` to pack and unpack line information
+    kinds*: BiTable[KindId, string] # tags of compound nodes
+    files*: BiTable[FileId, string] # we cannot use StringView here as it may have unexpanded backslashes!
+    strings*: BiTable[StrId, string] # identifiers, symbols, string literals and the text of unknown tokens
+    integers*: BiTable[IntId, int64] # values of `IntLit` tokens
+    uintegers*: BiTable[UIntId, uint64] # values of `UIntLit` tokens
+    floats*: BiTable[FloatId, float64] # values of `FloatLit` tokens
+
+proc addAtom*[L](dest: var PackedTree[NifKind]; kind: NifKind; lit: L; info: PackedLineInfo) =
+  ## Appends an atom of the given `kind` to `dest`. The literal handle `lit`
+  ## is converted to its raw `uint32` form before it is handed to
+  ## `packedtrees.addAtom`.
+  let raw = uint32(lit)
+  packedtrees.addAtom(dest, kind, raw, info)
+
+proc parse*(r: var Reader; dest: var PackedTree[NifKind]; lits: var Literals; parentInfo: PackedLineInfo): bool =
+  ## Reads one token from `r` and appends the corresponding node to `dest`,
+  ## interning literals, tags and file names in `lits`. For a `ParLe` token
+  ## the children are parsed recursively until a nested call returns false.
+  ##
+  ## `parentInfo` is the line information that relative token positions are
+  ## resolved against.
+  ##
+  ## Returns false when the token was `EofToken` or `ParRi` (nothing is
+  ## appended in that case) and true otherwise.
+  let t = next(r)
+  # Without any position on the token the node inherits the parent's info.
+  var currentInfo = parentInfo
+  if t.filename.len == 0:
+    # relative file position
+    if t.pos.line != 0 or t.pos.col != 0:
+      let (file, line, col) = unpack(lits.man, parentInfo)
+      currentInfo = pack(lits.man, file, line+t.pos.line, col+t.pos.col)
+  else:
+    # absolute file position:
+    let fileId = lits.files.getOrIncl(decodeFilename t)
+    currentInfo = pack(lits.man, fileId, t.pos.line, t.pos.col)
+
+  result = true
+  case t.tk
+  of EofToken, ParRi:
+    result = false
+  of ParLe:
+    #let kind = whichKind(t.s, Err)
+    # `ka` is the tag's id in `lits.kinds`, shifted past the builtin kinds.
+    let ka = lits.kinds.getOrInclFromView(t.s).uint32 + ord(Compound).uint32
+    # Values above 255 fall back to plain `Compound`
+    # (presumably the packed node stores the kind in 8 bits; TODO confirm).
+    let kb = if ka > 255'u32: ord(Compound).uint32 else: ka
+    copyInto(dest, cast[NifKind](kb), currentInfo):
+      if ka > 255'u32:
+        # handle overflow:
+        # the real tag value is kept as an `Ident` atom in front of the children
+        dest.addAtom Ident, ka, currentInfo
+      while true:
+        let progress = parse(r, dest, lits, currentInfo)
+        if not progress: break
+
+  of UnknownToken:
+    # Unknown tokens become an `Err` compound node holding the token text.
+    copyInto dest, Err, currentInfo:
+      dest.addAtom StrLit, lits.strings.getOrIncl(decodeStr t), currentInfo
+  of DotToken:
+    dest.addAtom Empty, 0'u32, currentInfo
+  of Ident:
+    dest.addAtom Ident, lits.strings.getOrIncl(decodeStr t), currentInfo
+  of Symbol:
+    dest.addAtom Sym, lits.strings.getOrIncl(decodeStr t), currentInfo
+  of SymbolDef:
+    dest.addAtom Symdef, lits.strings.getOrIncl(decodeStr t), currentInfo
+  of StringLit:
+    dest.addAtom StrLit, lits.strings.getOrIncl(decodeStr t), currentInfo
+    # XXX handle suffixes
+  of CharLit:
+    # Characters are stored inline as the atom's operand, not interned.
+    dest.addAtom CharLit, uint32 decodeChar(t), currentInfo
+  of IntLit:
+    dest.addAtom IntLit, lits.integers.getOrIncl(decodeInt t), currentInfo
+    # XXX handle suffixes
+  of UIntLit:
+    dest.addAtom UIntLit, lits.uintegers.getOrIncl(decodeUInt t), currentInfo
+    # XXX handle suffixes
+  of FloatLit:
+    dest.addAtom FloatLit, lits.floats.getOrIncl(decodeFloat t), currentInfo
+    # XXX handle suffixes
+
+type
+  # Result of parsing a whole NIF file: the packed tree together with the
+  # literal tables its nodes refer to.
+  Module* = object
+    t*: PackedTree[NifKind] # the parsed nodes
+    lits*: Literals # tables filled by `parse` while building `t`
+
+proc parse*(r: var Reader): Module =
+  ## Parses one node from `r` (including all of its children) into a fresh
+  ## `Module`. Relative positions are resolved against `NoLineInfo`.
+  # Empirically, one node per seven bytes of input is a good estimate for
+  # the initial capacity of the tree.
+  let estimatedNodes = r.fileSize div 7
+  result = Module(t: createPackedTree[NifKind](estimatedNodes))
+  let progressed = parse(r, result.t, result.lits, NoLineInfo)
+  discard progressed
+
+proc memSizes*(m: Module) =
+  ## Prints the number of tree nodes followed by the `memSize` of every
+  ## table in `m.lits`, one line per entry.
+  template lits: untyped = m.lits
+  let nodeCount = m.t.len # * sizeof(PackedNode[NifKind])
+  echo "Tree ", nodeCount
+  echo "Man ", lits.man.memSize
+  echo "Kinds ", lits.kinds.memSize
+  echo "Files ", lits.files.memSize
+  echo "Strings ", lits.strings.memSize
+  echo "Ints ", lits.integers.memSize
+  echo "UInts ", lits.uintegers.memSize
+  echo "Floats ", lits.floats.memSize
diff --git a/src/experiments/skiptables.nim b/src/experiments/skiptables.nim
@@ -0,0 +1,239 @@
+#       Nif library
+# (c) Copyright 2024 Andreas Rumpf
+#
+# See the file "license.txt", included in this
+# distribution, for details about the copyright.
+
+## We would like to traverse the AST implied by a NIF file without actually
+## building the AST, as building it is expensive. In particular we want to be
+## able to skip subtrees. The solution is to traverse the file once and
+## compute a skip table.
+
+import nifreader, stringviews
+import std / [strutils, algorithm, tables]
+
+type
+  # One compound node recorded by `computeSkipTable`: `offset` is the reader
+  # offset taken before the node's `ParLe` token was read, `span` is the
+  # distance from there to the offset taken before the closing `ParRi` token
+  # was read.
+  SkipEntry = object
+    offset, span: int32
+  # Entries are appended as their nodes are closed; callers that need them
+  # ordered by offset have to sort the table themselves.
+  SkipTable = seq[SkipEntry]
+
+proc `<`*(a, b: SkipEntry): bool =
+  ## Orders entries by `offset` only; `span` does not take part.
+  result = b.offset > a.offset
+
+proc `==`*(a, b: SkipEntry): bool =
+  ## Entries compare equal when their `offset`s match; `span` is ignored.
+  result = not (a.offset != b.offset)
+
+proc myHex(x: int32): string =
+  ## Returns the hexadecimal representation of `x` (as produced by `toHex`)
+  ## with leading zeros removed. Zero is rendered as "0" instead of the
+  ## empty string.
+  let digits = toHex(x)
+  var first = 0
+  # Stop one short of the end so that at least one digit always survives.
+  while first < digits.high and digits[first] == '0':
+    inc first
+  result = substr(digits, first)
+
+const
+  Base36 = "0123456789abcdefghijklmnopqrstuvwxyz"
+
+proc add*(result: var string; id: int32) =
+  ## Appends `id` in base 36 to `result`, least significant digit first
+  ## (the digit order does not matter to the users of this encoding).
+  ## Nothing is appended when `id` is zero or negative.
+  var remaining = id
+  while remaining > 0'i32:
+    let digit = int(remaining mod 36'i32)
+    result.add Base36[digit]
+    remaining = remaining div 36'i32
+
+proc `$`(e: SkipEntry): string =
+  ## Renders `e` as `offset:span`; both numbers are written with the
+  ## `add` overload for `int32`.
+  var text = ""
+  add(text, e.offset)
+  add(text, ":")
+  add(text, e.span)
+  result = text
+
+proc `$`(s: seq[SkipEntry]): string =
+  ## Renders the table as a sequence of `delta:span,` items in decimal,
+  ## where `delta` is the difference between an entry's offset and the
+  ## offset of the entry before it (0 for the first entry).
+  result = ""
+  var lastOffset = 0'i32
+  for i in 0 ..< s.len:
+    let delta = s[i].offset - lastOffset
+    result.add $delta
+    result.add ':'
+    result.add $(s[i].span)
+    result.add ','
+    lastOffset = s[i].offset
+
+proc computeSkipTable*(r: var Reader; s: var SkipTable): int =
+  ## Reads one token from `r` and, for a `ParLe`, recursively consumes all
+  ## tokens up to and including the closing `ParRi`. A `SkipEntry` is
+  ## appended to `s` for every compound node whose span exceeds 128.
+  ##
+  ## Returns the reader offset taken before the token was read; the value is
+  ## negated when the token was a `ParRi`, which is how the enclosing call
+  ## detects the end of its node.
+  ##
+  ## Fails with an assertion error when the end of the input is reached.
+  result = offset(r)
+  let t = next(r)
+  #echo "LOOKING AT ", t
+  case t.tk
+  of EofToken:
+    # `doAssert` instead of `assert`: with assertions disabled the enclosing
+    # `while true` loop would never see a negative result on truncated input
+    # and would spin forever.
+    doAssert false, "unexpected end of file while computing the skip table"
+  of ParRi:
+    result = -result
+  of ParLe:
+    let start = result
+    var last = -1
+    while true:
+      last = computeSkipTable(r, s)
+      if last < 0: break
+    # `-last` is the offset taken before the closing ')' was read.
+    let span = (-last) - start
+    if span > 128:
+      # assumption: 2 cache lines is not worth storing
+      s.add SkipEntry(offset: start.int32, span: span.int32)
+  of UnknownToken, DotToken, Ident, Symbol, SymbolDef,
+      StringLit, CharLit, IntLit, UIntLit, FloatLit:
+    discard
+
+type
+  # A node handed out by `root`, `down` and `next`; it is a copy of the data
+  # of one reader token.
+  LazyNode* = object
+    tk: TokenKind # kind of the underlying token
+    s: StringView # the token's `s`; also used to reposition the reader
+    file: StringView # the token's `filename`, empty if the token had none
+    line, col: int32 # position; zero when the producing code did not set it
+
+  # A reader plus a cache that `next` fills while skipping over subtrees.
+  LazyTree* = object
+    r: Reader
+    skipTable: Table[int, StringView] # reader offset -> `s` of a later token, see `next`
+
+proc down*(tree: var LazyTree; n: LazyNode): LazyNode =
+  ## Returns the first child of the compound node `n`: the reader is
+  ## repositioned with `n.s` and the token that follows is returned.
+  ## If that token is `ParRi` or `EofToken` the result carries only `tk`
+  ## and `s`. Positions of tokens without a file name are taken relative
+  ## to `n`; `file` is copied from the token unchanged.
+  assert n.tk == ParLe
+  setPosition(tree.r, n.s)
+  let tok = next(tree.r)
+  if tok.tk notin {ParRi, EofToken}:
+    var line, col: int32
+    if tok.filename.len == 0:
+      # relative position
+      line = n.line + tok.pos.line
+      col = n.col + tok.pos.col
+    else:
+      # absolute position
+      line = tok.pos.line
+      col = tok.pos.col
+    result = LazyNode(tk: tok.tk, s: tok.s, file: tok.filename, line: line, col: col)
+  else:
+    result = LazyNode(tk: tok.tk, s: tok.s)
+
+proc root*(tree: var LazyTree): LazyNode =
+  ## Reads the next token from the tree's reader and returns it as the root
+  ## node. Line and column are only set when the token carries a file name;
+  ## otherwise they stay zero.
+  let tok = next(tree.r)
+  assert tok.tk notin {ParRi, EofToken}, "module has no root!"
+  result = LazyNode(tk: tok.tk, s: tok.s, file: tok.filename)
+  if tok.filename.len > 0:
+    result.line = tok.pos.line
+    result.col = tok.pos.col
+
+proc next*(tree: var LazyTree; n, parent: LazyNode): LazyNode =
+  ## Returns the node that follows `n` inside `parent`.
+  ##
+  ## If `n` is an atom this is simply the next token, with its position
+  ## resolved relative to `parent` when the token has no file name.
+  ## If `n` is a compound node, its whole subtree is skipped first; the
+  ## node returned in that case carries only `tk` and `s`.
+  ##
+  ## While skipping, nested subtrees whose `span` exceeds 128 are recorded
+  ## in `tree.skipTable`. The subtree of `n` itself is not recorded.
+  assert parent.tk == ParLe
+  setPosition(tree.r, n.s)
+  if n.tk == ParLe:
+    # skip to ParRi
+    let start = offset(tree.r)
+    if tree.skipTable.len > 0:
+      # An empty view doubles as "no entry" here.
+      let dest = tree.skipTable.getOrDefault(start)
+      if dest.len > 0:
+        # NOTE(review): the table stores the `s` of the token read *after*
+        # a nested node's ')' (see the store below), yet a hit is always
+        # reported with `tk: ParRi`. Confirm this is intended.
+        return LazyNode(tk: ParRi, s: dest)
+    # One entry per nested '(' that is still open: the reader offset right
+    # after its `ParLe` token was read (the view part is never filled in).
+    var stack: seq[(int, StringView)] = @[]
+    var t = next(tree.r)
+    while true:
+      assert t.tk != EofToken, "missing ')'"
+      if t.tk == ParRi:
+        if stack.len == 0:
+          t = next(tree.r) # skip the ')' and return whatever comes after:
+          return LazyNode(tk: t.tk, s: t.s)
+        let finished = stack.pop()
+        t = next(tree.r)
+
+        if span(tree.r, finished[0], t.s) > 128:
+          # assumption: > 2 cache lines is worth storing
+          tree.skipTable[finished[0]] = t.s
+      else:
+        if t.tk == ParLe:
+          stack.add (offset(tree.r), StringView(p: nil, len: 0))
+        t = next(tree.r)
+  else:
+    let t = next(tree.r)
+    var line, col: int32
+    if t.filename.len > 0:
+      # absolute position
+      line = t.pos.line
+      col = t.pos.col
+    else:
+      # relative to the parent node
+      line = parent.line + t.pos.line
+      col = parent.col + t.pos.col
+    result = LazyNode(tk: t.tk, s: t.s, file: t.filename, line: line, col: col)
+
+iterator sons(tree: var LazyTree; n: LazyNode): LazyNode =
+  ## Yields the children of the compound node `n` one after another,
+  ## stopping at the first node whose kind is `ParRi`.
+  var current = down(tree, n)
+  while true:
+    if current.tk == ParRi: break
+    yield current
+    current = next(tree, current, n)
+
+proc sons2*(tree: var LazyTree; n: LazyNode): (LazyNode, LazyNode) =
+  ## Returns the first child of `n` together with the node that follows it.
+  let first = down(tree, n)
+  let second = next(tree, first, n)
+  result = (first, second)
+
+proc traverse(tree: var LazyTree; n: LazyNode) =
+  ## Demo traversal: prints the head of every compound node it visits.
+  ## For an `asgn` node only its first two children are printed; every
+  ## other compound node is descended into. Atoms are reported and skipped.
+  if n.tk != ParLe:
+    echo "ignoring atom ", n.s
+    return
+
+  echo "current head ", n.s
+  if n.s == "asgn":
+    echo "Found an assignment!"
+    let (lhs, rhs) = sons2(tree, n)
+    echo lhs.s, " = ", rhs.s
+  else:
+    for child in sons(tree, n):
+      traverse(tree, child)
+
+
+when isMainModule:
+  # Small demo: read a NIF snippet from memory and traverse it twice with
+  # the same `LazyTree`, so that the second round runs with whatever the
+  # first round stored in the skip table.
+
+  const testData = """(.nif24)
+(stmts
+  (asgn (at x i) (call f a b))
+  (call g x yz)
+)
+  """
+
+  var tree = LazyTree(r: openFromBuffer(testData))
+  # The directives have to be consumed before the root node can be read.
+  let res = processDirectives(tree.r)
+  echo res
+  let r = root(tree)
+  traverse tree, r
+  echo "new round:"
+  traverse tree, r
+
+
+when false:
+  # Disabled benchmark: loads a NIF file, computes its `SkipTable` and
+  # prints timings, memory usage and the size of the table.
+  # NOTE(review): `atoms` and `trees` used at the end are not declared in
+  # the visible part of this module; confirm before enabling this block.
+  import std / [strutils, monotimes]
+
+  # Runs `body` and prints how long it took, labelled with `task`.
+  template bench(task, body) =
+    let t0 = getMonoTime()
+    body
+    echo task, " TOOK ", getMonoTime() - t0
+
+  proc test*(filename: string) =
+    echo "A MEM: ", formatSize getOccupiedMem()
+
+    var r = nifreader.open(filename)
+
+    echo "B MEM: ", formatSize getOccupiedMem()
+
+    bench "initial load":
+      let res = processDirectives(r)
+      echo res
+
+    echo "C MEM: ", formatSize getOccupiedMem()
+
+    assert res == Success
+    var s: SkipTable = @[]
+    bench "SkipTable":
+      discard computeSkipTable(r, s)
+      echo "D MEM: ", formatSize getOccupiedMem()
+      # `computeSkipTable` appends entries as nodes are closed, so sort them
+      # by offset before printing.
+      algorithm.sort s
+      echo s
+      echo "In bytes: ", ($s).len
+      echo "as binary: ", s.len * sizeof(SkipEntry)
+    echo "ENTRIES: ", s.len
+
+    echo "AToms ", atoms, " TRees ", trees
+
+  test "tests/data/ccgexprs.nif"
+