From ee90c792c5be514b6e3413310e1e6bdee074a3b9 Mon Sep 17 00:00:00 2001
From: Brendan Berg <brendan@berg.industries>
Date: Sun, 11 Sep 2022 13:06:21 -0400
Subject: [PATCH 1/6] initial python wip

---
 python/src/__init__.py    |  18 +++
 python/src/combinators.py |  10 ++
 python/src/format.py      |  16 ++
 python/src/parse.py       | 302 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 346 insertions(+)
 create mode 100644 python/src/__init__.py
 create mode 100644 python/src/combinators.py
 create mode 100644 python/src/format.py
 create mode 100644 python/src/parse.py

diff --git a/python/src/__init__.py b/python/src/__init__.py
new file mode 100644
index 0000000..2febdff
--- /dev/null
+++ b/python/src/__init__.py
@@ -0,0 +1,18 @@
+
+import parse
+
+defaultSerializer = object()
+
+def parse(input: str):
+    _ignore, data, error = parse.document(input, 0)
+
+    if error:
+        raise error
+    else:
+        return data
+
+def serialize(table):
+    return defaultSerializer.serialize(table)
+
+def setSerializer(serializer):
+    defaultSerializer = serializer()
diff --git a/python/src/combinators.py b/python/src/combinators.py
new file mode 100644
index 0000000..d935504
--- /dev/null
+++ b/python/src/combinators.py
@@ -0,0 +1,10 @@
+
+
+def concat(*funcs):
+    pass
+
+def altern(*funcs):
+    pass
+
+def repeat(*funcs):
+    pass
diff --git a/python/src/format.py b/python/src/format.py
new file mode 100644
index 0000000..d6751ea
--- /dev/null
+++ b/python/src/format.py
@@ -0,0 +1,16 @@
+import re
+
+
+class TableFormat(object):
+    rules = None
+
+    def __init__(self, rules):
+        rule = re.compile(r'^([A-Z]+)(?::([A-Z]+))?$|^([0-9]+)(?::([0-9]+))?$|^([A-Z]+)([0-9]+)(?::([A-Z]+)([0-9]+))?$')
+        
+        self.rules = []
+
+        for key, props in rules:
+            if match := rule.match(key):
+                startRow, endRow, startCol, endCol = (0, -1, 0, -1)
+
+                
\ No newline at end of file
diff --git a/python/src/parse.py b/python/src/parse.py
new file mode 100644
index 0000000..1ecdc01
--- /dev/null
+++ b/python/src/parse.py
@@ -0,0 +1,302 @@
+import re
+from enum import Enum
+
+from combinators import concat, altern, repeat
+from format import TableFormat
+
+
+pattern = {
+    'string': re.compile(r'"((?:[^"\n\r\b\\]|\\.)*)"[^\S\r\n]*'),
+    'integer': re.compile(r'([+-]?(?:\d+_?)*\d+)[^\S\r\n]*'),
+    'float': None,
+    'hex': None,
+    'exponent': None,
+    'date': None,
+    'time': None,
+    'boolean': None,
+    'null': None,
+
+    'newline': re.compile(r'\n'),
+    'comma': None,
+    'equals': None,
+    'tilde': None,
+    'star': None,
+    'openBrace': None,
+    'closeBrace': None,
+
+    'version': None,
+    'cellRange': None,
+    'tag': None,
+    'propName': None,
+}
+
+class Token(Enum):
+    Equals = '='
+    Tilde = '~'
+    Star = '*'
+    Comma = ','
+    Newline = '\n'
+    OpenBrace = '{'
+    CloseBrace = '}'
+
+
+def document(input: str, offset: int):
+    offset, head, error = header(input, offset)
+
+    if error:
+        return (offset, None, error)
+
+    offset, table, error = data(input, offset)
+
+    if error:
+        return (offset, None, error)
+    
+    table.header = head
+
+    if offset == len(input):
+        return (offset, table, None)
+    
+    offset, table.format, error = format(input, offset)
+
+    if error:
+        return (offset, None, error)
+
+    return (offset, table, None)
+
+def header(input: str, offset: int):
+    def headerLine(input: str, offset: int):
+        offset, elt, error = label(input, offset)
+
+        if error: return (offset, [], None)
+
+        offset, matched, error = concat(
+            repeat(comma, label), newline
+        )(input, offset)
+
+        if error: return (offset, None, error)
+
+        labels = filter(lambda elt: elt not in (Token.Comma, Token.Newline), matched)
+        return (offset, [elt] + labels, None)
+
+    start = offset
+    offset, elts, error = headerLine(input, offset)
+
+    if error: return (offset, None, error)
+
+    offset, verNumber, error = concat(equals, version, newline)(input, offset)
+
+    if error:
+        return (start, None, error)
+    elif verNumber[1] != '0.1':
+        return (start, None, 'invalid version number')
+    else:
+        return (offset, elts, None)
+
+def data(input: str, offset: int):
+    # TODO: FINISH THIS IMPLEMENTATION
+    offset, rows, error = repeat(row)(input, offset)
+
+    if error: return (offset, None, error)
+    else:
+        count = 0
+        breaks = []
+        table = {}
+
+        table.breaks = breaks
+        return (offset, table, None)
+
+def format(input: str, offset: int):
+    offset, _ignore, errors = concat(star, newline)(input, offset)
+
+    if errors:
+        return (offset, None, errors)
+
+    return formatRules(input, offset)
+
+def formatRules(input: str, offset: int):
+    offset, lines, error = repeat(formatRule)(input, offset)
+
+    if error:
+        return (offset, None, error)
+    elif offset != len(input):
+        return (offset, None, 'format rule')
+
+    rules = {}
+
+    for key, props in lines:
+        if key in rules:
+            rules[key] += props
+        else:
+            rules[key] = props
+
+    return (offset, TableFormat(rules), None)
+
+def formatRule(input: str, offset: int):
+    offset, result, error = concat(cellRange, properties)(input, offset)
+
+    if error:
+        return (offset, None, error)
+    else:
+        return (offset, {result[0]: result[1:]}, None)
+
+def cellRange(input: str, offset: int):
+    if match := pattern['cellRange'].match(input, offset):
+        return (match.end(), match.groups()[0], None)
+    else:
+        return (offset, None, 'cell range')
+
+def properties(input: str, offset: int):
+    position, props, error = concat(
+        openBrace,
+        concat(tag, repeat(comma, tag)),
+        closeBrace
+    )(input, offset)
+
+    if error:
+        return (position, None, error)
+    else:
+        filtered = filter(lambda prop: prop is not Token.Comma, props[1:-1])
+        return (position, filtered, None)
+
+def row(input: str, offset: int):
+    position, rowData, error = altern(
+        concat(element, repeat(comma, element), newline),
+        concat(tilde, newline)
+    )(input, offset)
+
+    if error:
+        return (position, None, "element or '~'")
+    elif rowData and rowData[0] is Token.Tilde:
+        return (position, Token.Tilde, None)
+    else:
+        filtered = filter(lambda elt: elt not in (Token.Comma, Token.Newline), rowData)
+        return (position, filtered, None)
+
+def element(input: str, offset: int):
+    return altern(stringValue, number, booleanValue, nullValue)(input, offset)
+
+def label(input: str, offset: int):
+    return altern(stringValue, nullValue)(input, offset)
+
+def equals(input: str, offset: int):
+    if match := pattern['equals'].match(input, offset):
+        return (match.end(), Token.Equals, None)
+    else:
+        return (offset, None, 'header separator')
+
+def star(input: str, offset: int):
+    if match := pattern['star'].match(input, offset):
+        return (match.end(), Token.CloseBrace, None)
+    else:
+        return (offset, None, 'format separator')
+
+def tilde(input: str, offset: int):
+    if match := pattern['tilde'].match(input, offset):
+        return (match.end(), Token.Tilde, None)
+    else:
+        return (offset, None, 'section separator')
+
+def comma(input: str, offset: int):
+    if match := pattern['comma'].match(input, offset):
+        return (match.end(), Token.Comma, None)
+    else:
+        return (offset, None, 'comma')
+
+def newline(input: str, offset: int):
+    if match := pattern['newline'].match(input, offset):
+        return (match.end(), Token.Newline, None)
+    else:
+        return (offset, None, 'newline')
+
+def openBrace(input: str, offset: int):
+    if match := pattern['openBrace'].match(input, offset):
+        return (match.end(), Token.OpenBrace, None)
+    else:
+        return (offset, None, '"{"')
+
+def closeBrace(input: str, offset: int):
+    if match := pattern['closeBrace'].match(input, offset):
+        return (match.end(), Token.CloseBrace, None)
+    else:
+        return (offset, None, '"}"')
+
+def version(input: str, offset: int):
+    if match := pattern['version'].match(input, offset):
+        return (match.end(), match.groups()[0], None)
+    else:
+        return (offset, None, 'version number')
+        
+def tag(input: str, offset: int):
+    if match := pattern['propName'].match(input, offset):
+        return (match.end(), match.groups()[0], None)
+    else:
+        return (offset, None, 'Format Property')
+
+def stringValue(input: str, offset: int):
+    escapes = re.compile(r'\\["ntfrb\\]|\\u\{([0-9A-Fa-f]{1,8})\}')
+
+    def replace(match):
+        if codepoint := match.groups():
+            return chr(int(codepoint[0], 16))
+        else:
+            chars = {
+                r'\"': '\"',
+                r'\n': '\n',
+                r'\t': '\t',
+                r'\f': '\f',
+                r'\r': '\r',
+                r'\b': '\b',
+                r'\\': '\\'
+            }
+            return chars[match.string]
+
+    if match := pattern['string'].match(input, offset):
+        value = escapes.sub(replace, match.groups()[0])
+        return (match.end(), value, None)
+    else:
+        return (offset, None, 'string')
+
+def number(input: str, offset: int):
+    return altern(scientific, hexValue, floatValue, intValue)(input, offset)
+
+def scientific(input: str, offset: int):
+    if match := pattern['exponent'].match(input, offset):
+        mantissa = match.groups()[0].replace('_', '')
+        exponent = match.groups()[1].replace('_', '')
+        return (match.end(), float(f'{mantissa}e{exponent}'), None)
+    else:
+        return (offset, None, 'scientific')
+
+def floatValue(input: str, offset: int):
+    if match := pattern['float'].match(input, offset):
+        value = match.groups()[0].replace('_', '')
+        return (match.end(), float(value), None)
+    else:
+        return (offset, None, 'float')
+
+def intValue(input: str, offset: int):
+    if match := pattern['integer'].match(input, offset):
+        value = match.groups()[0].replace('_', '')
+        return (match.end(), int(value), None)
+    else:
+        return (offset, None, 'integer')
+
+def hexValue(input: str, offset: int):
+    if match := pattern['hex'].match(input, offset):
+        value = match.groups()[0].replace('0x', '', 1).replace('_', '')
+        return [match.end(), int(value, 16), None]
+    else:
+        return (offset, None, 'hexadecimal')
+
+def booleanValue(input: str, offset: int):
+    if match := pattern['boolean'].match(input, offset):
+        value = match.groups()[0] == 'true'
+        return (match.end(), value, None)
+    else:
+        return (offset, None, 'boolean')
+
+def nullValue(input: str, offset: int):
+    if match := pattern['null'].match(input, offset):
+        return (match.end(), None, None)
+    else:
+        return (offset, None, 'null')

From 59c3ae10a28735ddeb636aaeb16beaa350c3c78d Mon Sep 17 00:00:00 2001
From: Brendan Berg <brendan@berg.industries>
Date: Sun, 11 Sep 2022 23:36:17 -0400
Subject: [PATCH 2/6] python should be feature-complete

---
 python/src/__init__.py    | 10 +++---
 python/src/combinators.py | 70 +++++++++++++++++++++++++++++++++++----
 python/src/format.py      | 69 ++++++++++++++++++++++++++++++++++++--
 python/src/parse.py       | 58 +++++++++++++++++++-------------
 python/src/serializers.py | 61 ++++++++++++++++++++++++++++++++++
 python/src/table.py       | 53 +++++++++++++++++++++++++++++
 6 files changed, 286 insertions(+), 35 deletions(-)
 create mode 100644 python/src/serializers.py
 create mode 100644 python/src/table.py

diff --git a/python/src/__init__.py b/python/src/__init__.py
index 2febdff..e41668c 100644
--- a/python/src/__init__.py
+++ b/python/src/__init__.py
@@ -1,7 +1,8 @@
-
 import parse
+from serializers import Serializer, TabloSerializer
+
 
-defaultSerializer = object()
+defaultSerializer: Serializer = TabloSerializer
 
 def parse(input: str):
     _ignore, data, error = parse.document(input, 0)
@@ -14,5 +15,6 @@ def parse(input: str):
 def serialize(table):
     return defaultSerializer.serialize(table)
 
-def setSerializer(serializer):
-    defaultSerializer = serializer()
+def setSerializer(serializer: Serializer):
+    global defaultSerializer
+    defaultSerializer = serializer
diff --git a/python/src/combinators.py b/python/src/combinators.py
index d935504..71d6380 100644
--- a/python/src/combinators.py
+++ b/python/src/combinators.py
@@ -1,10 +1,68 @@
+from typing import Iterable
 
 
-def concat(*funcs):
-    pass
+def concat(*rules):
+    def combinator(input: str, offset: int):
+        cursor = offset
+        results = []
 
-def altern(*funcs):
-    pass
+        for rule in rules:
+            offset, match, error = rule(input, cursor)
 
-def repeat(*funcs):
-    pass
+            if error:
+                return (cursor, None, error)
+            else:
+                cursor = offset
+
+                if isinstance(match, Iterable):
+                    results.extend(match)
+                else:
+                    results.append(match)
+
+        return (cursor, results, None)
+
+    return combinator
+
+def altern(*rules):
+    def combinator(input: str, offset: int):
+        cursor = offset
+        errors = []
+
+        for rule in rules:
+            offset, result, error = rule(input, cursor)
+
+            if not error:
+                return (offset, result, None)
+            else:
+                cursor = offset
+                errors.extend(error)
+        
+        return (offset, None, f'one of {",".join(errors)}')
+
+    return combinator
+
+def repeat(*rules):
+    def combinator(input: str, offset: int):
+        cursor = offset
+        results = []
+        error = None
+
+        while True:
+            first, *rest = rules
+            cursor, result, error = first(input, cursor)
+
+            if error:
+                break
+            else:
+                results.append(result)
+
+            cursor, result, error = concat(*rest)(input, cursor)
+
+            if error:
+                return (offset, None, error)
+            else:
+                results.extend(result)
+        
+        return (cursor, results, error)
+    
+    return combinator
diff --git a/python/src/format.py b/python/src/format.py
index d6751ea..6f3181c 100644
--- a/python/src/format.py
+++ b/python/src/format.py
@@ -11,6 +11,71 @@ def __init__(self, rules):
 
         for key, props in rules:
             if match := rule.match(key):
-                startRow, endRow, startCol, endCol = (0, -1, 0, -1)
+                start_row, end_row, start_col, end_col = (0, -1, 0, -1)
 
-                
\ No newline at end of file
+                groups = match.groups()
+
+                if groups[0] is not None:
+                    start_col = self._alpha_to_int(groups[0])
+                    end_col = self._alpha_to_int(groups[1]) if groups[1] is not None else start_col
+                elif groups[2] is not None:
+                    start_row = int(groups[2])
+                    end_row = int(groups[3]) if groups[3] is not None else start_row
+                elif groups[4] is not None:
+                    start_col = self._alpha_to_int(groups[4])
+                    start_row = int(groups[5])
+                    end_col = self._alpha_to_int(groups[6]) if groups[6] is not None else start_col
+                    end_row = int(groups[7]) if groups[7] is not None else start_row
+
+                if ((start_row <= end_row or end_row == -1) and 
+                        (start_col <= end_col or end_col == -1)):
+                    bounds = (start_col, end_col, start_row, end_row)
+                    self.rules.append((
+                        bounds, key, props
+                    ))
+
+
+
+    def get_props(self, col, row):
+        def applicable(rule):
+            bounds, _key, _props = rule
+            start_col, end_col, start_row, end_row = bounds
+
+            return (
+                row >= start_row and
+                (row <= end_row or end_row == -1) and
+                numeric_column >= start_col and
+                (numeric_column <= end_col or end_col == -1)
+            )
+
+        numeric_column = self._alpha_to_int(col)
+
+        result = []
+
+        for rule in filter(applicable, self.rules):
+            _bounds, _key, props = rule
+            result.extend(props)
+
+        return result
+
+    def get_rules(self):
+        rules = {}
+
+        for rule in self.rules:
+            _bounds, key, props = rule
+
+            if key in rules:
+                rules[key].extend(props)
+            else:
+                rules[key] = props
+
+        return rules
+
+    def _alpha_to_int(self, index: str):
+        alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+        value = 0
+
+        for idx, char in enumerate(reversed(index.split(''))):
+            value += alphabet.index(char) * 26 ** idx
+
+        return value
diff --git a/python/src/parse.py b/python/src/parse.py
index 1ecdc01..341fccb 100644
--- a/python/src/parse.py
+++ b/python/src/parse.py
@@ -1,33 +1,35 @@
+from functools import reduce
 import re
 from enum import Enum
 
 from combinators import concat, altern, repeat
 from format import TableFormat
+from table import Table
 
 
 pattern = {
     'string': re.compile(r'"((?:[^"\n\r\b\\]|\\.)*)"[^\S\r\n]*'),
     'integer': re.compile(r'([+-]?(?:\d+_?)*\d+)[^\S\r\n]*'),
-    'float': None,
-    'hex': None,
-    'exponent': None,
-    'date': None,
-    'time': None,
-    'boolean': None,
-    'null': None,
+    'float': re.compile(r'([+-]?(?:(?:(?:0|[1-9](?:_?\d+)*)\.(?:(?:\d+_?)*\d+)?)|0\.|\.\d+))[^\S\r\n]*'),
+    'hex': re.compile(r'([+-]?0x(?:[\dA-Fa-f]+_?)*[\dA-Fa-f]+)[^\S\r\n]*'),
+    'exponent': re.compile(r'([+-]?(?:(?:0|[1-9](?:_?\d+)*\.(?:(?:\d+_?)*\d+)?)|0\.|\.\d+|(?:\d+_?)*\d+))[eE]([+-]?(?:\d+_?)*\d+)[^\S\r\n]*'),
+    'date': re.compile(r'#(?:(\d{4})(?:-(\d{2})(?:-(\d{2}))?)?)?'),
+    'time': re.compile(r'(\d{2})(?::(\d{2})(?::(\d{2})(?:\.(\d{4}))?)?)?(Z|[+-]?\d{4})?'),
+    'boolean': re.compile(r'(true|false)[^\S\r\n]*'),
+    'null': re.compile(r'-[^\S\r\n]*'),
 
     'newline': re.compile(r'\n'),
-    'comma': None,
-    'equals': None,
-    'tilde': None,
-    'star': None,
-    'openBrace': None,
-    'closeBrace': None,
-
-    'version': None,
-    'cellRange': None,
-    'tag': None,
-    'propName': None,
+    'comma': re.compile(r',[^\S\r\n]*'),
+    'equals': re.compile(r'='),
+    'tilde': re.compile(r'~'),
+    'star': re.compile(r'\*'),
+    'openBrace': re.compile(r'{[^\S\r\n]*'),
+    'closeBrace': re.compile(r'}[^\S\r\n]*\n'),
+
+    'version': re.compile(r' ?(\d+\.\d+)'),
+    'cellRange': re.compile(r''),
+    'tag': re.compile(r'([A-Za-z_][A-Za-z0-9_-]*)[^\S\r\n]*'),
+    'propName': re.compile(r'(plain|bold|italic|underline|strike|normal|mono|black|red|orange|yellow|green|blue|violet|grey|white)[^\S\r\n]*'),
 }
 
 class Token(Enum):
@@ -93,15 +95,25 @@ def headerLine(input: str, offset: int):
         return (offset, elts, None)
 
 def data(input: str, offset: int):
-    # TODO: FINISH THIS IMPLEMENTATION
     offset, rows, error = repeat(row)(input, offset)
 
-    if error: return (offset, None, error)
+    if error:
+        return (offset, None, error)
     else:
-        count = 0
-        breaks = []
-        table = {}
+        def process(result, elt):
+            count, rows, breaks = result
+            
+            if elt == Token.Tilde:
+                breaks.append(count)
+            else:
+                count += 1
+                result.append(elt)
+
+            return (count, rows, breaks)
+
+        _count, result, breaks = reduce(process, rows, (0, [], []))
 
+        table = Table(None, result, [])
         table.breaks = breaks
         return (offset, table, None)
 
diff --git a/python/src/serializers.py b/python/src/serializers.py
new file mode 100644
index 0000000..9bad64b
--- /dev/null
+++ b/python/src/serializers.py
@@ -0,0 +1,61 @@
+from table import Table
+
+
+column_labels = {}
+
+def int_to_alpha(value: int):
+    alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+
+    def decay(value: int):
+        while value > 0:
+            yield value
+            value //= 26
+
+    if value not in column_labels:
+        result = ''
+
+        for v in decay(value):
+            result = alphabet[v % 26] + result
+
+        column_labels[value] = result or 'A'
+
+    return column_labels[value]
+
+
+class Serializer(object):
+    @classmethod
+    def serialize(cls, table):
+        raise NotImplementedError()
+
+class TabloSerializer(Serializer):
+    @classmethod
+    def serialize(cls, table):
+        header = cls.serialize_header(table)
+        data = cls.serialize_data(table)
+        format = cls.serialize_format(table.format)
+
+        return f'{header}=0.1\n{data}\n{format}'
+
+    @classmethod
+    def serialize_header(cls, table: Table):
+        if not table.header:
+            return ''
+
+        return ','.join(cls.serialize_item(val) for val in table.header) + '\n'
+
+    @classmethod
+    def serlialize_data(cls, table: Table):
+        return '\n'.join(','.join(
+            cls.serialize_item(elt) for elt in row
+        ) for row in table.data)
+
+    @classmethod
+    def serialize_item(cls, item):
+        if isinstance(item, str):
+            return f'"{item}"'
+        elif isinstance(item, (int, float)):
+            return str(item)
+        elif isinstance(item, bool):
+            return 'true' if item else 'false'
+        elif item is None:
+            return '-'
\ No newline at end of file
diff --git a/python/src/table.py b/python/src/table.py
new file mode 100644
index 0000000..162ff8f
--- /dev/null
+++ b/python/src/table.py
@@ -0,0 +1,53 @@
+import re
+from typing import Any
+from format import TableFormat
+
+
+class Table(object):
+    def __init__(self, header, rows, format):
+        self.header = header
+        self.data = rows
+        self.format = format
+        self.breaks = []
+
+    def concat(self, rows):
+        self.data += rows
+
+    def get(self, column, row):
+        if isinstance(column, str):
+            column = self._alpha_to_int(column)
+
+        return self.data[row][column]
+
+    def get_row(self, row):
+        return self.data[row]
+
+    def __getattribute__(self, name: str) -> Any:
+        if match := re.match(r'([A-Z]+)?([0-9]+)?', name):
+            col, row = (None, None)
+
+            if col_str := match.groups()[0]:
+                col = self._alpha_to_int(col_str)
+            
+            if row_str := match.groups()[1]:
+                row = int(row_str)
+
+            if row is not None and col is not None:
+                return self.data[row][col]
+            elif row is not None:
+                return self.data[row]
+            elif col is not None:
+                return (row[col] for row in self.data)
+            else:
+                raise KeyError()
+        else:
+            raise KeyError()
+        
+    def _alpha_to_int(self, index: str):
+        alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+        value = 0
+
+        for idx, char in enumerate(reversed(index.split(''))):
+            value += alphabet.index(char) * 26 ** idx
+
+        return value

From 5822f6041722d4de8016c088e258b0994755c0ea Mon Sep 17 00:00:00 2001
From: Brendan Berg <brendan@berg.industries>
Date: Mon, 12 Sep 2022 00:07:44 -0400
Subject: [PATCH 3/6] add html serializer

---
 python/src/serializers.py | 58 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

diff --git a/python/src/serializers.py b/python/src/serializers.py
index 9bad64b..be9dd6a 100644
--- a/python/src/serializers.py
+++ b/python/src/serializers.py
@@ -27,6 +27,7 @@ class Serializer(object):
     def serialize(cls, table):
         raise NotImplementedError()
 
+
 class TabloSerializer(Serializer):
     @classmethod
     def serialize(cls, table):
@@ -58,4 +59,59 @@ def serialize_item(cls, item):
         elif isinstance(item, bool):
             return 'true' if item else 'false'
         elif item is None:
-            return '-'
\ No newline at end of file
+            return '-'
+
+
+class HtmlSerializer(Serializer):
+    @classmethod
+    def serialize(cls, table: Table):
+        header = cls.serialize_header(table)
+        data = cls.serialize_data(table)
+
+        return f'<table>{header}{data}\n</table>'
+
+    @classmethod
+    def serialize_header(cls, table: Table):
+        if not table.header:
+            return ''
+        
+        def serialize_item(index: int, item):
+            col = int_to_alpha(index)
+            value = cls.serialize_item(item)
+            return f'<td data-col-index="{col}">{value}</td>'
+
+        items = (serialize_item(index, item) for index, item in enumerate(table.header))
+
+        return f'\n  <thead><tr>\n    {"\n    ".join(items)}\n  </tr></thead>'
+
+    @classmethod
+    def serialize_data(cls, table: Table):
+        def serialize_item(col_idx, row_idx, item):
+            col_str = int_to_alpha(col_idx)
+            value = cls.serialize_item(item)
+
+            if props := table.format and table.format.get_props(col_str, row_idx):
+                class_attr = f' class="{" ".join(props)}"'
+            else:
+                class_attr = ''
+
+            return f'<td data-col-index="{col_str}"{class_attr}>{value}</td>'
+
+        def serialize_row(row_idx, row):
+            items = (serialize_item(col_idx, row_idx, item) for col_idx, item in enumerate(row))
+            return f'<tr data-row-index="{row_idx}">\n      {"\n      ".join(items)}\n    </tr>'
+        
+        rows = (serialize_row(index, row) for index, row in enumerate(table.data))
+
+        return f'\n  <tbody>\n    {"\n    ".join(rows)}\n  </tbody>'
+
+    @classmethod
+    def serialize_item(cls, item):
+        if isinstance(item, str):
+            return item
+        elif isinstance(item, (int, float)):
+            return str(item)
+        elif isinstance(item, bool):
+            return 'True' if item else 'False'
+        elif item is None:
+            return ''

From 38ea39910d5774bb2f76a886da6f0a88c6f63b5c Mon Sep 17 00:00:00 2001
From: Brendan Berg <brendan@berg.industries>
Date: Mon, 12 Sep 2022 21:24:59 -0400
Subject: [PATCH 4/6] bugfixes and project structure

---
 .gitignore                           |  1 +
 python/README.md                     | 66 ++++++++++++++++++++++++++++
 python/{src => tablo}/__init__.py    |  8 ++--
 python/{src => tablo}/combinators.py | 11 ++---
 python/{src => tablo}/format.py      |  6 +--
 python/{src => tablo}/parse.py       | 16 +++----
 python/{src => tablo}/serializers.py | 21 ++++++---
 python/{src => tablo}/table.py       | 21 ++++++---
 8 files changed, 119 insertions(+), 31 deletions(-)
 create mode 100644 python/README.md
 rename python/{src => tablo}/__init__.py (63%)
 rename python/{src => tablo}/combinators.py (84%)
 rename python/{src => tablo}/format.py (95%)
 rename python/{src => tablo}/parse.py (95%)
 rename python/{src => tablo}/serializers.py (82%)
 rename python/{src => tablo}/table.py (67%)

diff --git a/.gitignore b/.gitignore
index 9daa824..4b0b674 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .DS_Store
+__pycache__
 node_modules
diff --git a/python/README.md b/python/README.md
new file mode 100644
index 0000000..6023e17
--- /dev/null
+++ b/python/README.md
@@ -0,0 +1,66 @@
+# The Tablo File Format
+
+__tablo__ is a plain text interchange format for tabular data. It is more
+expressive than CSV while remaining easy for people to read and write.
+
+It adds explicit headers, datatypes, and cell formatting to address
+shortcomings of delimiter-separated formats like CSV or TSV. __tablo__ solves
+delimiter collision issues by having well-defined quoting and escaping rules
+that are familiar to users of formats like JSON or common programming languages
+like Python or JavaScript.
+
+## What's Wrong with CSV?
+
+> the Microsoft version of CSV is a textbook example of how *not* to design a
+> textual file format
+
+—Eric S. Raymond, [*The Art of Unix Programming*][taoup]
+
+Stated simply, there is no single CSV standard. It exists as a myriad of
+informal variants whose implementation varies from vendor to vendor. Character
+encodings and escape sequences vary from one application to the next, and the
+ambiguities in various edge cases mean that the output of one application may
+not be readable by another.
+
+__tablo__ is designed to solve a number of ambiguities and shortcomings in CSV.
+
+One of the first obvious differences is that header rows are optional, but
+well-defined. In other words, a document may or may not contain a header, but
+determining whether the document includes a header is always unambiguous.
+
+A crucial aspect of the __tablo__ format is that it doesn't make assumptions
+about the type of data in each cell. If a value is surrounded by quotes, it is 
+*always* a string. If a value is a number without quotes, it is *always* a
+number. If a value is an ISO-8601 formatted date preceded by a hash mark, it
+is *always* a datetime.
+
+## Installation
+
+Install with `pip`:
+
+```
+python -m pip install tablo-fyi
+```
+
+## Usage
+
+Parsing is accomplished with the `parse` function.
+
+```
+from tablo import parse, serialize
+
+data = parse('"name", "age"\n=0.1\n"Tom", 24\n"Jerry", 27\n')
+
+name = data['A0']  # Retrieves the value in column A, row 0 => 'Tom'
+age = data['B1']   # Retrieves the value in column B, row 1 => 27
+```
+
+## More Information
+
+More information can be found in [the __tablo__ specification][spec], and a
+set of [example files][examples] can be found in the [project repository][repo].
+
+[taoup]: http://www.catb.org/esr/writings/taoup/html/ch05s02.html#id2901882
+[spec]: https://tablo.fyi
+[examples]: https://github.com/jotjotdotio/tablo/tree/main/examples
+[repo]: https://github.com/jotjotdotio/tablo
diff --git a/python/src/__init__.py b/python/tablo/__init__.py
similarity index 63%
rename from python/src/__init__.py
rename to python/tablo/__init__.py
index e41668c..c5613e5 100644
--- a/python/src/__init__.py
+++ b/python/tablo/__init__.py
@@ -1,14 +1,14 @@
-import parse
-from serializers import Serializer, TabloSerializer
+from tablo.parse import document
+from tablo.serializers import Serializer, TabloSerializer
 
 
 defaultSerializer: Serializer = TabloSerializer
 
 def parse(input: str):
-    _ignore, data, error = parse.document(input, 0)
+    _ignore, data, error = document(input, 0)
 
     if error:
-        raise error
+        raise ValueError(error)
     else:
         return data
 
diff --git a/python/src/combinators.py b/python/tablo/combinators.py
similarity index 84%
rename from python/src/combinators.py
rename to python/tablo/combinators.py
index 71d6380..1024695 100644
--- a/python/src/combinators.py
+++ b/python/tablo/combinators.py
@@ -7,14 +7,14 @@ def combinator(input: str, offset: int):
         results = []
 
         for rule in rules:
-            offset, match, error = rule(input, cursor)
+            newOffset, match, error = rule(input, cursor)
 
             if error:
-                return (cursor, None, error)
+                return (offset, None, error)
             else:
-                cursor = offset
+                cursor = newOffset
 
-                if isinstance(match, Iterable):
+                if isinstance(match, Iterable) and not isinstance(match, str):
                     results.extend(match)
                 else:
                     results.append(match)
@@ -50,8 +50,9 @@ def combinator(input: str, offset: int):
         while True:
             first, *rest = rules
             cursor, result, error = first(input, cursor)
-
+            
             if error:
+                error = None
                 break
             else:
                 results.append(result)
diff --git a/python/src/format.py b/python/tablo/format.py
similarity index 95%
rename from python/src/format.py
rename to python/tablo/format.py
index 6f3181c..fb7904a 100644
--- a/python/src/format.py
+++ b/python/tablo/format.py
@@ -4,12 +4,12 @@
 class TableFormat(object):
     rules = None
 
-    def __init__(self, rules):
+    def __init__(self, rules={}):
         rule = re.compile(r'^([A-Z]+)(?::([A-Z]+))?$|^([0-9]+)(?::([0-9]+))?$|^([A-Z]+)([0-9]+)(?::([A-Z]+)([0-9]+))?$')
         
         self.rules = []
 
-        for key, props in rules:
+        for key, props in rules.items():
             if match := rule.match(key):
                 start_row, end_row, start_col, end_col = (0, -1, 0, -1)
 
@@ -75,7 +75,7 @@ def _alpha_to_int(self, index: str):
         alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         value = 0
 
-        for idx, char in enumerate(reversed(index.split(''))):
+        for idx, char in enumerate(reversed(index)):
             value += alphabet.index(char) * 26 ** idx
 
         return value
diff --git a/python/src/parse.py b/python/tablo/parse.py
similarity index 95%
rename from python/src/parse.py
rename to python/tablo/parse.py
index 341fccb..0b58c4e 100644
--- a/python/src/parse.py
+++ b/python/tablo/parse.py
@@ -2,9 +2,9 @@
 import re
 from enum import Enum
 
-from combinators import concat, altern, repeat
-from format import TableFormat
-from table import Table
+from tablo.combinators import concat, altern, repeat
+from tablo.format import TableFormat
+from tablo.table import Table
 
 
 pattern = {
@@ -27,7 +27,7 @@
     'closeBrace': re.compile(r'}[^\S\r\n]*\n'),
 
     'version': re.compile(r' ?(\d+\.\d+)'),
-    'cellRange': re.compile(r''),
+    'cellRange': re.compile(r'((?:[A-Z]+[\d]+:[A-Z]+[\d]+)|(?:[A-Z]+:[A-Z]+)|(?:[\d]+:[\d]+)|(?:[A-Z]+[\d]+))[^\S\r\n]'),
     'tag': re.compile(r'([A-Za-z_][A-Za-z0-9_-]*)[^\S\r\n]*'),
     'propName': re.compile(r'(plain|bold|italic|underline|strike|normal|mono|black|red|orange|yellow|green|blue|violet|grey|white)[^\S\r\n]*'),
 }
@@ -107,13 +107,13 @@ def process(result, elt):
                 breaks.append(count)
             else:
                 count += 1
-                result.append(elt)
+                rows.append(elt)
 
             return (count, rows, breaks)
 
         _count, result, breaks = reduce(process, rows, (0, [], []))
 
-        table = Table(None, result, [])
+        table = Table(None, result, TableFormat())
         table.breaks = breaks
         return (offset, table, None)
 
@@ -149,7 +149,7 @@ def formatRule(input: str, offset: int):
     if error:
         return (offset, None, error)
     else:
-        return (offset, {result[0]: result[1:]}, None)
+        return (offset, (result[0], result[1:]), None)
 
 def cellRange(input: str, offset: int):
     if match := pattern['cellRange'].match(input, offset):
@@ -182,7 +182,7 @@ def row(input: str, offset: int):
         return (position, Token.Tilde, None)
     else:
         filtered = filter(lambda elt: elt not in (Token.Comma, Token.Newline), rowData)
-        return (position, filtered, None)
+        return (position, list(filtered), None)
 
 def element(input: str, offset: int):
     return altern(stringValue, number, booleanValue, nullValue)(input, offset)
diff --git a/python/src/serializers.py b/python/tablo/serializers.py
similarity index 82%
rename from python/src/serializers.py
rename to python/tablo/serializers.py
index be9dd6a..f10eb75 100644
--- a/python/src/serializers.py
+++ b/python/tablo/serializers.py
@@ -1,4 +1,5 @@
-from table import Table
+from tablo.format import TableFormat
+from tablo.table import Table
 
 
 column_labels = {}
@@ -45,7 +46,7 @@ def serialize_header(cls, table: Table):
         return ','.join(cls.serialize_item(val) for val in table.header) + '\n'
 
     @classmethod
-    def serlialize_data(cls, table: Table):
+    def serialize_data(cls, table: Table):
         return '\n'.join(','.join(
             cls.serialize_item(elt) for elt in row
         ) for row in table.data)
@@ -60,6 +61,13 @@ def serialize_item(cls, item):
             return 'true' if item else 'false'
         elif item is None:
             return '-'
+    
+    @classmethod
+    def serialize_format(cls, format: TableFormat):
+        rules = format.get_rules()
+        return '*\n' + '\n'.join(
+            f'{key} {{{",".join(props)}}}' for key, props in rules.items()
+        )
 
 
 class HtmlSerializer(Serializer):
@@ -82,7 +90,8 @@ def serialize_item(index: int, item):
 
         items = (serialize_item(index, item) for index, item in enumerate(table.header))
 
-        return f'\n  <thead><tr>\n    {"\n    ".join(items)}\n  </tr></thead>'
+        sep = "\n    "
+        return f'\n  <thead><tr>\n    {sep.join(items)}\n  </tr></thead>'
 
     @classmethod
     def serialize_data(cls, table: Table):
@@ -99,11 +108,13 @@ def serialize_item(col_idx, row_idx, item):
 
         def serialize_row(row_idx, row):
             items = (serialize_item(col_idx, row_idx, item) for col_idx, item in enumerate(row))
-            return f'<tr data-row-index="{row_idx}">\n      {"\n      ".join(items)}\n    </tr>'
+            sep = "\n      "
+            return f'<tr data-row-index="{row_idx}">\n      {sep.join(items)}\n    </tr>'
         
         rows = (serialize_row(index, row) for index, row in enumerate(table.data))
 
-        return f'\n  <tbody>\n    {"\n    ".join(rows)}\n  </tbody>'
+        sep = "\n    "
+        return f'\n  <tbody>\n    {sep.join(rows)}\n  </tbody>'
 
     @classmethod
     def serialize_item(cls, item):
diff --git a/python/src/table.py b/python/tablo/table.py
similarity index 67%
rename from python/src/table.py
rename to python/tablo/table.py
index 162ff8f..bd8a58a 100644
--- a/python/src/table.py
+++ b/python/tablo/table.py
@@ -1,6 +1,6 @@
 import re
 from typing import Any
-from format import TableFormat
+from tablo.format import TableFormat
 
 
 class Table(object):
@@ -22,7 +22,7 @@ def get(self, column, row):
     def get_row(self, row):
         return self.data[row]
 
-    def __getattribute__(self, name: str) -> Any:
+    def __getitem__(self, name: str) -> Any:
         if match := re.match(r'([A-Z]+)?([0-9]+)?', name):
             col, row = (None, None)
 
@@ -33,11 +33,20 @@ def __getattribute__(self, name: str) -> Any:
                 row = int(row_str)
 
             if row is not None and col is not None:
-                return self.data[row][col]
+                try:
+                    return self.data[row][col]
+                except IndexError:
+                    raise KeyError()
             elif row is not None:
-                return self.data[row]
+                try:
+                    return self.data[row]
+                except IndexError:
+                    raise KeyError()
             elif col is not None:
-                return (row[col] for row in self.data)
+                try:
+                    return (row[col] for row in self.data)
+                except IndexError:
+                    raise KeyError()
             else:
                 raise KeyError()
         else:
@@ -47,7 +56,7 @@ def _alpha_to_int(self, index: str):
         alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
         value = 0
 
-        for idx, char in enumerate(reversed(index.split(''))):
+        for idx, char in enumerate(reversed(index)):
             value += alphabet.index(char) * 26 ** idx
 
         return value

From a2323eacba93b00d5720bb275c90d4e37d15046b Mon Sep 17 00:00:00 2001
From: Brendan Berg <brendan@berg.industries>
Date: Mon, 12 Sep 2022 22:38:13 -0400
Subject: [PATCH 5/6] housekeeping

---
 .gitignore              |  7 +++++++
 python/README.md        |  4 ++--
 python/requirements.txt |  3 +++
 python/setup.py         | 21 +++++++++++++++++++++
 4 files changed, 33 insertions(+), 2 deletions(-)
 create mode 100644 python/requirements.txt
 create mode 100644 python/setup.py

diff --git a/.gitignore b/.gitignore
index 4b0b674..4c13563 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,10 @@
 .DS_Store
+
+# Python
 __pycache__
+python/build
+python/dist
+*.egg-info
+
+# TypeScript
 node_modules
diff --git a/python/README.md b/python/README.md
index 6023e17..ae553e3 100644
--- a/python/README.md
+++ b/python/README.md
@@ -47,9 +47,9 @@ python -m pip install tablo-fyi
 Parsing is accomplished with the `parse` function.
 
 ```
-from tablo import parse, serialize
+import tablo
 
-data = parse('"name", "age"\n=0.1\n"Tom", 24\n"Jerry", 27\n')
+data = tablo.parse('"name", "age"\n=0.1\n"Tom", 24\n"Jerry", 27\n')
 
 name = data['A0']  # Retrieves the value in column A, row 0 => 'Tom'
 age = data['B1']   # Retrieves the value in column B, row 1 => 27
diff --git a/python/requirements.txt b/python/requirements.txt
new file mode 100644
index 0000000..b1d7d60
--- /dev/null
+++ b/python/requirements.txt
@@ -0,0 +1,3 @@
+hypothesis==6.54.5
+twine==4.0.1
+wheel==0.37.1
diff --git a/python/setup.py b/python/setup.py
new file mode 100644
index 0000000..edfd3b0
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,21 @@
+import pathlib
+from setuptools import setup, find_packages
+
+BASE_DIR = pathlib.Path(__file__).parent
+config = {
+    'name': 'tablo-fyi',
+    'version': '0.4.5',
+    'description': "A tabular data format that doesn't make you want to pull your hair out",
+    # Decode the README as UTF-8 explicitly so builds do not depend on the locale.
+    'long_description': (BASE_DIR / "README.md").read_text(encoding='utf-8'),
+    'long_description_content_type': 'text/markdown',
+    'author': 'Brendan Berg',
+    'author_email': 'brendan@berg.industries',
+    'license': 'MIT',
+    'url': 'https://github.com/jotjotdotio/tablo',
+    'python_requires': '>=3.8',  # the package uses assignment expressions (:=)
+    'install_requires': [],
+    'packages': find_packages()
+}
+
+setup(**config)

From 6a10fe9f6c104bf86a835c8a563d5f818e40deae Mon Sep 17 00:00:00 2001
From: Brendan Berg <brendan@berg.industries>
Date: Thu, 13 Oct 2022 21:52:52 -0400
Subject: [PATCH 6/6] wip

---
 .gitignore                 |  2 ++
 python/getting-started.md  | 21 +++++++++++++++++++++
 python/test/__init__.py    |  4 ++++
 python/test/header.test.py | 11 +++++++++++
 python/test/parser.test.py |  0
 5 files changed, 38 insertions(+)
 create mode 100644 python/getting-started.md
 create mode 100644 python/test/__init__.py
 create mode 100644 python/test/header.test.py
 create mode 100644 python/test/parser.test.py

diff --git a/.gitignore b/.gitignore
index 4c13563..e64a58a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,8 @@ __pycache__
 python/build
 python/dist
 *.egg-info
+*.pyc
+*.pyo
 
 # TypeScript
 node_modules
diff --git a/python/getting-started.md b/python/getting-started.md
new file mode 100644
index 0000000..cd6aeec
--- /dev/null
+++ b/python/getting-started.md
@@ -0,0 +1,21 @@
+# Development Guide
+
+## Installing
+
+1. Run `python -m pip install -r requirements.txt` to install all development dependencies
+
+## Testing
+
+1. Run `python -m unittest test` to run the test suite
+
+## Building
+
+1. Run `python setup.py sdist bdist_wheel` to build the source and binary
+distributions
+
+## Distributing
+
+1. Run `twine upload -r testpypi dist/*` to upload build artifacts to the
+PyPI test environment
+2. Verify that everything looks right
+3. Run `twine upload dist/*` to upload build artifacts to PyPI
diff --git a/python/test/__init__.py b/python/test/__init__.py
new file mode 100644
index 0000000..1c35782
--- /dev/null
+++ b/python/test/__init__.py
@@ -0,0 +1,4 @@
+import unittest
+
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file
diff --git a/python/test/header.test.py b/python/test/header.test.py
new file mode 100644
index 0000000..4c37a11
--- /dev/null
+++ b/python/test/header.test.py
@@ -0,0 +1,11 @@
+from hypothesis import given
+from hypothesis.strategies import text
+
+from tablo.parse import header  # `from tablo import parse` yields the parse() function
+
+
+@given(text())
+def test_decode_inverts_encode(s):
+    """Check that parsing a one-cell quoted header yields the original text."""
+    _offset, result, _error = header(f'"{s}"\n=0.1\n', 0)
+    assert result.header[0] == s
diff --git a/python/test/parser.test.py b/python/test/parser.test.py
new file mode 100644
index 0000000..e69de29