jotjotdotio · brendanberg · Sep 11, 2022 · Sep 12, 2022 · Sep 12, 2022 · Sep 13, 2022
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,12 @@
 .DS_Store
+
+# Python
+__pycache__
+python/build
+python/dist
+*.egg-info
+*.pyc
+*.pyo
+
+# TypeScript
 node_modules
diff --git a/python/README.md b/python/README.md
@@ -0,0 +1,66 @@
+# The Tablo File Format
+
+__tablo__ is a plain text interchange format for tabular data. It is more
+expressive than CSV while remaining easy for people to read and write.
+
+It adds explicit headers, datatypes, and cell formatting to address
+shortcomings of delimiter-separated formats like CSV or TSV. __tablo__ solves
+delimiter collision issues by having well-defined quoting and escaping rules
+that are familiar to users of formats like JSON or common programming languages
+like Python or JavaScript.
+
+## What's Wrong with CSV?
+
+> the Microsoft version of CSV is a textbook example of how *not* to design a
+> textual file format
+
+—Eric S. Raymond, [*The Art of Unix Programming*][taoup]
+
+Stated simply, there is no single CSV standard. It exists as a myriad of
+informal variants whose implementation varies from vendor to vendor. Character
+encodings and escape sequences vary from one application to the next, and the
+ambiguities in various edge cases mean that the output of one application may
+not be readable by another.
+
+__tablo__ is designed to solve a number of ambiguities and shortcomings in CSV.
+
+One of the first obvious differences is that header rows are optional, but
+well-defined. In other words, a document may or may not contain a header, but
+determining whether the document includes a header is always unambiguous.
+
+A crucial aspect of the __tablo__ format is that it doesn't make assumptions
+about the type of data in each cell. If a value is surrounded by quotes, it is 
+*always* a string. If a value is a number without quotes, it is *always* a
+number. If a value is an ISO-8601 formatted date preceded by a hash mark, it
+is *always* a datetime.
+
+## Installation
+
+Install with `pip`:
+
+```
+python -m pip install tablo-fyi
+```
+
+## Usage
+
+Parsing is accomplished with the `parse` function.
+
+```
+import tablo
+
+data = tablo.parse('"name", "age"\n=0.1\n"Tom", 24\n"Jerry", 27\n')
+
+name = data['A0']  # Retrieves the value in column A, row 0 => 'Tom'
+age = data['B1']   # Retrieves the value in column B, row 1 => 27
+```
+
+## More Information
+
+More information can be found in [the __tablo__ specification][spec], and a
+set of [example files][examples] can be found in the [project repository][repo].
+
+[taoup]: http://www.catb.org/esr/writings/taoup/html/ch05s02.html#id2901882
+[spec]: https://tablo.fyi
+[examples]: https://github.com/jotjotdotio/tablo/tree/main/examples
+[repo]: https://github.com/jotjotdotio/tablo
diff --git a/python/getting-started.md b/python/getting-started.md
@@ -0,0 +1,21 @@
+# Development Guide
+
+## Installing
+
+1. Run `python -m pip install -r requirements.txt` to install all development dependencies
+
+## Testing
+
+1. Run `python -m unittest test` to run the test suite
+
+## Building
+
+1. Run `python setup.py sdist bdist_wheel` to build the source and binary
+distributions
+
+## Distributing
+
+1. Run `twine upload -r testpypi dist/*` to upload build artifacts to the
+PyPI test environment
+2. Verify that everything looks right
+3. Run `twine upload dist/*` to upload build artifacts to PyPI
diff --git a/python/requirements.txt b/python/requirements.txt
@@ -0,0 +1,3 @@
+hypothesis==6.54.5
+twine==4.0.1
+wheel==0.37.1
diff --git a/python/setup.py b/python/setup.py
@@ -0,0 +1,21 @@
+import pathlib
+from setuptools import setup, find_packages
+
+
+# Directory containing this setup script, so README.md is found regardless of
+# the current working directory.
+BASE_DIR = pathlib.Path(__file__).parent
+
+# Package metadata passed to setuptools.setup() below.
+config = {
+    'name': 'tablo-fyi',
+    'version': '0.4.5',
+    'description': "A tabular data format that doesn't make you want to pull your hair out",
+    # README.md contains non-ASCII text (an em dash), so decode it explicitly
+    # as UTF-8 instead of relying on the platform's locale encoding.
+    'long_description': (BASE_DIR / "README.md").read_text(encoding='utf-8'),
+    'long_description_content_type': 'text/markdown',
+    'author': 'Brendan Berg',
+    'author_email': 'brendan@berg.industries',
+    'license': 'MIT',
+    'url': 'https://github.com/jotjotdotio/tablo',
+    # tablo/format.py uses an assignment expression (`:=`), which requires
+    # Python 3.8 or newer.
+    'python_requires': '>=3.8',
+    'install_requires': [],
+    'packages': find_packages()
+}
+
+setup(**config)
diff --git a/python/tablo/__init__.py b/python/tablo/__init__.py
@@ -0,0 +1,20 @@
+from tablo.parse import document
+from tablo.serializers import Serializer, TabloSerializer
+
+
+# Serializer used by serialize(); can be replaced at runtime via setSerializer().
+defaultSerializer: Serializer = TabloSerializer
+
+def parse(input: str):
+    """Parse a tablo document from a string.
+
+    Applies the `document` rule to `input` starting at offset 0 and returns
+    the data it produced. If the rule reports an error, that error is raised
+    as a ``ValueError`` instead.
+    """
+    _end_offset, parsed, failure = document(input, 0)
+
+    if failure:
+        raise ValueError(failure)
+
+    return parsed
+
+def serialize(table):
+    """Serialize `table` with the module's current default serializer.
+
+    Returns whatever the serializer's `serialize` method returns.
+    """
+    return defaultSerializer.serialize(table)
+
+def setSerializer(serializer: Serializer) -> None:
+    """Replace the module-wide serializer that `serialize` delegates to."""
+    global defaultSerializer
+    defaultSerializer = serializer
diff --git a/python/tablo/combinators.py b/python/tablo/combinators.py
@@ -0,0 +1,69 @@
+from typing import Iterable
+
+
+def concat(*rules):
+    """Build a rule that applies each of `rules` one after another.
+
+    Every rule is called as ``rule(input, offset)`` and returns an
+    ``(offset, match, error)`` tuple. The combined rule succeeds only when all
+    rules succeed, returning ``(end_offset, matches, None)``. A match that is
+    iterable (and not a string) is flattened into the result list; any other
+    match is added as a single item. On the first failure the combined rule
+    returns ``(offset, None, error)`` using the offset it was called with.
+    """
+    def combinator(input: str, offset: int):
+        position = offset
+        matches = []
+
+        for rule in rules:
+            position_after, match, error = rule(input, position)
+
+            if error:
+                # Fail at the starting offset, discarding partial progress.
+                return (offset, None, error)
+
+            position = position_after
+            is_sequence = isinstance(match, Iterable) and not isinstance(match, str)
+            matches += match if is_sequence else [match]
+
+        return (position, matches, None)
+
+    return combinator
+
+def altern(*rules):
+    """Build a rule that returns the result of the first of `rules` to succeed.
+
+    Every rule is called as ``rule(input, offset)`` and returns an
+    ``(offset, match, error)`` tuple. Each alternative is tried from the same
+    starting offset. The first one that reports no error decides the result,
+    ``(new_offset, match, None)``. If every alternative fails, the rule
+    returns ``(offset, None, message)`` with the offset it was called with and
+    a message of the form ``'one of <error>,<error>,...'``.
+    """
+    def combinator(input: str, offset: int):
+        errors = []
+
+        for rule in rules:
+            # Always restart from the original offset; whatever offset a
+            # failed alternative returns is deliberately ignored.
+            new_offset, result, error = rule(input, offset)
+
+            if not error:
+                return (new_offset, result, None)
+
+            if isinstance(error, str):
+                # A string is a single message; extending the list with it
+                # would split it into individual characters.
+                errors.append(error)
+            else:
+                errors.extend(str(item) for item in error)
+
+        return (offset, None, f'one of {",".join(errors)}')
+
+    return combinator
+
+def repeat(*rules):
+    """Build a rule that matches the sequence `rules` zero or more times.
+
+    Each repetition applies the first rule and then the remaining rules
+    (combined with `concat`). Repetition stops successfully as soon as the
+    first rule fails, returning ``(end_offset, matches, None)``. If the first
+    rule matches but the remaining rules fail, the whole rule fails with
+    ``(offset, None, error)`` using the offset it was called with.
+
+    The first rule's match is appended to the results as a single item, while
+    the matches of the remaining rules are flattened in by `concat`.
+
+    A repetition that consumes no input is recorded once and then ends the
+    loop, so a zero-width match cannot repeat forever.
+
+    Calling the built rule raises ``ValueError`` if `rules` is empty.
+    """
+    def combinator(input: str, offset: int):
+        # Split and combine once per call rather than once per repetition.
+        first, *rest = rules
+        remainder = concat(*rest)
+
+        cursor = offset
+        results = []
+
+        while True:
+            after_first, result, error = first(input, cursor)
+
+            if error:
+                # The first rule failing simply marks the end of the repetition.
+                break
+
+            results.append(result)
+            after_rest, result, error = remainder(input, after_first)
+
+            if error:
+                return (offset, None, error)
+
+            results.extend(result)
+
+            if after_rest <= cursor:
+                # No progress was made; another pass would match the same
+                # empty span again and never terminate.
+                break
+
+            cursor = after_rest
+
+        return (cursor, results, None)
+
+    return combinator
diff --git a/python/tablo/format.py b/python/tablo/format.py
@@ -0,0 +1,81 @@
+import re
+
+
+class TableFormat(object):
+    """Formatting rules for a table, addressed by column, row, or cell range.
+
+    Rules are given as a mapping from a reference key to a collection of
+    properties. Three key shapes are recognised:
+
+    * ``'A'`` / ``'A:C'``    - one column, or an inclusive span of columns
+    * ``'2'`` / ``'2:5'``    - one row, or an inclusive span of rows
+    * ``'A2'`` / ``'A2:C5'`` - one cell, or an inclusive rectangle of cells
+
+    Column labels are upper-case letters mapped to zero-based indices
+    (``'A'`` is 0, ``'Z'`` is 25, ``'AA'`` is 26). Keys that do not match one
+    of these shapes, or whose end precedes their start, are ignored.
+    """
+
+    # Replaced per instance by __init__ with a list of (bounds, key, props)
+    # tuples, where bounds is (start_col, end_col, start_row, end_row) and an
+    # end value of -1 means "unbounded".
+    rules = None
+
+    # Compiled once for the class instead of on every construction. The three
+    # alternatives match column keys, row keys and cell keys respectively.
+    _KEY_PATTERN = re.compile(r'^([A-Z]+)(?::([A-Z]+))?$|^([0-9]+)(?::([0-9]+))?$|^([A-Z]+)([0-9]+)(?::([A-Z]+)([0-9]+))?$')
+
+    def __init__(self, rules=None):
+        """Index the given formatting rules by the region each key covers.
+
+        rules: optional mapping of reference key to properties. ``None`` (the
+            default) is treated as an empty mapping.
+        """
+        if rules is None:
+            rules = {}
+
+        self.rules = []
+
+        for key, props in rules.items():
+            if not (match := self._KEY_PATTERN.match(key)):
+                # Keys that are not valid references are silently skipped.
+                continue
+
+            # Defaults cover the whole table: start at 0, unbounded end.
+            start_row, end_row, start_col, end_col = (0, -1, 0, -1)
+
+            groups = match.groups()
+
+            if groups[0] is not None:
+                # Column key: 'A' or 'A:C'.
+                start_col = self._alpha_to_int(groups[0])
+                end_col = self._alpha_to_int(groups[1]) if groups[1] is not None else start_col
+            elif groups[2] is not None:
+                # Row key: '2' or '2:5'.
+                start_row = int(groups[2])
+                end_row = int(groups[3]) if groups[3] is not None else start_row
+            elif groups[4] is not None:
+                # Cell key: 'A2' or 'A2:C5'.
+                start_col = self._alpha_to_int(groups[4])
+                start_row = int(groups[5])
+                end_col = self._alpha_to_int(groups[6]) if groups[6] is not None else start_col
+                end_row = int(groups[7]) if groups[7] is not None else start_row
+
+            # Drop reversed ranges such as 'C:A' or '5:2'.
+            if ((start_row <= end_row or end_row == -1) and
+                    (start_col <= end_col or end_col == -1)):
+                bounds = (start_col, end_col, start_row, end_row)
+                self.rules.append((bounds, key, props))
+
+    def get_props(self, col, row):
+        """Return the properties of every rule covering the cell (`col`, `row`).
+
+        col: column label made of upper-case letters, e.g. ``'A'`` or ``'AB'``.
+        row: integer row index.
+
+        Returns a new flat list holding the properties of all matching rules,
+        in the order the rules were stored.
+
+        Raises ValueError if `col` is empty or contains a character outside
+        ``A``-``Z``.
+        """
+        column = self._alpha_to_int(col)
+        result = []
+
+        for (start_col, end_col, start_row, end_row), _key, props in self.rules:
+            row_matches = row >= start_row and (row <= end_row or end_row == -1)
+            col_matches = column >= start_col and (column <= end_col or end_col == -1)
+
+            if row_matches and col_matches:
+                result.extend(props)
+
+        return result
+
+    def get_rules(self):
+        """Return the stored rules as a mapping of reference key to properties.
+
+        Only rules accepted by the constructor are included. If the same key
+        is stored more than once, the later properties are appended to the
+        first entry's properties.
+        """
+        rules = {}
+
+        for _bounds, key, props in self.rules:
+            if key in rules:
+                rules[key].extend(props)
+            else:
+                rules[key] = props
+
+        return rules
+
+    def _alpha_to_int(self, index: str):
+        """Convert a spreadsheet-style column label to a zero-based index.
+
+        Labels use bijective base 26: ``'A'`` is 0, ``'Z'`` is 25, ``'AA'`` is
+        26, ``'AB'`` is 27, and so on.
+
+        Raises ValueError if `index` is empty or contains a character outside
+        ``A``-``Z``.
+        """
+        if not index:
+            raise ValueError('column label must not be empty')
+
+        alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+        value = 0
+
+        for char in index:
+            # Each letter is a digit in 1..26 (there is no zero digit), so that
+            # 'AA' follows 'Z' instead of colliding with 'A'.
+            value = value * 26 + alphabet.index(char) + 1
+
+        # Shift from the one-based positional value to a zero-based index.
+        return value - 1