Merged
Changes from 2 commits
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
@@ -41,4 +41,4 @@ jobs:
      - name: Publish to PyPI (uv)
        env:
          UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
        run: uv publish
36 changes: 36 additions & 0 deletions .github/workflows/tests.yml
@@ -0,0 +1,36 @@
name: Tests

on:
  pull_request:
    branches: [main]

jobs:
  test:
    name: Run Tests
    runs-on: ${{ matrix.os }}

    strategy:
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        python-version: ["3.11", "3.12", "3.13"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up uv
        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: uv sync --all-groups

      - name: Run unit tests
        run: uv run pytest tests/ -n auto -m "not integration" -v

      - name: Run integration tests
        if: github.event_name == 'pull_request' && matrix.python-version == '3.12' && matrix.os == 'ubuntu-latest'
        run: uv run pytest tests/ -v
        env:
          ODA_READER_CACHE_DIR: ${{ runner.temp }}/oda_cache
2 changes: 2 additions & 0 deletions .gitignore
@@ -160,3 +160,5 @@ cython_debug/
.idea/
.DS_Store
/src/oda_reader/dev_tests.py
/docs/site
/docs/plans
119 changes: 119 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,119 @@
# Pre-commit hooks for ODA Reader Package
#
# Installation:
# 1. Install pre-commit: `uv sync --group dev`
# 2. Install hooks: `uv run pre-commit install`
#
# Usage:
# - Hooks run automatically on `git commit`
# - Run manually on all files: `uv run pre-commit run --all-files`
# - Run on specific files: `uv run pre-commit run --files src/file.py`
# - Update hooks: `uv run pre-commit autoupdate`
# - Skip hooks temporarily: `git commit --no-verify`

repos:
  # ============================================================================
  # Ruff - Fast Python linter and formatter (replaces black, isort, flake8)
  # ============================================================================
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.8.4
    hooks:
      # Run the linter
      - id: ruff
        args: [--fix]
      # Run the formatter
      - id: ruff-format

  # ============================================================================
  # Built-in pre-commit hooks for file quality
  # ============================================================================
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      # Remove trailing whitespace
      - id: trailing-whitespace
        args: [--markdown-linebreak-ext=md]

      # Ensure files end with a newline
      - id: end-of-file-fixer

      # Prevent mixed line endings (Unix vs Windows)
      - id: mixed-line-ending
        args: [--fix=lf]

      # Check for files that would conflict on case-insensitive filesystems
      - id: check-case-conflict

      # Check for merge conflict strings
      - id: check-merge-conflict

      # Check for debug statements (pdb, breakpoint, etc.)
      - id: debug-statements

      # Validate Python syntax
      - id: check-ast

      # Check for proper shebang formatting
      - id: check-shebang-scripts-are-executable

      # Prevent large files from being committed (default 500kb)
      - id: check-added-large-files
        args: [--maxkb=1000]  # Allow up to 1MB files
        exclude: |
          (?x)^(
            .*\.parquet|
            .*\.feather|
            uv\.lock
          )$

  # ============================================================================
  # JSON validation (critical for schema mappings)
  # ============================================================================
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      # Check JSON syntax
      - id: check-json

      # Pretty-format JSON files
      - id: pretty-format-json
        args: [--autofix, --indent=2, --no-sort-keys]
        exclude: |
          (?x)^(
            \.vscode/.*|
            \.claude/.*
          )$

  # ============================================================================
  # YAML validation (for GitHub Actions and MkDocs)
  # ============================================================================
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      # Check YAML syntax
      - id: check-yaml
        args: [--allow-multiple-documents]

  # ============================================================================
  # Security checks
  # ============================================================================
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      # Detect private keys
      - id: detect-private-key

  # ============================================================================
  # Python-specific checks
  # ============================================================================
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      # Check for common Python mistakes
      - id: check-builtin-literals

      # Check docstring is first
      - id: check-docstring-first

      # Validate pyproject.toml
      - id: check-toml
8 changes: 4 additions & 4 deletions CHANGELOG.md
@@ -19,18 +19,18 @@ a type error.
for the bulk Multisystem data. This is a small fix to address that.

## 1.1.4 (2025-04-22)
- fix small cache bug

## 1.1.3 (2025-04-22)
- Small caching improvements

## 1.1.2 (2025-04-22)
- Extends caching to bulk downloaded files.
- Other minor tweaks to how caching works.

## 1.1.1 (2025-04-16)
- Manages an issue created by the OECD when they are about to release new data. In that case
certain dataflows return `NoRecordsFound`, even though the query is valid for lower dataflows.
This version of `oda_reader` defends against that.


@@ -88,4 +88,4 @@ and the wrong schema (dac1) was loaded.

## 0.1.0 (2024-05-05)
- Initial release. It includes a basic implementation of an API call for DAC1 and DAC2.
- This release includes tools to translate the API response into the old .Stat schema.
10 changes: 5 additions & 5 deletions README.md
@@ -9,7 +9,7 @@ The OECD DAC Data Importer
the **OECD data explorer API** and bulk downloads.

It allows for easy, programmatic access to OECD DAC data in Python. It is designed for policy
analysts, data analysts, researchers and students who need easy and programmatic access to
OECD DAC data.

This documentation will walk you through how to set up and use ODA Reader.
@@ -246,7 +246,7 @@ crs_data = download_crs(

The filtering can get quite specific. For example, the following
query gets disbursements for ODA grants from Germany to Nigeria for
primary education, provided through multilateral organisations, in
constant prices:

```python
crs_data = download_crs(
)
```

The data-explorer API can also return semi-aggregates, built from the CRS microdata.
That is the data that is shown online through the data-explorer.

You can get that view of the data using the ODA Reader package. However, the filters must
be used to avoid double counting.
@@ -540,7 +540,7 @@ When using ODA Reader, you can apply filters to refine the data you retrieve from

Filters allow you to specify subsets of data, making it easy to focus on the information that is most relevant to your needs.

Filters are specified as a dictionary, with keys representing the filter categories (such as donor, recipient, sector, etc.) and values representing the criteria to match, provided as single values (like a year, or a code), or lists of values (like multiple donors or multiple sectors).
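
For instance, a minimal sketch of such a filter dictionary (the keys and codes below are illustrative assumptions, not necessarily the exact names a given dataset accepts — check `get_available_filters()` for the real ones):

```python
# Illustrative only: the filter keys and codes here are assumptions made
# for the example; consult get_available_filters() for a dataset's real keys.
filters = {
    "donor": "DEU",               # a single value, e.g. one donor code
    "recipient": ["NGA", "KEN"],  # or a list of values
    "year": 2022,                 # single values can also be numbers
}

# Each value is either a scalar or a list of scalars, as described above.
for value in filters.values():
    assert isinstance(value, (str, int, list))
```

Such a dictionary would then typically be passed to a download function like `download_crs` as its filter argument.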

You can use the `get_available_filters()` function to see the available filter parameters that
can be used for a specific dataset. Note that (for now) all filter values must be provided using
53 changes: 53 additions & 0 deletions docs/README.md
@@ -0,0 +1,53 @@
# ODA Reader Documentation

This directory contains the MkDocs documentation for ODA Reader.

## Building Locally

Install dependencies:

```bash
cd .. # back to project root
uv sync --group docs
```

Serve locally:

```bash
cd docs
uv run mkdocs serve
```

Visit http://127.0.0.1:8000

## Building for Production

```bash
cd docs
uv run mkdocs build
```

Output is in the `site/` directory.

## Testing Documentation Examples

Run example test scripts:

```bash
cd .. # back to project root
uv run python docs/examples/getting_started_examples.py
uv run python docs/examples/filtering_examples.py
```

All examples should pass before committing documentation updates.

## Documentation Structure

- `mkdocs.yml` - MkDocs configuration
- `docs/` - All markdown content
- `examples/` - Test scripts for documentation examples
- `plans/` - Design documents and implementation plans

## Deployment

(Add deployment instructions for your hosting platform here)