From 220c58244ef1a7b52aabbb545f1e1d303b34fb08 Mon Sep 17 00:00:00 2001
From: Mike Fuller <mike@finops.org>
Date: Wed, 29 Oct 2025 07:00:59 +1100
Subject: [PATCH] Added transpile helper for those who need to see our example
 sql in a different engine type. Signed-off-by: Mike Fuller <mike@finops.org>

---
 .../supported_features/helpers/README.md      |  70 +++++++++
 .../helpers/requirements.txt                  |   1 +
 .../helpers/sql_transpile.py                  | 140 ++++++++++++++++++
 3 files changed, 211 insertions(+)
 create mode 100644 specification/supported_features/helpers/README.md
 create mode 100644 specification/supported_features/helpers/requirements.txt
 create mode 100755 specification/supported_features/helpers/sql_transpile.py

diff --git a/specification/supported_features/helpers/README.md b/specification/supported_features/helpers/README.md
new file mode 100644
index 000000000..dc8215488
--- /dev/null
+++ b/specification/supported_features/helpers/README.md
@@ -0,0 +1,70 @@
+# Helper Scripts
+
+This directory contains utility scripts that assist with maintaining and processing the FOCUS specification documentation.
+
+## Requirements
+
+- Python 3.7+
+- Dependencies listed in `requirements.txt`
+
+### Installation
+
+Before using any helper scripts, install the required dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+## Available Scripts
+
+### sql_transpile.py
+
+A Python script that finds SQL code blocks in Markdown files and transpiles them between different SQL dialects using SQLGlot.
+
+#### Purpose
+
+- Extract SQL code blocks from Markdown documentation
+- Detect SQL dialects automatically or use provided hints
+- Transpile SQL between different database dialects (BigQuery, Trino, T-SQL, etc.)
+- Validate SQL syntax across different platforms
+
+#### Usage
+
+```bash
+# Transpile all SQL blocks in markdown files to T-SQL
+./sql_transpile.py ../*.md --to tsql
+
+# List all SQL blocks without transpiling
+./sql_transpile.py ../*.md --list
+
+# Transpile with dialect preference for detection
+./sql_transpile.py ../*.md --to bigquery --prefer trino
+
+# Process specific files
+./sql_transpile.py ../file1.md ../file2.md --to postgres
+```
+
+#### Options
+
+- `files`: Markdown files or glob patterns to process
+- `--to`: Target SQL dialect (default: ansi)
+- `--prefer`: Preferred dialect for detection (can be used multiple times)
+- `--list`: Only list SQL blocks found, don't transpile
+
+#### Supported Dialects
+
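+BigQuery (`bigquery`), Trino (`trino`), Presto (`presto`), DuckDB (`duckdb`), MySQL (`mysql`), PostgreSQL (`postgres`), Snowflake (`snowflake`), T-SQL (`tsql`), Spark (`spark`), Hive (`hive`), Redshift (`redshift`), SQLite (`sqlite`), Oracle (`oracle`), and ANSI SQL (`ansi`). The name in backticks is the value to pass to `--to`.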
+
+#### Dialect Hints
+
+You can specify a dialect hint after `sql` on the opening fence of a SQL code block:
+
+````markdown
+```sql bigquery
+SELECT * FROM dataset.table
+```
+
+```sql:trino
+SELECT * FROM catalog.schema.table
+```
+````
diff --git a/specification/supported_features/helpers/requirements.txt b/specification/supported_features/helpers/requirements.txt
new file mode 100644
index 000000000..883c74369
--- /dev/null
+++ b/specification/supported_features/helpers/requirements.txt
@@ -0,0 +1 @@
+sqlglot>=20.0.0
\ No newline at end of file
diff --git a/specification/supported_features/helpers/sql_transpile.py b/specification/supported_features/helpers/sql_transpile.py
new file mode 100755
index 000000000..91784cf9f
--- /dev/null
+++ b/specification/supported_features/helpers/sql_transpile.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3
+import argparse
+import glob
+import re
+import sys
+from typing import Optional, List, Tuple
+
+import sqlglot
+from sqlglot.errors import ParseError
+
+# Prefer the engines you most commonly use by ordering them first
+CANDIDATE_DIALECTS: List[str] = [
+    "bigquery", "trino", "presto", "duckdb", "mysql", "postgres",
+    "snowflake", "tsql", "spark", "hive", "redshift", "sqlite", "oracle", None
+]
+
+# Matches fenced code blocks like:
+# ```sql
+# ...code...
+# ```
+# and also allows a dialect hint after `sql`, e.g.:
+# ```sql trino
+# ```sql:bigquery
+FENCE_RE = re.compile(
+    r"```sql(?:[ \t]+([\w:-]+))?[ \t]*\n(.*?)\n```",
+    re.IGNORECASE | re.DOTALL
+)
+
+def parse_fenced_sql(md_text: str) -> List[Tuple[int, Optional[str], str]]:
+    """
+    Return a list of (block_index, hint, sql_text) for each fenced ```sql block.
+    'hint' is an optional dialect hint following 'sql' in the fence.
+    """
+    blocks = []
+    for i, m in enumerate(FENCE_RE.finditer(md_text), start=1):
+        raw_hint = m.group(1) or ""
+        # Normalize hint: accept "trino", "sql:trino", "sql-trino", "trino:xyz"
+        hint = raw_hint.strip().lower()
+        hint = hint.replace("sql:", "").replace("sql-", "")
+        hint = hint.split(":")[0] if ":" in hint else hint  # keep left-most token if colon-separated
+        hint = hint or None
+        sql_text = m.group(2).strip()
+        blocks.append((i, hint, sql_text))
+    return blocks
+
+def detect_dialect(sql: str) -> Optional[str]:
+    """Heuristically detect a dialect by attempting to parse with common dialects."""
+    successes = []
+    for d in CANDIDATE_DIALECTS:
+        try:
+            sqlglot.parse(sql, read=d)
+            successes.append(d)
+        except ParseError:
+            continue
+    return successes[0] if successes else None
+
+def transpile_sql(sql: str, read: Optional[str], write: str) -> str:
+    if write is not None and write.lower() == "ansi":
+        write = None  # SQLGlot uses None for ANSI
+    if read is not None and read.lower() == "ansi":
+        read = None
+    parts = sqlglot.transpile(
+        sql,
+        read=read,     # None => let SQLGlot assume ANSI-ish
+        write=write,
+        pretty=True,
+    )
+    return ";\n".join(p.strip() for p in parts if p.strip())
+
+def main():
+    ap = argparse.ArgumentParser(
+        description="Find ```sql blocks in Markdown, detect dialect, and transpile with SQLGlot."
+    )
+    ap.add_argument("files", nargs='+', help="Markdown files or glob patterns (e.g., 'docs/*.md' or file1.md file2.md)")
+    ap.add_argument("--to", default="ansi", help="Target dialect (default: ansi)")
+    ap.add_argument("--prefer", action="append", default=[],
+                    help="Prefer this dialect in detection (can be given multiple times).")
+    ap.add_argument("--list", action="store_true",
+                    help="Only list sql blocks found, do not transpile.")
+    args = ap.parse_args()
+
+    # If user provided preferred dialects, move them to the front of the candidate list
+    if args.prefer:
+        preferred = [d.lower() for d in args.prefer]
+        uniq = []
+        for d in preferred + CANDIDATE_DIALECTS:
+            if d not in uniq:
+                uniq.append(d)
+        CANDIDATE_DIALECTS[:] = uniq
+
+    # Collect all files from patterns and individual files
+    all_files = []
+    for pattern_or_file in args.files:
+        matched = glob.glob(pattern_or_file)
+        if matched:
+            all_files.extend(matched)
+        else:
+            # If no glob match, treat as individual file
+            all_files.append(pattern_or_file)
+    
+    files = sorted(set(all_files))  # Remove duplicates and sort
+    if not files:
+        print("No files found.", file=sys.stderr)
+        sys.exit(1)
+
+    for path in files:
+        with open(path, "r", encoding="utf-8") as f:
+            md_text = f.read()
+
+        blocks = parse_fenced_sql(md_text)
+
+        print(f"=== {path} ===")
+        if not blocks:
+            print("(no ```sql code blocks found)\n")
+            continue
+
+        for idx, hint, sql in blocks:
+            print(f"[block #{idx}]")
+            if args.list:
+                print(f"  hint: {hint or '(none)'}")
+                print(f"  first line: {sql.splitlines()[0] if sql.splitlines() else ''}")
+                continue
+            if hint not in CANDIDATE_DIALECTS:
+                hint = None  # ignore unrecognized hints
+            read = hint or detect_dialect(sql)
+            
+            detected_label = read or "ansi (fallback)"
+            try:
+                out = transpile_sql(sql, read=read, write=args.to)
+                print(f"Detected: {detected_label} -> {args.to}")
+                print(out)
+            except ParseError as e:
+                print(f"Detected: {detected_label} -> {args.to}")
+                print(f"ERROR: {e}", file=sys.stderr)
+            print()  # spacing between blocks
+
+        print()  # spacing between files
+
+if __name__ == "__main__":
+    main()