Preserve the ErrorContext cause chain across a defer on the unwind pa… #6092
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CI pipeline for the BAML language toolchain.
name: CI - BAML Language

# Token scopes granted to the jobs of this workflow by default.
permissions:
  pull-requests: write
  id-token: write
  contents: read

on:
  workflow_dispatch:
  merge_group:
    types:
      - checks_requested
  pull_request:
  push:
    branches:
      - main
      - canary

# Runs are grouped by workflow, ref name, and PR number (falling back to the
# commit SHA when there is no PR); a newer run in a group cancels the older one.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.event.pull_request.number || github.sha }}

# Every `run:` step uses bash unless it overrides the shell itself.
defaults:
  run:
    shell: bash

env:
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: "0"
  CARGO_NET_RETRY: "10"
  RUSTUP_MAX_RETRIES: "10"
| jobs: | |
determine_changes:
  name: Determine changes
  runs-on: blacksmith-4vcpu-ubuntu-2404
  # Each output forwards the result of one check_* step of this job so that
  # the other jobs can gate on it through `needs.determine_changes.outputs.*`.
  outputs:
    # Raised when any code in baml_language (or its release tooling) changed.
    code: ${{ steps.check_code.outputs.changed }}
    # Lexer changes.
    lexer: ${{ steps.check_lexer.outputs.changed }}
    # Parser changes.
    parser: ${{ steps.check_parser.outputs.changed }}
    # HIR changes.
    hir: ${{ steps.check_hir.outputs.changed }}
    # THIR changes.
    thir: ${{ steps.check_thir.outputs.changed }}
    # Codegen changes.
    codegen: ${{ steps.check_codegen.outputs.changed }}
    # Docs / frontend changes.
    docs: ${{ steps.check_docs.outputs.changed }}
    # typescript2 webview changes.
    webview: ${{ steps.check_webview.outputs.changed }}
    # typescript2 grammar changes.
    grammar: ${{ steps.check_grammar.outputs.changed }}
    # Unsafe code (bex_heap) changes; intended to trigger the Miri tests.
    unsafe: ${{ steps.check_unsafe.outputs.changed }}
    # Proto codegen changes; triggers the generated-file sync check.
    proto: ${{ steps.check_proto.outputs.changed }}
    # Profiling-ring (bex_events) changes; triggers loom + miri.
    prof: ${{ steps.check_prof.outputs.changed }}
    # Whether the perf benchmarks (CodSpeed) should run: after merge (push to
    # main/canary), on manual dispatch, and on PRs only when the PR
    # title/body or a commit message opts in.
    run_perf: ${{ steps.check_perf.outputs.run }}
| steps: | |
# Check out with complete history (fetch-depth: 0) and without leaving the
# token in the local git config.
- uses: useblacksmith/checkout@v1
  with:
    persist-credentials: false
    fetch-depth: 0
# Publishes `sha`: the merge base of HEAD and the comparison branch.
- name: Determine merge base
  id: merge_base
  env:
    # The PR's target branch, or canary when the event carries no pull request.
    BASE_REF: ${{ github.event.pull_request.base.ref || 'canary' }}
  run: |
    merge_base_sha="$(git merge-base HEAD "origin/${BASE_REF}")"
    printf 'sha=%s\n' "${merge_base_sha}" >> "$GITHUB_OUTPUT"
# Sets `changed` to true/false depending on whether any baml_language source,
# release script, packaging file, or related workflow differs from the merge base.
- name: Check if code changed
  id: check_code
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':baml_language/**'
      ':scripts/baml-language-version'
      ':scripts/baml-wrapper-version'
      ':scripts/baml-release-manifests'
      ':scripts/baml-package-manager-artifacts'
      ':scripts/install.sh'
      ':scripts/install.ps1'
      ':packaging/aur/**'
      ':tools/pkg_boundaryml_com/**'
      ':.github/workflows/ci.yaml'
      ':.github/workflows/release-baml-language.yml'
      ':.github/workflows/build2-python-sdk.reusable.yaml'
      ':.github/workflows/build2-nodejs-sdk.reusable.yaml'
      ':.github/workflows/cargo-tests.reusable.yaml'
      ':.github/workflows/size-gate.reusable.yaml'
      ':.github/workflows/wasm-pack-tests.reusable.yaml'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Sets `changed` for the lexer crate, baml_base, and the workspace manifests.
- name: Check if lexer code changed
  id: check_lexer
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':baml_language/crates/baml_compiler_lexer/**'
      ':baml_language/crates/baml_base/**'
      ':baml_language/Cargo.toml'
      ':baml_language/Cargo.lock'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Sets `changed` for the parser crate and the crates listed alongside it
# (syntax, lexer, baml_base).
- name: Check if parser code changed
  id: check_parser
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':baml_language/crates/baml_compiler_parser/**'
      ':baml_language/crates/baml_compiler_syntax/**'
      ':baml_language/crates/baml_compiler_lexer/**'
      ':baml_language/crates/baml_base/**'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Sets `changed` for the HIR crate and the crates listed alongside it
# (workspace, parser, baml_base).
- name: Check if HIR code changed
  id: check_hir
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':baml_language/crates/baml_compiler_hir/**'
      ':baml_language/crates/baml_workspace/**'
      ':baml_language/crates/baml_compiler_parser/**'
      ':baml_language/crates/baml_base/**'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Sets `changed` for the THIR crate, the HIR crate, and baml_base.
- name: Check if THIR code changed
  id: check_thir
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':baml_language/crates/baml_thir/**'
      ':baml_language/crates/baml_compiler_hir/**'
      ':baml_language/crates/baml_base/**'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Sets `changed` for the emit crate, the THIR crate, and baml_base.
- name: Check if codegen code changed
  id: check_codegen
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':baml_language/crates/baml_compiler_emit/**'
      ':baml_language/crates/baml_thir/**'
      ':baml_language/crates/baml_base/**'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Sets `changed` for the fern docs tree, the two docs-related TypeScript apps,
# and the docs reusable workflow.
- name: Check if docs changed
  id: check_docs
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':fern/**'
      ':typescript/apps/ask-baml-client/**'
      ':typescript/apps/sage-backend/**'
      ':.github/workflows/docs.reusable.yaml'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Sets `changed` for the typescript2 webview/playground/proto packages, the
# bridge_ctypes type definitions, and the webview-tests reusable workflow.
- name: Check if webview changed
  id: check_webview
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':typescript2/app-vscode-webview/**'
      ':typescript2/pkg-playground/**'
      ':typescript2/pkg-proto/**'
      ':baml_language/crates/bridge_ctypes/types/**'
      ':.github/workflows/webview-tests.reusable.yaml'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Sets `changed` for the grammar package, the VS Code extension syntaxes and
# manifest, the typescript2 lockfile, the setup-node2 action, and this workflow.
- name: Check if grammar changed
  id: check_grammar
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':typescript2/pkg-grammar/**'
      ':typescript2/app-vscode-ext/syntaxes/**'
      ':typescript2/app-vscode-ext/package.json'
      ':typescript2/pnpm-lock.yaml'
      ':.github/actions/setup-node2/**'
      ':.github/workflows/ci.yaml'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Sets `changed` for the bex_heap crate.
- name: Check if unsafe code changed
  id: check_unsafe
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':baml_language/crates/bex_heap/**'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Sets `changed` for any .proto file under bridge_ctypes.
- name: Check if proto sources changed
  id: check_proto
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':baml_language/crates/bridge_ctypes/**/*.proto'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Besides the bex_events crate, the watch list covers the inputs of the
# loom/miri job itself: the workspace manifest (loom/minstant dep versions
# and the check-cfg list), the pinned toolchain, and this workflow file, so
# that a change able to break the loom/miri build re-runs the gate.
# Cargo.lock is left out on purpose: it churns on every dep bump, and the
# cargo-tests job already covers the std halves of the suite.
- name: Check if profiling ring code changed
  id: check_prof
  env:
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
  run: |
    watched_paths=(
      ':baml_language/crates/bex_events/**'
      ':baml_language/Cargo.toml'
      ':baml_language/rust-toolchain.toml'
      ':.github/workflows/ci.yaml'
    )
    # `git diff --quiet` succeeds only when none of the watched paths differ.
    changed=true
    if git diff --quiet "${MERGE_BASE}...HEAD" -- "${watched_paths[@]}"; then
      changed=false
    fi
    echo "changed=${changed}" >> "$GITHUB_OUTPUT"
# Decide whether perf benchmarks (CodSpeed) should run for this event.
# Policy: benchmarks run AFTER MERGE (push to main/canary) only when the
# merge actually touched baml_language/, and on manual dispatch. On PRs
# they are OPT-IN — they do not run on every code change. A PR opts in by
# mentioning one of the perf markers in its title, body, OR any of its
# commit messages. merge_group is always skipped (the PR already had its
# chance to run).
- name: Check perf benchmark opt-in
  id: check_perf
  env:
    EVENT_NAME: ${{ github.event_name }}
    PR_TITLE: ${{ github.event.pull_request.title }}
    PR_BODY: ${{ github.event.pull_request.body }}
    MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
    PUSH_BEFORE: ${{ github.event.before }}
    PUSH_AFTER: ${{ github.sha }}
  run: |
    # Any of these, case-insensitive, in the PR title/body or a commit
    # message opts a PR into a perf run. Keep in sync with the instructions
    # posted by the "Perf benchmarks (PR notice)" job.
    PATTERN='RUN_CODSPEED=1|run-perf|/perf'

    # Writes the step's single output, `run`.
    set_run() {
      echo "run=$1" >> "$GITHUB_OUTPUT"
    }

    case "$EVENT_NAME" in
      push)
        # After merge: run only when the pushed range touched baml_language/.
        # The range comes from the push payload (before..after) rather than
        # the merge-base diff, because on a push the merge base of HEAD and
        # origin/<branch> collapses to HEAD and that diff is always empty.
        if [ -z "$PUSH_BEFORE" ] || [ "$PUSH_BEFORE" = "0000000000000000000000000000000000000000" ]; then
          # Branch creation / unknown previous tip — nothing to diff
          # against, so run to be safe rather than silently skip.
          set_run true
        elif git diff --quiet "$PUSH_BEFORE" "$PUSH_AFTER" -- ':baml_language/**'; then
          set_run false
        else
          set_run true
        fi
        exit 0
        ;;
      workflow_dispatch)
        # Manual dispatch always runs.
        set_run true
        exit 0
        ;;
      merge_group)
        # Never run in the merge queue — it already ran (or was opted out)
        # on the PR.
        set_run false
        exit 0
        ;;
    esac

    # On PRs: opt in via the PR title/body or any commit message in the PR.
    #
    # grep is fed from a here-string instead of a pipeline on purpose. This
    # workflow sets `defaults.run.shell: bash`, which GitHub runs with
    # `-eo pipefail`. In `producer | grep -q`, grep exits on the first match;
    # a producer that is still writing is then killed by SIGPIPE (status
    # 141), pipefail reports the pipeline as failed, and a genuine opt-in
    # marker would be treated as "no match".
    pr_text="${PR_TITLE}"$'\n'"${PR_BODY}"
    if grep -qiE "$PATTERN" <<<"$pr_text"; then
      set_run true
      exit 0
    fi
    if [ -n "$MERGE_BASE" ]; then
      # `|| true`: a failing git log means "no commit opted in", not a
      # failed step (errexit would otherwise abort on the assignment).
      commit_messages="$(git log --format='%B' "${MERGE_BASE}..HEAD" || true)"
      if grep -qiE "$PATTERN" <<<"$commit_messages"; then
        set_run true
        exit 0
      fi
    fi
    set_run false
| # Lint checks | |
# Runs every prek (pre-commit) hook over the whole tree, including the slower
# manual-stage hooks, and mirrors the output into the step summary.
prek:
  name: "Pre-commit Checks"
  runs-on: blacksmith-16vcpu-ubuntu-2404
  needs: determine_changes
  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
  timeout-minutes: 20
  steps:
    - name: "Checkout Branch"
      uses: useblacksmith/checkout@v1
      with:
        persist-credentials: false
    # Install Rust toolchain BEFORE rust-cache so cache key uses correct Rust version
    - name: "Install Rust toolchain"
      run: rustup toolchain install
      working-directory: baml_language
    - uses: Swatinem/rust-cache@v2
      with:
        workspaces: "baml_language -> target"
        # See the cache strategy explanation at the top of cargo-tests.reusable.yaml.
        save-if: false
        shared-key: "linux-cargo"
    # prek hooks invoke cargo (from rustup), mise tasks that shell out
    # to cargo, and python scripts that begin with `#!/usr/bin/env -S
    # uv run --script` (e.g. validate-markdown). We don't
    # need go/ruby/node/etc here, just the prek binary itself + a
    # python/uv pair for the hook scripts.
    - name: "Install mise"
      uses: ./.github/actions/setup-mise
      with:
        install_args: "cargo:prek python uv"
    - name: "Cache prek"
      uses: actions/cache@v5
      with:
        path: ~/.cache/pre-commit
        key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}
    # Separate step so if fetch is slow, build is fast.
    - name: "Fetch cargo dependencies"
      run: cargo fetch
      working-directory: baml_language
    - name: "Run prek"
      run: |
        echo '```console' > "$GITHUB_STEP_SUMMARY"
        # Enable color output for prek and remove it for the summary
        # Use --hook-stage=manual to enable slower hooks that are skipped by default
        #
        # This workflow sets `defaults.run.shell: bash`, which GitHub runs
        # with `-eo pipefail`. Without suspending errexit, a failing prek
        # would abort the script on the pipeline itself, so the exit-code
        # capture and the closing code fence below would never run and the
        # summary would end in an unterminated fence.
        set +e
        SKIP=no-commit-to-branch prek run --all-files --show-diff-on-failure --color always --hook-stage manual | \
          tee >(sed -E 's/\x1B\[([0-9]{1,2}(;[0-9]{1,2})*)?[mGK]//g' >> "$GITHUB_STEP_SUMMARY") >&1
        # Must be read immediately after the pipeline: prek's own status.
        exit_code="${PIPESTATUS[0]}"
        set -e
        echo '```' >> "$GITHUB_STEP_SUMMARY"
        exit "$exit_code"
# Validates the baml_language release metadata and the canary/nightly version
# computation performed by scripts/baml-language-version.
release-metadata:
  name: "Release Metadata"
  runs-on: blacksmith-4vcpu-ubuntu-2404
  needs: determine_changes
  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
  timeout-minutes: 5
  steps:
    - name: "Checkout"
      uses: useblacksmith/checkout@v1
      with:
        fetch-depth: 0
        persist-credentials: false
    - name: "Validate baml_language release metadata"
      run: scripts/baml-language-version check
    - name: "Validate canary/nightly version computation"
      env:
        # Pinned date for the check. The Python below reads this same
        # variable, so the expected version strings cannot drift from it.
        BAML_LANGUAGE_VERSION_DATE: "20260522"
      run: |
        set -euo pipefail
        canary="$(scripts/baml-language-version show)"
        # The canary channel must report the current version unchanged,
        # with and without --pypi.
        test "$(scripts/baml-language-version compute --channel canary)" = "$canary"
        test "$(scripts/baml-language-version compute --channel canary --pypi)" = "$canary"
        nightly="$(scripts/baml-language-version compute --channel nightly)"
        pypi="$(scripts/baml-language-version compute --channel nightly --pypi)"
        python3 - "$canary" "$nightly" "$pypi" <<'PY'
        import os
        import re
        import sys

        canary, nightly, pypi = sys.argv[1:]
        date = os.environ["BAML_LANGUAGE_VERSION_DATE"]
        major, minor, patch = [int(part) for part in canary.split(".")]
        # Nightly versions are expected on the next patch release.
        base = f"{major}.{minor}.{patch + 1}"
        match = re.fullmatch(
            rf"{re.escape(base)}-nightly\.{re.escape(date)}\.([a-z])", nightly
        )
        if not match:
            raise SystemExit(f"unexpected nightly version: {nightly}")
        # The trailing letter maps to a two-digit index in the PyPI form (a -> 00).
        index = ord(match.group(1)) - ord("a")
        expected_pypi = f"{base}.dev{date}{index:02d}"
        if pypi != expected_pypi:
            raise SystemExit(f"unexpected nightly PyPI version: {pypi} != {expected_pypi}")
        PY
| # Call reusable workflows | |
# Delegates to the cargo-tests reusable workflow. Runs when the `code` flag
# is raised, or whenever the run's ref is main or canary.
cargo-tests:
  name: Cargo Tests
  needs: determine_changes
  if: >-
    needs.determine_changes.outputs.code == 'true' ||
    github.ref == 'refs/heads/main' ||
    github.ref == 'refs/heads/canary'
  secrets: inherit
  uses: ./.github/workflows/cargo-tests.reusable.yaml
# Delegates to the size-gate reusable workflow. Runs when the `code` flag is
# raised, or whenever the run's ref is main or canary.
size-gate:
  name: Size Gate
  needs: determine_changes
  if: >-
    needs.determine_changes.outputs.code == 'true' ||
    github.ref == 'refs/heads/main' ||
    github.ref == 'refs/heads/canary'
  # Scopes granted to the called workflow for this job.
  permissions:
    pull-requests: write
    contents: read
  secrets: inherit
  uses: ./.github/workflows/size-gate.reusable.yaml
# Delegates to the wasm-pack-tests reusable workflow. Runs when the `code`
# flag is raised, or whenever the run's ref is main or canary.
wasm-pack-tests:
  name: WASM Pack Tests
  needs: determine_changes
  if: >-
    needs.determine_changes.outputs.code == 'true' ||
    github.ref == 'refs/heads/main' ||
    github.ref == 'refs/heads/canary'
  secrets: inherit
  uses: ./.github/workflows/wasm-pack-tests.reusable.yaml
# Delegates to the docs reusable workflow. This job has no `if:` gate of its
# own; the docs-changed flag and an is-canary flag are passed in as inputs.
docs:
  name: Docs
  needs: determine_changes
  # NOTE(review): write-all grants every token scope to the called workflow.
  # Consider narrowing it once the scopes docs.reusable.yaml actually needs
  # are confirmed.
  permissions: write-all
  secrets: inherit
  uses: ./.github/workflows/docs.reusable.yaml
  with:
    is_canary: ${{ github.ref == 'refs/heads/canary' }}
    docs_changed: ${{ needs.determine_changes.outputs.docs }}
# Delegates to the webview-tests reusable workflow. Runs when the `webview`
# flag is raised, or whenever the run's ref is main or canary.
webview-tests:
  name: Webview Tests
  needs: determine_changes
  if: >-
    needs.determine_changes.outputs.webview == 'true' ||
    github.ref == 'refs/heads/main' ||
    github.ref == 'refs/heads/canary'
  secrets: inherit
  uses: ./.github/workflows/webview-tests.reusable.yaml
# Runs the @b/pkg-grammar test script inside typescript2. Gated on the
# `grammar` flag, or runs whenever the run's ref is main or canary.
grammar-tests:
  name: Grammar Tests
  needs: determine_changes
  if: >-
    needs.determine_changes.outputs.grammar == 'true' ||
    github.ref == 'refs/heads/main' ||
    github.ref == 'refs/heads/canary'
  runs-on: blacksmith-4vcpu-ubuntu-2404
  steps:
    - name: Checkout Branch
      uses: useblacksmith/checkout@v1
      with:
        persist-credentials: false
    - name: Setup Node.js for typescript2
      uses: ./.github/actions/setup-node2
    - name: Run grammar snapshot tests
      working-directory: typescript2
      run: pnpm --filter @b/pkg-grammar test
# Miri run over the bex_heap library tests.
miri-tests:
  name: Miri (unsafe code verification)
  needs: determine_changes
  runs-on: blacksmith-4vcpu-ubuntu-2404
  timeout-minutes: 25
  # TEMPORARILY DISABLED: the miri tests were timing out at 25 minutes and
  # blocking CI/CD.
  # TODO: Re-enable once miri test performance is addressed. The intended
  # gate is: needs.determine_changes.outputs.unsafe == 'true'
  if: false
  steps:
    - name: Checkout
      uses: useblacksmith/checkout@v1
      with:
        persist-credentials: false
    - name: Install Rust nightly and Miri
      working-directory: baml_language
      run: |
        rustup toolchain install nightly --component miri
        rustup override set nightly
    # Fetching is its own step so a slow download is not counted as build time.
    - name: Fetch cargo dependencies
      working-directory: baml_language
      run: cargo +nightly fetch
    - name: Run Miri tests on bex_heap
      working-directory: baml_language
      run: cargo miri test -p bex_heap --lib
| # Loom + Miri verification for the lock-free profiling ring | |
| # (bex_events::prof). Scoped to the prof:: tests so it stays minutes-fast — | |
| # deliberately narrower than the (disabled) whole-crate miri-tests job | |
| # above, whose bex_heap runs were timing out. | |
# Loom model checking followed by Miri for the bex_events `prof::` tests.
# Runs only when the `prof` flag is raised.
prof-concurrency:
  name: Profiling ring (loom + miri)
  needs: determine_changes
  if: needs.determine_changes.outputs.prof == 'true'
  runs-on: blacksmith-4vcpu-ubuntu-2404
  timeout-minutes: 45
  steps:
    - name: Checkout
      uses: useblacksmith/checkout@v1
      with:
        persist-credentials: false
    # The toolchain goes in BEFORE rust-cache so that the cache key is
    # computed with the correct Rust version.
    - name: Install Rust toolchain
      working-directory: baml_language
      run: rustup toolchain install
    - uses: Swatinem/rust-cache@v2
      with:
        shared-key: prof-concurrency
        workspaces: "baml_language -> target"
    - name: Fetch cargo dependencies
      working-directory: baml_language
      run: cargo fetch
    # Loom explores every interleaving (bounded at 3 preemptions, configured
    # in the test harness) of the ring's producer/consumer/lifecycle
    # protocols. The cfg flag is named baml_loom rather than the conventional
    # loom so it does not half-activate loom support in third-party deps
    # (e.g. boxcar) that gate on cfg(loom) but need their own loom feature
    # enabled in order to compile.
    - name: 'Loom model checking (bex_events::prof)'
      working-directory: baml_language
      env:
        CARGO_TARGET_DIR: target/loom
        RUSTFLAGS: "--cfg baml_loom"
      run: "cargo test -p bex_events --release --lib prof::"
    - name: Install Rust nightly and Miri
      working-directory: baml_language
      run: rustup toolchain install nightly --component miri
    # Miri checks the raw-pointer/UnsafeCell discipline of the same scenarios
    # on real threads. Leaks are ignored because leaked rings are by design
    # (&'static lifetime model); isolation is disabled for the
    # park_timeout/sleep calls in the stress tests.
    - name: 'Miri (bex_events::prof)'
      working-directory: baml_language
      env:
        MIRIFLAGS: "-Zmiri-ignore-leaks -Zmiri-disable-isolation"
      run: "cargo +nightly miri test -p bex_events --lib prof::"
# Regenerates every proto binding (Rust, Python, Go, Node/TypeScript) and
# fails if the working tree then differs from what is committed.
proto-sync:
  name: proto generated files sync
  needs: determine_changes
  if: needs.determine_changes.outputs.proto == 'true'
  runs-on: blacksmith-4vcpu-ubuntu-2404
  timeout-minutes: 20
  steps:
    - name: Checkout
      uses: useblacksmith/checkout@v1
      with:
        persist-credentials: false
    # The toolchain goes in BEFORE rust-cache so that the cache key is
    # computed with the correct Rust version.
    - name: Install Rust toolchain
      working-directory: baml_language
      run: rustup toolchain install
    - uses: Swatinem/rust-cache@v2
      with:
        shared-key: linux-cargo
        # Read-only here; the cache strategy is explained at the top of
        # cargo-tests.reusable.yaml.
        save-if: false
        workspaces: "baml_language -> target"
    # Tool requirements: bridge_ctypes/build.rs needs `protoc`;
    # bridge_go/build.sh needs `protoc` + `protoc-gen-go`; bridge_nodejs
    # needs node + pnpm. `cargo build -p bridge_ctypes` uses the rustup
    # cargo (not mise), so `go` itself is not installed here.
    - name: Install mise
      uses: ./.github/actions/setup-mise
      with:
        install_args: "node npm:pnpm protoc protoc-gen-go"
    # Fetching is its own step so a slow download is not counted as build time.
    - name: Fetch cargo dependencies
      working-directory: baml_language
      run: cargo fetch
    # Rust (prost) and Python (protoc) are both driven by
    # bridge_ctypes/build.rs; the Python pb2/pyi files land in
    # baml_language/sdks/python/src/baml_core/cffi/v1/.
    - name: Generate Rust + Python proto bindings (bridge_ctypes)
      working-directory: baml_language
      run: cargo build -p bridge_ctypes
    # Go (protoc-gen-go) output goes to
    # sdks/go/bridge_go/cffi/proto/baml_core/cffi/v1/.
    - name: Generate Go proto bindings (sdks/go/bridge_go)
      working-directory: baml_language/sdks/go/bridge_go
      run: ./build.sh
    # --ignore-workspace installs from bridge_nodejs's own pnpm-lock.yaml
    #   instead of the root workspace lockfile. The two resolve different
    #   protobufjs-cli versions, and the pbjs output committed under
    #   typescript_src/proto/ must come from the version this package pins.
    # --ignore-scripts skips dependency build scripts (esbuild's binary
    #   fetch, protobufjs postinstall): none are needed for pbjs/tsc/napi
    #   codegen, and pnpm errors on unapproved build scripts otherwise.
    # These are the same flags as sdk_tests/crates/typescript_node/setup.sh.
    - name: Install Node SDK dependencies
      working-directory: baml_language/sdks/nodejs/bridge_nodejs
      run: pnpm install --frozen-lockfile --ignore-workspace --ignore-scripts
    # Node / TypeScript (protobufjs) output goes to
    # sdks/nodejs/bridge_nodejs/typescript_src/proto/. build:debug also
    # regenerates typescript2/pkg-proto/src/generated/ (ts-proto / buf).
    - name: Generate Node/TypeScript proto bindings (sdks/nodejs/bridge_nodejs)
      working-directory: baml_language/sdks/nodejs/bridge_nodejs
      run: pnpm build:debug
    - name: Check generated proto files are in sync
      run: |
        # `git status --porcelain` reports modified tracked files as well as
        # untracked ones; it is scoped to every codegen output documented in
        # baml_language/crates/bridge_ctypes/README.md.
        generated_dirs=(
          baml_language/sdks/nodejs/bridge_nodejs
          baml_language/crates/bridge_ctypes
          baml_language/sdks/go/bridge_go
          baml_language/sdks/python/src/baml_core/cffi
          typescript2/pkg-proto
        )
        readme="baml_language/crates/bridge_ctypes/README.md"
        dirty="$(git status --porcelain -- "${generated_dirs[@]}")"
        if [ -z "$dirty" ]; then
          echo "All generated proto files are in sync."
          exit 0
        fi
        # Out of sync: list the files, print the README with the
        # regeneration commands, show the diff, and fail the job.
        echo "::error::proto generated files are out of sync — consult ${readme} for the regeneration commands and commit the resulting changes."
        echo ""
        echo "The following files are out of sync:"
        echo "$dirty"
        echo ""
        echo "===== ${readme} ====="
        cat "${readme}"
        echo "===== end of ${readme} ====="
        echo ""
        git diff -- "${generated_dirs[@]}"
        exit 1
| # Benchmarks are split into two jobs so the slow compile happens on a fast, | |
| # cheap machine while the actual measurement happens on a stable CodSpeed | |
| # macro runner. | |
| # | |
| # benchmarks-build -> compiles the bench binaries (the part that used to | |
| # time out on the small CodSpeed runner) | |
| # benchmarks-run -> ONLY executes those prebuilt binaries on codspeed-macro | |
| # | |
| # CRITICAL — no double-build / flag mismatch: | |
| # `cargo codspeed build` compiles each bench target with CodSpeed's own | |
| # compilation flags and copies the resulting executables into | |
| # target/codspeed/walltime/<pkg>/<bench>. `cargo codspeed run` then ONLY | |
| # executes those copies — it never invokes the compiler. So the run job | |
| # cannot accidentally rebuild with default flags; if the artifact is missing | |
| # it errors ("No benchmarks found") instead of measuring the wrong binary. | |
| # | |
| # CRITICAL — ABI match: | |
| # codspeed-macro is an ARM64 machine, so the binaries MUST be built on ARM64 | |
| # too. We build on the OLDEST Ubuntu we have an ARM runner for (22.04, | |
| # glibc 2.35) so the binaries stay forward-compatible with whatever (newer | |
| # or equal) glibc the macro runner ships. | |
# Compiles the CodSpeed walltime bench binaries and publishes them, together
# with a packed cargo home, as artifacts for the benchmarks-run job.
benchmarks-build:
  name: benchmarks build (baml)
  needs: determine_changes
  # Gate: perf benchmarks run AFTER MERGE (push to main/canary, and only when
  # the merge touched baml_language/) and on manual dispatch. On PRs they are
  # OPT-IN: a PR runs them only when its title/body or a commit message
  # mentions a perf marker (RUN_CODSPEED=1, run-perf, /perf). The policy
  # lives in the `check_perf` step; the "Perf benchmarks (PR notice)" job
  # posts the comment PRs get when benchmarks are skipped. Skipped in
  # merge_group (already settled on the PR).
  if: needs.determine_changes.outputs.run_perf == 'true'
  runs-on: blacksmith-8vcpu-ubuntu-2204-arm
  timeout-minutes: 30
  steps:
    - name: Checkout Branch
      uses: actions/checkout@v6
      with:
        persist-credentials: false
    # The toolchain goes in BEFORE rust-cache so that the cache key is
    # computed with the correct Rust version.
    - name: Install Rust toolchain
      working-directory: baml_language
      run: rustup show
    # Swatinem caches the compiled target/ purely for this build job's own
    # speed: canary is the only saver and PRs restore read-only. The key is
    # bench-specific on purpose — its only saver compiles just the
    # baml_tests benches, a thin seed not worth sharing with a general ARM job.
    - uses: Swatinem/rust-cache@v2
      with:
        shared-key: linux-arm-bench
        save-if: ${{ github.ref == 'refs/heads/canary' }}
        workspaces: "baml_language -> target"
        cache-workspace-crates: true
        cache-all-crates: true
    # Pinned to the same major+minor as codspeed-divan-compat in Cargo.lock
    # (4.7.0); `cargo codspeed run` aborts on a major-version mismatch.
    - name: Install cargo-codspeed
      uses: taiki-e/install-action@v2
      with:
        tool: cargo-codspeed@4.7.0
    # Fetching is its own step so a slow download is not counted as build time.
    - name: Fetch cargo dependencies
      working-directory: baml_language
      run: cargo fetch
    # Only runtime_benchmark is built: cache_profile is a macOS/kperf harness
    # that is a no-op on Linux, so there is no reason to build or run it here.
    - name: Build benchmarks (walltime)
      working-directory: baml_language
      run: cargo codspeed build -p baml_tests --bench runtime_benchmark -m walltime
    - name: Upload prebuilt benchmark binaries
      uses: actions/upload-artifact@v7
      with:
        name: codspeed-walltime-benches
        path: baml_language/target/codspeed/walltime
        retention-days: 1
        if-no-files-found: error
    # The resolved cargo home travels to benchmarks-run as an ARTIFACT, not a
    # cache. actions/cache addresses entries by (key, version), and version
    # depends on the runner's compression tooling, so a cache saved on this
    # blacksmith runner is invisible to the codspeed-macro run job even with
    # an identical key (verified: same key 4f01266e…, "Cache not found").
    # Artifacts are addressed by name and transfer cleanly across runners.
    # benchmarks-run untars this into ~/.cargo so its offline
    # `cargo metadata` has every dep locally — no ~24-min cold git re-clone.
    #
    # registry/src (the extracted dep sources, the bulk of ~/.cargo) is
    # excluded: the run job only does offline `cargo metadata`, which needs
    # the registry index + git checkouts rather than the unpacked sources, so
    # the artifact (and the run job's download/untar) stays small.
    - name: Pack cargo home for benchmarks-run
      run: |
        tar -C "$HOME" --exclude='.cargo/registry/src' -cf cargo-home.tar .cargo/registry .cargo/git
    - name: Upload cargo home
      uses: actions/upload-artifact@v7
      with:
        name: codspeed-cargo-home
        path: cargo-home.tar
        retention-days: 1
        if-no-files-found: error
| benchmarks-run: | |
| # Runs the benchmark binaries prebuilt by benchmarks-build on the | |
| # codspeed-macro runner and reports them through the CodSpeed action. | |
| # Nothing is compiled in this job: it restores a cargo home and the bench | |
| # binaries from artifacts, then executes them in walltime mode. | |
| # NOTE(review): the display name says "instrumented" but the CodSpeed step | |
| # below runs with `mode: walltime` — confirm the name is kept deliberately | |
| # (e.g. because an existing status check refers to it). | |
| name: "benchmarks instrumented (baml)" | |
| runs-on: codspeed-macro | |
| needs: | |
| - determine_changes | |
| - benchmarks-build | |
| # Same gate as benchmarks-build — see determine_changes' `check_perf` step. | |
| if: needs.determine_changes.outputs.run_perf == 'true' | |
| # Job-wide ceiling. DIVAN_MAX_TIME on the run step is what keeps the suite | |
| # inside it (see the WHY note there). | |
| timeout-minutes: 30 | |
| steps: | |
| - name: "Checkout Branch" | |
| uses: actions/checkout@v6 | |
| with: | |
| persist-credentials: false | |
| # `cargo codspeed run` needs `cargo metadata` to locate the benches and | |
| # their working directories. It does NOT compile anything here. | |
| - name: "Install Rust toolchain" | |
| run: rustup show | |
| working-directory: baml_language | |
| # Restore the resolved cargo home that benchmarks-build packed, so this | |
| # job's offline `cargo metadata` has every dep locally instead of cold | |
| # re-cloning git deps on the bare codspeed-macro runner (~24 min). Comes | |
| # via artifact, not cache — see the build job for why cache misses here. | |
| - name: "Download cargo home" | |
| uses: actions/download-artifact@v7 | |
| with: | |
| name: codspeed-cargo-home | |
| path: /tmp/cargo-home | |
| # The tarball stores paths relative to $HOME (.cargo/registry, .cargo/git), | |
| # so extracting with -C "$HOME" lands them under ~/.cargo. | |
| - name: "Unpack cargo home into ~/.cargo" | |
| run: tar -C "$HOME" -xf /tmp/cargo-home/cargo-home.tar | |
| # Only used to *run* the prebuilt benches (see the run step below). | |
| # NOTE(review): presumably this pin should match the cargo-codspeed version | |
| # benchmarks-build compiles with — confirm when bumping either one. | |
| - name: "Install cargo-codspeed" | |
| uses: taiki-e/install-action@v2 | |
| with: | |
| tool: cargo-codspeed@4.7.0 | |
| # Bench binaries are built upstream (this job needs benchmarks-build); they | |
| # are placed under baml_language/target/codspeed/walltime for the run step. | |
| - name: "Download prebuilt benchmark binaries" | |
| uses: actions/download-artifact@v7 | |
| with: | |
| name: codspeed-walltime-benches | |
| path: baml_language/target/codspeed/walltime | |
| # actions/upload-artifact drops the Unix executable bit; restore it so the | |
| # binaries can be exec'd by `cargo codspeed run`. | |
| - name: "Restore executable bit on bench binaries" | |
| run: chmod -R +x baml_language/target/codspeed/walltime | |
| - name: "Run benchmarks (walltime)" | |
| uses: CodSpeedHQ/action@v4 | |
| # Benchmarks are advisory — never block a merge on a measurement blip. | |
| continue-on-error: true | |
| env: | |
| # The cargo home is restored above, so `cargo metadata` has every dep | |
| # locally. Force offline so it physically cannot stall on the network | |
| # (no index update, no git fetch) — turns the old ~24-min metadata | |
| # phase into seconds. If the restored cargo home (an artifact, not a | |
| # cache) is ever missing a dep, this fails fast instead of silently | |
| # re-cloning for half an hour. Because this step is continue-on-error, | |
| # such a failure does not fail the job — check this step's log. | |
| CARGO_NET_OFFLINE: "true" | |
| # Adaptive sampling: cap each bench at ~2s of wall time instead of | |
| # CodSpeed walltime's fixed 100 samples/bench. divan then runs as many | |
| # samples as fit the budget — ~100 for cheap benches (full statistical | |
| # power kept), down to 1 for the heavy O(n²)/1M-iter workloads (whose | |
| # run-to-run variance is already tiny, so a single sample suffices). | |
| # | |
| # WHY: at fixed 100 samples the suite blew past the 30-min timeout. The | |
| # long pole was compute::bubble_sort_5k — ~14s/sample on the (≈5× | |
| # slower than local) codspeed-macro runner × 100 = ~23min for ONE | |
| # bench. With this cap the whole 36-bench suite finishes in ~2 min. | |
| # Measured locally: fixed-100 bubble_sort alone >90s; with the budget | |
| # the full suite ran in 80s. Tune the budget here, not in code. | |
| DIVAN_MAX_TIME: "2" | |
| with: | |
| mode: walltime | |
| # Executes ONLY the prebuilt binaries downloaded above; no compiler runs. | |
| # | |
| # Every bench is a `vm_speedtest_*` function auto-generated from the | |
| # cross-language workload corpus under tools/speedtest/workloads/*.md | |
| # (see baml_tests/build.rs). They measure *pure VM execution*: the BAML | |
| # is compiled and the tokio runtime is built once, OUTSIDE the measured | |
| # region (see bench_vm_main), so only `main()` is timed. Sleep-based | |
| # workloads are excluded at build time. | |
| # | |
| # The `vm_speedtest` filter selects exactly that suite; DIVAN_MAX_TIME | |
| # (above) bounds each bench so the pass stays well under the timeout. | |
| run: cd baml_language && cargo codspeed run -m walltime 'vm_speedtest' | |
| token: ${{ secrets.CODSPEED_TOKEN }} | |
| # Leaves (and keeps up to date) a single sticky comment on each PR explaining | |
| # the perf-benchmark policy: either confirming benchmarks were triggered, or — | |
| # when they were skipped — telling the author exactly how to opt in. Runs on | |
| # every PR event so the same comment is edited in place (no spam) and flips | |
| # between the two states as the author edits the PR / pushes commits. | |
| perf-pr-notice: | |
| name: "Perf benchmarks (PR notice)" | |
| runs-on: blacksmith-4vcpu-ubuntu-2404 | |
| needs: determine_changes | |
| # Pull-request events only, and only when the head repo is not a fork. | |
| if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == false | |
| permissions: | |
| pull-requests: write | |
| steps: | |
| - name: "Comment perf opt-in instructions" | |
| uses: actions/github-script@v7 | |
| env: | |
| # Handed to the script as a string; it compares against 'true'. | |
| RUN_PERF: ${{ needs.determine_changes.outputs.run_perf }} | |
| with: | |
| script: | | |
| // Upsert one sticky PR comment. The hidden HTML marker below is how a | |
| // later run recognises the comment it should edit instead of posting anew. | |
| const MARKER = '<!-- perf-benchmarks-pr-notice -->'; | |
| const { owner, repo } = context.repo; | |
| const issue_number = context.issue.number; | |
| // Notice shown when the perf jobs were triggered for this PR. | |
| const triggeredNotice = [ | |
| '### 🏎️ Performance benchmarks are running for this PR', | |
| '', | |
| 'CodSpeed perf benchmarks were triggered because this PR opted in. ' + | |
| 'Results will appear in the CodSpeed check / dashboard once they finish.', | |
| ]; | |
| // Notice shown when they were skipped: explains how to opt in. | |
| const skippedNotice = [ | |
| '### ⏭️ Performance benchmarks were skipped', | |
| '', | |
| 'Perf benchmarks (CodSpeed) are **opt-in** on pull requests — they no ' + | |
| 'longer run on every push. They always run automatically after merge ' + | |
| 'to `canary`/`main`.', | |
| '', | |
| 'To run them on **this** PR, do any of the following, then push a commit ' + | |
| '(or re-run CI):', | |
| '', | |
| '- Add `RUN_CODSPEED=1` to the PR description, **or**', | |
| '- Include `run-perf` or `/perf` in the PR title or any commit message.', | |
| ]; | |
| const ranPerf = process.env.RUN_PERF === 'true'; | |
| const body = [MARKER, ...(ranPerf ? triggeredNotice : skippedNotice)].join('\n'); | |
| // Look through every comment on the PR for one carrying our marker. | |
| const allComments = await github.paginate(github.rest.issues.listComments, { | |
| owner, repo, issue_number, | |
| }); | |
| const previous = allComments.find( | |
| ({ body: text }) => Boolean(text) && text.includes(MARKER), | |
| ); | |
| if (previous === undefined) { | |
| await github.rest.issues.createComment({ owner, repo, issue_number, body }); | |
| } else { | |
| await github.rest.issues.updateComment({ owner, repo, comment_id: previous.id, body }); | |
| } | |
| # CI Failure Alert - Required status check using "skipped = success" pattern | |
| # See: https://devopsdirective.com/posts/2025/08/github-actions-required-checks-for-conditional-jobs/ | |
| # | |
| # This job exploits GitHub's behavior where skipped jobs report as "Success". | |
| # - When all tests pass (or are intentionally skipped): this job SKIPS → reports SUCCESS | |
| # - When any test fails or is cancelled: this job RUNS and FAILS → blocks merge | |
| # | |
| # Configure "CI - BAML Language / CI-v2 Failure Alert" as a required status check. | |
| ci-failure-alert: | |
| name: "CI-v2 Failure Alert" | |
| runs-on: blacksmith-4vcpu-ubuntu-2404 | |
| # Jobs whose outcome gates merging. Keep this list and the rows of the | |
| # summary table in the "Report failure" step in sync: a job listed here but | |
| # missing there is still enforced, it just will not appear in the report. | |
| needs: | |
| - prek | |
| - release-metadata | |
| - cargo-tests | |
| - wasm-pack-tests | |
| - docs | |
| - webview-tests | |
| - grammar-tests | |
| # - miri-tests # TEMPORARILY DISABLED: miri tests timing out | |
| - prof-concurrency | |
| - proto-sync | |
| # NOTE: size-gate is intentionally NOT a required gate. It is an | |
| # informational signal (PR comment + daily baseline auto-refresh), not | |
| # a merge blocker: a size bump must never block the merge queue or force | |
| # a full CI re-run. Drift is caught by the daily refresh PR and by | |
| # reviewers eyeballing the size-gate comment, not by failing CI here. | |
| # Only run if something failed or was cancelled (otherwise skip → success) | |
| if: ${{ failure() || cancelled() }} | |
| steps: | |
| # Writes a Markdown table of every gated job's result to the job summary, | |
| # emits an error annotation, then exits non-zero so this check goes red. | |
| - name: Report failure | |
| run: | | |
| # One grouped redirect: the summary file is opened once, and the path | |
| # is quoted so it can never be word-split or glob-expanded. | |
| { | |
| echo "## ❌ CI Failed" | |
| echo "" | |
| echo "One or more required jobs failed or were cancelled." | |
| echo "" | |
| echo "| Job | Result |" | |
| echo "|-----|--------|" | |
| echo "| prek | ${{ needs.prek.result }} |" | |
| echo "| release-metadata | ${{ needs.release-metadata.result }} |" | |
| echo "| cargo-tests | ${{ needs.cargo-tests.result }} |" | |
| echo "| wasm-pack-tests | ${{ needs.wasm-pack-tests.result }} |" | |
| echo "| docs | ${{ needs.docs.result }} |" | |
| echo "| webview-tests | ${{ needs.webview-tests.result }} |" | |
| echo "| grammar-tests | ${{ needs.grammar-tests.result }} |" | |
| # miri-tests: TEMPORARILY DISABLED. Its row is left out entirely rather | |
| # than commented out, because Actions expands expression syntax before | |
| # the shell runs — even inside shell comments — and miri-tests is not | |
| # in `needs` right now. Re-add the row together with the `needs` entry. | |
| echo "| prof-concurrency | ${{ needs.prof-concurrency.result }} |" | |
| echo "| proto-sync | ${{ needs.proto-sync.result }} |" | |
| echo "" | |
| } >> "$GITHUB_STEP_SUMMARY" | |
| echo "::error::One or more CI jobs failed!" | |
| exit 1 | |