Skip to content

Preserve the ErrorContext cause chain across a defer on the unwind pa… #6092

Preserve the ErrorContext cause chain across a defer on the unwind pa…

Preserve the ErrorContext cause chain across a defer on the unwind pa… #6092

Workflow file for this run

# Workflow-level settings: triggers, default token permissions, concurrency,
# default shell, and the environment shared by every job.
name: CI - BAML Language

# Default GITHUB_TOKEN permissions; some jobs below declare their own
# `permissions` block.
permissions:
  contents: read
  id-token: write
  pull-requests: write

on:
  push:
    branches: [main, canary]
  pull_request:
  merge_group:
    types: [checks_requested]
  workflow_dispatch:

# Runs are grouped by workflow + ref name + PR number (or commit SHA when the
# event has no PR); a newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.event.pull_request.number || github.sha }}
  cancel-in-progress: true

# Every `run:` step uses bash unless a step says otherwise.
defaults:
  run:
    shell: bash

env:
  CARGO_INCREMENTAL: 0
  CARGO_NET_RETRY: 10
  CARGO_TERM_COLOR: always
  RUSTUP_MAX_RETRIES: 10

jobs:
determine_changes:
  # Computes one boolean-like output per area of the repo (by diffing against
  # the merge base) plus `run_perf`, the perf-benchmark gate. Downstream jobs
  # read these via `needs.determine_changes.outputs.*`.
  name: "Determine changes"
  runs-on: blacksmith-4vcpu-ubuntu-2404
  outputs:
    # Flag that is raised when any code in baml_language is changed
    code: ${{ steps.check_code.outputs.changed }}
    # Flag for lexer changes
    lexer: ${{ steps.check_lexer.outputs.changed }}
    # Flag for parser changes
    parser: ${{ steps.check_parser.outputs.changed }}
    # Flag for HIR changes
    hir: ${{ steps.check_hir.outputs.changed }}
    # Flag for THIR changes
    thir: ${{ steps.check_thir.outputs.changed }}
    # Flag for codegen changes
    codegen: ${{ steps.check_codegen.outputs.changed }}
    # Flag for docs/frontend changes
    docs: ${{ steps.check_docs.outputs.changed }}
    # Flag for typescript2/webview changes
    webview: ${{ steps.check_webview.outputs.changed }}
    # Flag for typescript2 grammar changes
    grammar: ${{ steps.check_grammar.outputs.changed }}
    # Flag for unsafe code changes (bex_heap) - triggers Miri tests
    unsafe: ${{ steps.check_unsafe.outputs.changed }}
    # Flag for proto codegen changes - triggers generated file sync check
    proto: ${{ steps.check_proto.outputs.changed }}
    # Flag for profiling-ring changes (bex_events) - triggers loom + miri
    prof: ${{ steps.check_prof.outputs.changed }}
    # Flag that is raised when perf benchmarks (CodSpeed) should run: always
    # after merge (push to main/canary) and on manual dispatch; on PRs only
    # when the PR title/body or a commit message opts in.
    run_perf: ${{ steps.check_perf.outputs.run }}
  steps:
    # Full history (fetch-depth: 0) so `git merge-base` and the range diffs
    # below can see the base branch.
    - uses: useblacksmith/checkout@v1
      with:
        fetch-depth: 0
        persist-credentials: false
    # Falls back to `canary` when the event carries no PR base ref.
    - name: Determine merge base
      id: merge_base
      env:
        BASE_REF: ${{ github.event.pull_request.base.ref || 'canary' }}
      run: |
        sha=$(git merge-base HEAD "origin/${BASE_REF}")
        echo "sha=${sha}" >> "$GITHUB_OUTPUT"
    # Each "Check if ... changed" step follows the same shape:
    # `git diff --quiet` exits 0 when none of the listed pathspecs differ
    # (changed=false) and non-zero otherwise (changed=true).
    - name: Check if code changed
      id: check_code
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':baml_language/**' \
          ':scripts/baml-language-version' \
          ':scripts/baml-wrapper-version' \
          ':scripts/baml-release-manifests' \
          ':scripts/baml-package-manager-artifacts' \
          ':scripts/install.sh' \
          ':scripts/install.ps1' \
          ':packaging/aur/**' \
          ':tools/pkg_boundaryml_com/**' \
          ':.github/workflows/ci.yaml' \
          ':.github/workflows/release-baml-language.yml' \
          ':.github/workflows/build2-python-sdk.reusable.yaml' \
          ':.github/workflows/build2-nodejs-sdk.reusable.yaml' \
          ':.github/workflows/cargo-tests.reusable.yaml' \
          ':.github/workflows/size-gate.reusable.yaml' \
          ':.github/workflows/wasm-pack-tests.reusable.yaml' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    - name: Check if lexer code changed
      id: check_lexer
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':baml_language/crates/baml_compiler_lexer/**' \
          ':baml_language/crates/baml_base/**' \
          ':baml_language/Cargo.toml' \
          ':baml_language/Cargo.lock' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    - name: Check if parser code changed
      id: check_parser
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':baml_language/crates/baml_compiler_parser/**' \
          ':baml_language/crates/baml_compiler_syntax/**' \
          ':baml_language/crates/baml_compiler_lexer/**' \
          ':baml_language/crates/baml_base/**' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    - name: Check if HIR code changed
      id: check_hir
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':baml_language/crates/baml_compiler_hir/**' \
          ':baml_language/crates/baml_workspace/**' \
          ':baml_language/crates/baml_compiler_parser/**' \
          ':baml_language/crates/baml_base/**' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    - name: Check if THIR code changed
      id: check_thir
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':baml_language/crates/baml_thir/**' \
          ':baml_language/crates/baml_compiler_hir/**' \
          ':baml_language/crates/baml_base/**' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    - name: Check if codegen code changed
      id: check_codegen
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':baml_language/crates/baml_compiler_emit/**' \
          ':baml_language/crates/baml_thir/**' \
          ':baml_language/crates/baml_base/**' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    - name: Check if docs changed
      id: check_docs
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':fern/**' \
          ':typescript/apps/ask-baml-client/**' \
          ':typescript/apps/sage-backend/**' \
          ':.github/workflows/docs.reusable.yaml' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    - name: Check if webview changed
      id: check_webview
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':typescript2/app-vscode-webview/**' \
          ':typescript2/pkg-playground/**' \
          ':typescript2/pkg-proto/**' \
          ':baml_language/crates/bridge_ctypes/types/**' \
          ':.github/workflows/webview-tests.reusable.yaml' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    - name: Check if grammar changed
      id: check_grammar
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':typescript2/pkg-grammar/**' \
          ':typescript2/app-vscode-ext/syntaxes/**' \
          ':typescript2/app-vscode-ext/package.json' \
          ':typescript2/pnpm-lock.yaml' \
          ':.github/actions/setup-node2/**' \
          ':.github/workflows/ci.yaml' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    - name: Check if unsafe code changed
      id: check_unsafe
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':baml_language/crates/bex_heap/**' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    - name: Check if proto sources changed
      id: check_proto
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':baml_language/crates/bridge_ctypes/**/*.proto' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    # Includes the job's own inputs (workspace manifest with the
    # loom/minstant dep versions + check-cfg list, the pinned toolchain,
    # and this workflow file) so a change that can break the loom/miri
    # build re-runs the gate. Cargo.lock is deliberately excluded: it
    # churns on every dep bump and the cargo-tests job already covers the
    # std halves of the suite.
    - name: Check if profiling ring code changed
      id: check_prof
      env:
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
      run: |
        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
          ':baml_language/crates/bex_events/**' \
          ':baml_language/Cargo.toml' \
          ':baml_language/rust-toolchain.toml' \
          ':.github/workflows/ci.yaml' \
        ; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
        fi
    # Decide whether perf benchmarks (CodSpeed) should run for this event.
    # Policy: benchmarks run AFTER MERGE (push to main/canary) only when the
    # merge actually touched baml_language/, and on manual dispatch. On PRs
    # they are OPT-IN — they no longer run on every code change. A PR opts in
    # by mentioning one of the perf markers in its title, body, OR any of its
    # commit messages. merge_group is always skipped (the PR already had its
    # chance to run).
    - name: Check perf benchmark opt-in
      id: check_perf
      env:
        EVENT_NAME: ${{ github.event_name }}
        PR_TITLE: ${{ github.event.pull_request.title }}
        PR_BODY: ${{ github.event.pull_request.body }}
        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
        PUSH_BEFORE: ${{ github.event.before }}
        PUSH_AFTER: ${{ github.sha }}
      run: |
        # Any of these, case-insensitive, in the PR title/body or a commit
        # message opts a PR into a perf run. Keep in sync with the instructions
        # posted by the "Perf benchmarks (PR notice)" job below.
        PATTERN='RUN_CODSPEED=1|run-perf|/perf'
        # After merge (push to canary/main): only run when the merge actually
        # touched baml_language/. NOTE: we diff the pushed range (before..after)
        # directly rather than reuse determine_changes' `code` flag — on a push
        # the merge-base of HEAD and origin/<branch> collapses to HEAD, so that
        # diff is always empty here. The push payload's before/after are the
        # real merged range.
        if [ "$EVENT_NAME" = "push" ]; then
          if [ -z "$PUSH_BEFORE" ] || [ "$PUSH_BEFORE" = "0000000000000000000000000000000000000000" ]; then
            # Branch creation / unknown previous tip — nothing to diff against,
            # so run to be safe rather than silently skip.
            echo "run=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          if git diff --quiet "$PUSH_BEFORE" "$PUSH_AFTER" -- ':baml_language/**'; then
            echo "run=false" >> "$GITHUB_OUTPUT"
          else
            echo "run=true" >> "$GITHUB_OUTPUT"
          fi
          exit 0
        fi
        # Manual dispatch always runs.
        if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
          echo "run=true" >> "$GITHUB_OUTPUT"
          exit 0
        fi
        # Never run in the merge queue — it already ran (or was opted out) on
        # the PR.
        if [ "$EVENT_NAME" = "merge_group" ]; then
          echo "run=false" >> "$GITHUB_OUTPUT"
          exit 0
        fi
        # On PRs: opt in via the PR title/body or any commit message in the PR.
        #
        # Match with `grep ... >/dev/null`, NOT `grep -q`. This script runs
        # under `bash -eo pipefail` (the workflow sets `shell: bash`). `grep -q`
        # exits at the first match without draining its input, which can kill
        # the producer (printf / git log) with SIGPIPE; pipefail then reports
        # the whole pipeline as failed and a genuine opt-in marker would be
        # treated as "no match". Without -q grep reads to EOF, so the pipeline
        # status is grep's own.
        if printf '%s\n%s' "$PR_TITLE" "$PR_BODY" | grep -iE "$PATTERN" >/dev/null; then
          echo "run=true" >> "$GITHUB_OUTPUT"
          exit 0
        fi
        if [ -n "$MERGE_BASE" ] && git log --format='%B' "${MERGE_BASE}..HEAD" | grep -iE "$PATTERN" >/dev/null; then
          echo "run=true" >> "$GITHUB_OUTPUT"
          exit 0
        fi
        echo "run=false" >> "$GITHUB_OUTPUT"
# Lint checks
prek:
  # Runs every prek (pre-commit) hook, including the manual-stage ones, and
  # mirrors the colour-stripped output into the step summary.
  name: "Pre-commit Checks"
  runs-on: blacksmith-16vcpu-ubuntu-2404
  needs: determine_changes
  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
  timeout-minutes: 20
  steps:
    - name: "Checkout Branch"
      uses: useblacksmith/checkout@v1
      with:
        persist-credentials: false
    # Install Rust toolchain BEFORE rust-cache so cache key uses correct Rust version
    - name: "Install Rust toolchain"
      run: rustup toolchain install
      working-directory: baml_language
    - uses: Swatinem/rust-cache@v2
      with:
        workspaces: "baml_language -> target"
        # See the cache strategy explanation at the top of cargo-tests.reusable.yaml.
        save-if: false
        shared-key: "linux-cargo"
    # prek hooks invoke cargo (from rustup), mise tasks that shell out
    # to cargo, and python scripts that begin with `#!/usr/bin/env -S
    # uv run --script` (e.g. validate-markdown). We don't
    # need go/ruby/node/etc here, just the prek binary itself + a
    # python/uv pair for the hook scripts.
    - name: "Install mise"
      uses: ./.github/actions/setup-mise
      with:
        install_args: "cargo:prek python uv"
    - name: "Cache prek"
      uses: actions/cache@v5
      with:
        path: ~/.cache/pre-commit
        key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}
    # Separate step so if fetch is slow, build is fast.
    - name: "Fetch cargo dependencies"
      run: cargo fetch
      working-directory: baml_language
    - name: "Run prek"
      run: |
        echo '```console' > "$GITHUB_STEP_SUMMARY"
        # Enable color output for prek and remove it for the summary
        # Use --hook-stage=manual to enable slower hooks that are skipped by default
        #
        # The step shell is `bash -eo pipefail`, so a failing prek would abort
        # the script right at the pipeline — before the exit code is captured
        # and before the closing code fence is written to the summary. Relax
        # errexit for the pipeline only, then restore it.
        set +e
        SKIP=no-commit-to-branch prek run --all-files --show-diff-on-failure --color always --hook-stage manual | \
          tee >(sed -E 's/\x1B\[([0-9]{1,2}(;[0-9]{1,2})*)?[mGK]//g' >> "$GITHUB_STEP_SUMMARY") >&1
        # PIPESTATUS[0] is prek's own status (not tee's).
        exit_code="${PIPESTATUS[0]}"
        set -e
        echo '```' >> "$GITHUB_STEP_SUMMARY"
        exit "$exit_code"
release-metadata:
  # Validates the release metadata and checks that the canary / nightly
  # version strings computed by scripts/baml-language-version have the
  # expected shape for a pinned date.
  name: "Release Metadata"
  runs-on: blacksmith-4vcpu-ubuntu-2404
  needs: determine_changes
  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
  timeout-minutes: 5
  steps:
    - name: "Checkout"
      uses: useblacksmith/checkout@v1
      with:
        fetch-depth: 0
        persist-credentials: false
    - name: "Validate baml_language release metadata"
      run: scripts/baml-language-version check
    - name: "Validate canary/nightly version computation"
      env:
        # Pinned date; the Python check below expects this same date in the
        # nightly and PyPI version strings.
        BAML_LANGUAGE_VERSION_DATE: "20260522"
      run: |
        set -euo pipefail
        # The canary version (with and without --pypi) must equal the
        # version reported by `show`.
        canary="$(scripts/baml-language-version show)"
        test "$(scripts/baml-language-version compute --channel canary)" = "$canary"
        test "$(scripts/baml-language-version compute --channel canary --pypi)" = "$canary"
        nightly="$(scripts/baml-language-version compute --channel nightly)"
        pypi="$(scripts/baml-language-version compute --channel nightly --pypi)"
        # Nightly must be <canary with patch+1>-nightly.<date>.<letter>; the
        # PyPI form encodes the letter as a two-digit index after .dev<date>.
        python3 - "$canary" "$nightly" "$pypi" <<'PY'
        import re
        import sys

        canary, nightly, pypi = sys.argv[1:]
        major, minor, patch = (int(part) for part in canary.split("."))
        base = f"{major}.{minor}.{patch + 1}"

        found = re.fullmatch(rf"{re.escape(base)}-nightly\.20260522\.([a-z])", nightly)
        if found is None:
            raise SystemExit(f"unexpected nightly version: {nightly}")

        index = ord(found.group(1)) - ord("a")
        expected_pypi = f"{base}.dev20260522{index:02d}"
        if pypi != expected_pypi:
            raise SystemExit(f"unexpected nightly PyPI version: {pypi} != {expected_pypi}")
        PY
# Call reusable workflows
cargo-tests:
  # Delegates to the reusable cargo test workflow; runs when baml_language
  # code changed or the ref is main/canary.
  name: "Cargo Tests"
  needs: determine_changes
  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
  uses: ./.github/workflows/cargo-tests.reusable.yaml
  secrets: inherit
size-gate:
  # Delegates to the reusable size-gate workflow with a job-scoped token
  # (read contents, write PR comments).
  name: "Size Gate"
  needs: determine_changes
  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
  uses: ./.github/workflows/size-gate.reusable.yaml
  secrets: inherit
  permissions:
    contents: read
    pull-requests: write
wasm-pack-tests:
  # Delegates to the reusable wasm-pack test workflow; same gate as the
  # cargo tests.
  name: "WASM Pack Tests"
  needs: determine_changes
  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
  uses: ./.github/workflows/wasm-pack-tests.reusable.yaml
  secrets: inherit
docs:
  # Always invoked (no job-level `if`); the reusable workflow receives the
  # docs-changed flag and whether this ref is canary as inputs.
  name: "Docs"
  needs: determine_changes
  uses: ./.github/workflows/docs.reusable.yaml
  with:
    docs_changed: ${{ needs.determine_changes.outputs.docs }}
    is_canary: ${{ github.ref == 'refs/heads/canary' }}
  secrets: inherit
  # NOTE(review): write-all grants every token scope to the called workflow —
  # confirm which scopes docs.reusable.yaml actually needs and narrow if possible.
  permissions: write-all
webview-tests:
  # Delegates to the reusable webview test workflow; runs when webview-related
  # paths changed or the ref is main/canary.
  name: "Webview Tests"
  needs: determine_changes
  if: needs.determine_changes.outputs.webview == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
  uses: ./.github/workflows/webview-tests.reusable.yaml
  secrets: inherit
grammar-tests:
  # Runs the @b/pkg-grammar test script in the typescript2 workspace when
  # grammar-related paths changed or the ref is main/canary.
  name: "Grammar Tests"
  runs-on: blacksmith-4vcpu-ubuntu-2404
  needs: determine_changes
  if: needs.determine_changes.outputs.grammar == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
  steps:
    - name: "Checkout Branch"
      uses: useblacksmith/checkout@v1
      with:
        persist-credentials: false
    - name: "Setup Node.js for typescript2"
      uses: ./.github/actions/setup-node2
    - name: "Run grammar snapshot tests"
      run: pnpm --filter @b/pkg-grammar test
      working-directory: typescript2
miri-tests:
  # Miri run over the bex_heap library tests. Currently never scheduled: the
  # `if` below is hard-coded to false.
  name: "Miri (unsafe code verification)"
  runs-on: blacksmith-4vcpu-ubuntu-2404
  needs: determine_changes
  # TEMPORARILY DISABLED: miri tests timing out at 25 minutes, blocking CI/CD
  # TODO: Re-enable once miri test performance is addressed
  if: false # needs.determine_changes.outputs.unsafe == 'true'
  timeout-minutes: 25
  steps:
    - name: "Checkout"
      uses: useblacksmith/checkout@v1
      with:
        persist-credentials: false
    - name: "Install Rust nightly and Miri"
      run: |
        rustup toolchain install nightly --component miri
        rustup override set nightly
      working-directory: baml_language
    # Separate step so if fetch is slow, build is fast.
    - name: "Fetch cargo dependencies"
      run: cargo +nightly fetch
      working-directory: baml_language
    - name: "Run Miri tests on bex_heap"
      run: cargo miri test -p bex_heap --lib
      working-directory: baml_language
# Loom + Miri verification for the lock-free profiling ring
# (bex_events::prof). Scoped to the prof:: tests so it stays minutes-fast —
# deliberately narrower than the (disabled) whole-crate miri-tests job
# above, whose bex_heap runs were timing out.
prof-concurrency:
  name: "Profiling ring (loom + miri)"
  runs-on: blacksmith-4vcpu-ubuntu-2404
  needs: determine_changes
  # Gated solely on the `prof` change flag (no main/canary fallback).
  if: needs.determine_changes.outputs.prof == 'true'
  timeout-minutes: 45
  steps:
    - name: "Checkout"
      uses: useblacksmith/checkout@v1
      with:
        persist-credentials: false
    # Install Rust toolchain BEFORE rust-cache so cache key uses correct Rust version.
    - name: "Install Rust toolchain"
      run: rustup toolchain install
      working-directory: baml_language
    - uses: Swatinem/rust-cache@v2
      with:
        workspaces: "baml_language -> target"
        shared-key: "prof-concurrency"
    - name: "Fetch cargo dependencies"
      run: cargo fetch
      working-directory: baml_language
    # The model checker explores every interleaving (bounded at 3
    # preemptions, set in the test harness) of the ring's
    # producer/consumer/lifecycle protocols. The custom cfg name
    # (baml_loom, not the conventional loom) keeps the flag from
    # half-activating loom support in third-party deps (e.g. boxcar) that
    # gate on cfg(loom) but need their own loom feature enabled to compile.
    - name: "Loom model checking (bex_events::prof)"
      run: 'cargo test -p bex_events --release --lib prof::'
      working-directory: baml_language
      env:
        RUSTFLAGS: "--cfg baml_loom"
        # Separate target dir for the loom-cfg build.
        CARGO_TARGET_DIR: target/loom
    - name: "Install Rust nightly and Miri"
      run: rustup toolchain install nightly --component miri
      working-directory: baml_language
    # Miri checks the raw-pointer/UnsafeCell discipline of the same
    # scenarios on real threads. Leaked rings are by design (&'static
    # lifetime model); isolation is off for park_timeout/sleep in the
    # stress tests.
    - name: "Miri (bex_events::prof)"
      run: 'cargo +nightly miri test -p bex_events --lib prof::'
      working-directory: baml_language
      env:
        MIRIFLAGS: "-Zmiri-ignore-leaks -Zmiri-disable-isolation"
proto-sync:
  # Regenerates every proto binding (Rust/Python, Go, Node/TypeScript) and
  # fails if the working tree then differs from what is committed.
  name: "proto generated files sync"
  runs-on: blacksmith-4vcpu-ubuntu-2404
  needs: determine_changes
  if: needs.determine_changes.outputs.proto == 'true'
  timeout-minutes: 20
  steps:
    - name: "Checkout"
      uses: useblacksmith/checkout@v1
      with:
        persist-credentials: false
    # Install Rust toolchain BEFORE rust-cache so cache key uses correct Rust version.
    - name: "Install Rust toolchain"
      run: rustup toolchain install
      working-directory: baml_language
    - uses: Swatinem/rust-cache@v2
      with:
        workspaces: "baml_language -> target"
        # See the cache strategy explanation at the top of cargo-tests.reusable.yaml.
        save-if: false
        shared-key: "linux-cargo"
    # bridge_ctypes/build.rs needs `protoc`; bridge_go/build.sh needs
    # `protoc` + `protoc-gen-go`; bridge_nodejs needs node + pnpm.
    # The `cargo build -p bridge_ctypes` step uses cargo from rustup
    # (not mise), so we don't need `go` itself here either.
    - name: "Install mise"
      uses: ./.github/actions/setup-mise
      with:
        install_args: "node npm:pnpm protoc protoc-gen-go"
    # Separate step so if fetch is slow, build is fast.
    - name: "Fetch cargo dependencies"
      run: cargo fetch
      working-directory: baml_language
    # Rust (prost) + Python (protoc) — both driven by bridge_ctypes/build.rs.
    # Writes Python pb2/pyi into baml_language/sdks/python/src/baml_core/cffi/v1/.
    - name: "Generate Rust + Python proto bindings (bridge_ctypes)"
      run: cargo build -p bridge_ctypes
      working-directory: baml_language
    # Go (protoc-gen-go) — writes into sdks/go/bridge_go/cffi/proto/baml_core/cffi/v1/.
    - name: "Generate Go proto bindings (sdks/go/bridge_go)"
      run: ./build.sh
      working-directory: baml_language/sdks/go/bridge_go
    # --ignore-workspace: install from bridge_nodejs's own pnpm-lock.yaml
    # rather than the root workspace lockfile. The two resolve different
    # protobufjs-cli versions, and pbjs codegen output (committed under
    # typescript_src/proto/) must come from the version this package pins.
    # --ignore-scripts: skip dependency build scripts (esbuild's binary
    # fetch, protobufjs postinstall) — none are needed for pbjs/tsc/napi
    # codegen, and pnpm errors on unapproved build scripts otherwise.
    # Same flags as sdk_tests/crates/typescript_node/setup.sh.
    - name: "Install Node SDK dependencies"
      run: pnpm install --frozen-lockfile --ignore-workspace --ignore-scripts
      working-directory: baml_language/sdks/nodejs/bridge_nodejs
    # Node / TypeScript (protobufjs) — writes into sdks/nodejs/bridge_nodejs/typescript_src/proto/.
    # build:debug also regenerates typescript2/pkg-proto/src/generated/ (ts-proto / buf).
    - name: "Generate Node/TypeScript proto bindings (sdks/nodejs/bridge_nodejs)"
      run: pnpm build:debug
      working-directory: baml_language/sdks/nodejs/bridge_nodejs
    - name: "Check generated proto files are in sync"
      run: |
        # Look for both modified tracked files and untracked files across every
        # codegen output documented in baml_language/crates/bridge_ctypes/README.md.
        readme="baml_language/crates/bridge_ctypes/README.md"
        generated_dirs=(
          baml_language/sdks/nodejs/bridge_nodejs
          baml_language/crates/bridge_ctypes
          baml_language/sdks/go/bridge_go
          baml_language/sdks/python/src/baml_core/cffi
          typescript2/pkg-proto
        )
        drift=$(git status --porcelain -- "${generated_dirs[@]}")
        # Clean tree: nothing to report.
        if [ -z "$drift" ]; then
          echo "All generated proto files are in sync."
          exit 0
        fi
        # Drift: annotate the run, list the files, print the regeneration
        # README and the full diff, then fail the step.
        echo "::error::proto generated files are out of sync — consult ${readme} for the regeneration commands and commit the resulting changes."
        echo ""
        echo "The following files are out of sync:"
        echo "$drift"
        echo ""
        echo "===== ${readme} ====="
        cat "${readme}"
        echo "===== end of ${readme} ====="
        echo ""
        git diff -- "${generated_dirs[@]}"
        exit 1
# Benchmarks are split into two jobs so the slow compile happens on a fast,
# cheap machine while the actual measurement happens on a stable CodSpeed
# macro runner.
#
#   benchmarks-build -> compiles the bench binaries (the part that used to
#                       time out on the small CodSpeed runner)
#   benchmarks-run   -> ONLY executes those prebuilt binaries on codspeed-macro
#
# CRITICAL — no double-build / flag mismatch:
#   `cargo codspeed build` compiles each bench target with CodSpeed's own
#   compilation flags and copies the resulting executables into
#   target/codspeed/walltime/<pkg>/<bench>. `cargo codspeed run` then ONLY
#   executes those copies — it never invokes the compiler. So the run job
#   cannot accidentally rebuild with default flags; if the artifact is missing
#   it errors ("No benchmarks found") instead of measuring the wrong binary.
#
# CRITICAL — ABI match:
#   codspeed-macro is an ARM64 machine, so the binaries MUST be built on ARM64
#   too. We build on the OLDEST Ubuntu we have an ARM runner for (22.04,
#   glibc 2.35) so the binaries stay forward-compatible with whatever (newer
#   or equal) glibc the macro runner ships.
benchmarks-build:
  name: "benchmarks build (baml)"
  runs-on: blacksmith-8vcpu-ubuntu-2204-arm
  needs: determine_changes
  # Perf benchmarks run AFTER MERGE (push to main/canary, only when the merge
  # touched baml_language/) and on manual dispatch. On PRs they are OPT-IN: a
  # PR runs them only when its title/body or a commit message mentions a perf
  # marker (RUN_CODSPEED=1, run-perf, /perf). See the `check_perf` step for the
  # policy, and the "Perf benchmarks (PR notice)" job for the comment PRs get
  # when benchmarks are skipped. Skipped in merge_group (already settled on
  # the PR).
  if: needs.determine_changes.outputs.run_perf == 'true'
  timeout-minutes: 30
  steps:
    - name: "Checkout Branch"
      uses: actions/checkout@v6
      with:
        persist-credentials: false
    # Install Rust toolchain BEFORE rust-cache so cache key uses correct Rust version
    - name: "Install Rust toolchain"
      run: rustup show
      working-directory: baml_language
    # Swatinem caches the compiled target/ for this build job's own speed
    # (canary is the sole saver; PRs restore read-only). It is bench-specific
    # on purpose — its only saver compiles just baml_tests benches, a thin
    # seed not worth sharing with a general ARM job.
    - uses: Swatinem/rust-cache@v2
      with:
        workspaces: "baml_language -> target"
        save-if: ${{ github.ref == 'refs/heads/canary' }}
        cache-all-crates: true
        cache-workspace-crates: true
        shared-key: "linux-arm-bench"
    # Pin to the same major+minor as codspeed-divan-compat in Cargo.lock
    # (4.7.0). cargo codspeed run aborts on a major-version mismatch.
    - name: "Install cargo-codspeed"
      uses: taiki-e/install-action@v2
      with:
        tool: cargo-codspeed@4.7.0
    # Separate step so if fetch is slow, build is fast.
    - name: "Fetch cargo dependencies"
      run: cargo fetch
      working-directory: baml_language
    # Only runtime_benchmark — cache_profile is a macOS/kperf harness that's a
    # no-op on Linux, so there's no reason to build or run it here.
    - name: "Build benchmarks (walltime)"
      run: cargo codspeed build -p baml_tests --bench runtime_benchmark -m walltime
      working-directory: baml_language
    - name: "Upload prebuilt benchmark binaries"
      uses: actions/upload-artifact@v7
      with:
        name: codspeed-walltime-benches
        path: baml_language/target/codspeed/walltime
        if-no-files-found: error
        retention-days: 1
    # Hand the resolved cargo home to benchmarks-run via an ARTIFACT, not a
    # cache. actions/cache addresses entries by (key, version) where version
    # depends on the runner's compression tooling, so a cache saved on this
    # blacksmith runner is invisible to the codspeed-macro run job even with an
    # identical key (verified: same key 4f01266e…, "Cache not found"). Artifacts
    # are content-addressed by name and transfer cleanly across runners.
    # benchmarks-run untars this into ~/.cargo so its offline `cargo metadata`
    # has every dep locally — no ~24-min cold git re-clone.
    #
    # Exclude registry/src (extracted dep sources — the bulk of ~/.cargo). The
    # run job only does offline `cargo metadata`, which needs the registry
    # index + git checkouts, not the unpacked sources, so this keeps the
    # artifact (and the run job's download/untar) small.
    - name: "Pack cargo home for benchmarks-run"
      run: tar -C "$HOME" --exclude='.cargo/registry/src' -cf cargo-home.tar .cargo/registry .cargo/git
    - name: "Upload cargo home"
      uses: actions/upload-artifact@v7
      with:
        name: codspeed-cargo-home
        path: cargo-home.tar
        if-no-files-found: error
        retention-days: 1
benchmarks-run:
  # Executes the bench binaries produced by benchmarks-build on the
  # codspeed-macro runner; nothing is compiled in this job.
  name: "benchmarks instrumented (baml)"
  runs-on: codspeed-macro
  needs:
    - determine_changes
    - benchmarks-build
  # Same gate as benchmarks-build — see determine_changes' `check_perf` step.
  if: needs.determine_changes.outputs.run_perf == 'true'
  timeout-minutes: 30
  steps:
    - name: "Checkout Branch"
      uses: actions/checkout@v6
      with:
        persist-credentials: false
    # `cargo codspeed run` needs `cargo metadata` to locate the benches and
    # their working directories. It does NOT compile anything here.
    - name: "Install Rust toolchain"
      run: rustup show
      working-directory: baml_language
    # Restore the resolved cargo home that benchmarks-build packed, so this
    # job's offline `cargo metadata` has every dep locally instead of cold
    # re-cloning git deps on the bare codspeed-macro runner (~24 min). Comes
    # via artifact, not cache — see the build job for why cache misses here.
    - name: "Download cargo home"
      uses: actions/download-artifact@v7
      with:
        name: codspeed-cargo-home
        path: /tmp/cargo-home
    - name: "Unpack cargo home into ~/.cargo"
      run: tar -C "$HOME" -xf /tmp/cargo-home/cargo-home.tar
    - name: "Install cargo-codspeed"
      uses: taiki-e/install-action@v2
      with:
        tool: cargo-codspeed@4.7.0
    - name: "Download prebuilt benchmark binaries"
      uses: actions/download-artifact@v7
      with:
        name: codspeed-walltime-benches
        path: baml_language/target/codspeed/walltime
    # actions/upload-artifact drops the Unix executable bit; restore it so the
    # binaries can be exec'd by `cargo codspeed run`.
    - name: "Restore executable bit on bench binaries"
      run: chmod -R +x baml_language/target/codspeed/walltime
    - name: "Run benchmarks (walltime)"
      uses: CodSpeedHQ/action@v4
      # Benchmarks are advisory — never block a merge on a measurement blip.
      continue-on-error: true
      env:
        # The cargo home is restored above, so `cargo metadata` has every dep
        # locally. Force offline so it physically cannot stall on the network
        # (no index update, no git fetch) — turns the old ~24-min metadata
        # phase into seconds. Fails fast and loud if the cache ever misses,
        # instead of silently re-cloning for half an hour.
        CARGO_NET_OFFLINE: "true"
        # Adaptive sampling: cap each bench at ~2s of wall time instead of
        # CodSpeed walltime's fixed 100 samples/bench. divan then runs as many
        # samples as fit the budget — ~100 for cheap benches (full statistical
        # power kept), down to 1 for the heavy O(n²)/1M-iter workloads (whose
        # run-to-run variance is already tiny, so a single sample suffices).
        #
        # WHY: at fixed 100 samples the suite blew past the 30-min timeout. The
        # long pole was compute::bubble_sort_5k — ~14s/sample on the (≈5×
        # slower than local) codspeed-macro runner × 100 = ~23min for ONE
        # bench. With this cap the whole 36-bench suite finishes in ~2 min.
        # Measured locally: fixed-100 bubble_sort alone >90s; with the budget
        # the full suite ran in 80s. Tune the budget here, not in code.
        DIVAN_MAX_TIME: "2"
      with:
        mode: walltime
        # Executes ONLY the prebuilt binaries downloaded above; no compiler runs.
        #
        # Every bench is a `vm_speedtest_*` function auto-generated from the
        # cross-language workload corpus under tools/speedtest/workloads/*.md
        # (see baml_tests/build.rs). They measure *pure VM execution*: the BAML
        # is compiled and the tokio runtime is built once, OUTSIDE the measured
        # region (see bench_vm_main), so only `main()` is timed. Sleep-based
        # workloads are excluded at build time.
        #
        # The `vm_speedtest` filter selects exactly that suite; DIVAN_MAX_TIME
        # (above) bounds each bench so the pass stays well under the timeout.
        run: cd baml_language && cargo codspeed run -m walltime 'vm_speedtest'
        token: ${{ secrets.CODSPEED_TOKEN }}
# Leaves (and keeps up to date) a single sticky comment on each PR explaining
# the perf-benchmark policy: either confirming benchmarks were triggered, or —
# when they were skipped — telling the author exactly how to opt in. Runs on
# every PR event so the same comment is edited in place (no spam) and flips
# between the two states as the author edits the PR / pushes commits.
perf-pr-notice:
  name: "Perf benchmarks (PR notice)"
  runs-on: blacksmith-4vcpu-ubuntu-2404
  needs: determine_changes
  # Only for pull_request events whose head repo is not a fork.
  if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == false
  permissions:
    pull-requests: write
  steps:
    - name: "Comment perf opt-in instructions"
      uses: actions/github-script@v7
      env:
        RUN_PERF: ${{ needs.determine_changes.outputs.run_perf }}
      with:
        script: |
          // Hidden marker so we find & update our own comment instead of
          // posting a new one on every push.
          const marker = '<!-- perf-benchmarks-pr-notice -->';

          // Comment shown when the perf gate resolved to "run".
          const runningBody = [
            marker,
            '### 🏎️ Performance benchmarks are running for this PR',
            '',
            'CodSpeed perf benchmarks were triggered because this PR opted in. ' +
              'Results will appear in the CodSpeed check / dashboard once they finish.',
          ].join('\n');

          // Comment shown when benchmarks were skipped: explains how to opt in.
          const skippedBody = [
            marker,
            '### ⏭️ Performance benchmarks were skipped',
            '',
            'Perf benchmarks (CodSpeed) are **opt-in** on pull requests — they no ' +
              'longer run on every push. They always run automatically after merge ' +
              'to `canary`/`main`.',
            '',
            'To run them on **this** PR, do any of the following, then push a commit ' +
              '(or re-run CI):',
            '',
            '- Add `RUN_CODSPEED=1` to the PR description, **or**',
            '- Include `run-perf` or `/perf` in the PR title or any commit message.',
          ].join('\n');

          const triggered = process.env.RUN_PERF === 'true';
          const body = triggered ? runningBody : skippedBody;

          const { owner, repo } = context.repo;
          const issue_number = context.issue.number;

          // Find our earlier comment (if any) by the hidden marker.
          const allComments = await github.paginate(github.rest.issues.listComments, {
            owner, repo, issue_number,
          });
          const previous = allComments.find((c) => c.body && c.body.includes(marker));

          if (!previous) {
            await github.rest.issues.createComment({ owner, repo, issue_number, body });
          } else {
            await github.rest.issues.updateComment({ owner, repo, comment_id: previous.id, body });
          }
# CI Failure Alert - Required status check using "skipped = success" pattern
# See: https://devopsdirective.com/posts/2025/08/github-actions-required-checks-for-conditional-jobs/
#
# This job exploits GitHub's behavior where skipped jobs report as "Success".
# - When all tests pass (or are intentionally skipped): this job SKIPS → reports SUCCESS
# - When any test fails or is cancelled: this job RUNS and FAILS → blocks merge
#
# Configure "CI - BAML Language / CI-v2 Failure Alert" (the workflow name plus
# this job's `name`) as a required status check.
ci-failure-alert:
  name: "CI-v2 Failure Alert"
  runs-on: blacksmith-4vcpu-ubuntu-2404
  needs:
    - prek
    - release-metadata
    - cargo-tests
    - wasm-pack-tests
    - docs
    - webview-tests
    - grammar-tests
    # - miri-tests  # TEMPORARILY DISABLED: miri tests timing out
    - prof-concurrency
    - proto-sync
    # NOTE: size-gate is intentionally NOT a required gate. It is an
    # informational signal (PR comment + daily baseline auto-refresh), not
    # a merge blocker: a size bump must never block the merge queue or force
    # a full CI re-run. Drift is caught by the daily refresh PR and by
    # reviewers eyeballing the size-gate comment, not by failing CI here.
  # Only run if something failed or was cancelled (otherwise skip → success)
  if: ${{ failure() || cancelled() }}
  steps:
    - name: Report failure
      run: |
        # Write the whole summary through one quoted redirect (the rest of
        # this workflow quotes "$GITHUB_STEP_SUMMARY" too).
        {
          echo "## ❌ CI Failed"
          echo ""
          echo "One or more required jobs failed or were cancelled."
          echo ""
          echo "| Job | Result |"
          echo "|-----|--------|"
          echo "| prek | ${{ needs.prek.result }} |"
          echo "| release-metadata | ${{ needs.release-metadata.result }} |"
          echo "| cargo-tests | ${{ needs.cargo-tests.result }} |"
          echo "| wasm-pack-tests | ${{ needs.wasm-pack-tests.result }} |"
          echo "| docs | ${{ needs.docs.result }} |"
          echo "| webview-tests | ${{ needs.webview-tests.result }} |"
          echo "| grammar-tests | ${{ needs.grammar-tests.result }} |"
          # miri-tests row TEMPORARILY DISABLED. When the job is re-added to
          # `needs`, restore a row that prints needs.miri-tests.result. (No
          # live expression is kept here: GitHub expands expressions even
          # inside shell comments, and miri-tests is not in `needs`.)
          echo "| prof-concurrency | ${{ needs.prof-concurrency.result }} |"
          echo "| proto-sync | ${{ needs.proto-sync.result }} |"
          echo ""
        } >> "$GITHUB_STEP_SUMMARY"
        echo "::error::One or more CI jobs failed!"
        exit 1