Preserve the ErrorContext cause chain across a defer on the unwind pa… #6092

Workflow file for this run

	# Workflow display name (this is what ${{ github.workflow }} expands to).
	name: CI - BAML Language
	# Workflow-wide GITHUB_TOKEN scopes; a job that declares its own
	# `permissions:` replaces these for that job.
	permissions:
	  contents: read
	  id-token: write
	  pull-requests: write
	# Triggers: pushes to main/canary, every pull request, merge-queue check
	# requests, and manual dispatch.
	on:
	  push:
	    branches: [main, canary]
	  pull_request:
	  merge_group:
	    types: [checks_requested]
	  workflow_dispatch:

	# One live run per workflow + ref + PR number (falling back to the commit
	# SHA when the event has no pull request); a newer run in the same group
	# cancels the one in progress.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.event.pull_request.number || github.sha }}
	  cancel-in-progress: true

	# Every `run:` step uses bash unless the step sets its own shell.
	defaults:
	  run:
	    shell: bash

	# Environment shared by all jobs: incremental compilation off, coloured
	# cargo output, and extra retries for cargo/rustup downloads.
	env:
	  CARGO_INCREMENTAL: 0
	  CARGO_NET_RETRY: 10
	  CARGO_TERM_COLOR: always
	  RUSTUP_MAX_RETRIES: 10

	# Jobs of this workflow. Most of them `needs: determine_changes` and gate
	# on the flags that job exports.
	jobs:
	# Computes the change flags (and the perf-benchmark decision) that the other
	# jobs in this workflow read through `needs.determine_changes.outputs.*`.
	determine_changes:
	  name: "Determine changes"
	  runs-on: blacksmith-4vcpu-ubuntu-2404
	  outputs:
	    # Flag that is raised when any code in baml_language is changed
	    code: ${{ steps.check_code.outputs.changed }}
	    # Flag for lexer changes
	    lexer: ${{ steps.check_lexer.outputs.changed }}
	    # Flag for parser changes
	    parser: ${{ steps.check_parser.outputs.changed }}
	    # Flag for HIR changes
	    hir: ${{ steps.check_hir.outputs.changed }}
	    # Flag for THIR changes
	    thir: ${{ steps.check_thir.outputs.changed }}
	    # Flag for codegen changes
	    codegen: ${{ steps.check_codegen.outputs.changed }}
	    # Flag for docs/frontend changes
	    docs: ${{ steps.check_docs.outputs.changed }}
	    # Flag for typescript2/webview changes
	    webview: ${{ steps.check_webview.outputs.changed }}
	    # Flag for typescript2 grammar changes
	    grammar: ${{ steps.check_grammar.outputs.changed }}
	    # Flag for unsafe code changes (bex_heap) - triggers Miri tests
	    unsafe: ${{ steps.check_unsafe.outputs.changed }}
	    # Flag for proto codegen changes - triggers generated file sync check
	    proto: ${{ steps.check_proto.outputs.changed }}
	    # Flag for profiling-ring changes (bex_events) - triggers loom + miri
	    prof: ${{ steps.check_prof.outputs.changed }}
	    # Flag that is raised when perf benchmarks (CodSpeed) should run: always
	    # after merge (push to main/canary) and on manual dispatch; on PRs only
	    # when the PR title/body or a commit message opts in.
	    run_perf: ${{ steps.check_perf.outputs.run }}
	  steps:
	    # Full history (fetch-depth: 0) so merge-base and range diffs work.
	    - uses: useblacksmith/checkout@v1
	      with:
	        fetch-depth: 0
	        persist-credentials: false

	    # Events without a pull request payload fall back to diffing
	    # against canary.
	    - name: Determine merge base
	      id: merge_base
	      env:
	        BASE_REF: ${{ github.event.pull_request.base.ref || 'canary' }}
	      run: |
	        sha=$(git merge-base HEAD "origin/${BASE_REF}")
	        echo "sha=${sha}" >> "$GITHUB_OUTPUT"

	    # Every check below follows the same shape: `git diff --quiet` exits 0
	    # when nothing under the listed pathspecs changed since the merge base.
	    - name: Check if code changed
	      id: check_code
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':baml_language/**' \
	          ':scripts/baml-language-version' \
	          ':scripts/baml-wrapper-version' \
	          ':scripts/baml-release-manifests' \
	          ':scripts/baml-package-manager-artifacts' \
	          ':scripts/install.sh' \
	          ':scripts/install.ps1' \
	          ':packaging/aur/**' \
	          ':tools/pkg_boundaryml_com/**' \
	          ':.github/workflows/ci.yaml' \
	          ':.github/workflows/release-baml-language.yml' \
	          ':.github/workflows/build2-python-sdk.reusable.yaml' \
	          ':.github/workflows/build2-nodejs-sdk.reusable.yaml' \
	          ':.github/workflows/cargo-tests.reusable.yaml' \
	          ':.github/workflows/size-gate.reusable.yaml' \
	          ':.github/workflows/wasm-pack-tests.reusable.yaml' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Check if lexer code changed
	      id: check_lexer
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':baml_language/crates/baml_compiler_lexer/**' \
	          ':baml_language/crates/baml_base/**' \
	          ':baml_language/Cargo.toml' \
	          ':baml_language/Cargo.lock' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Check if parser code changed
	      id: check_parser
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':baml_language/crates/baml_compiler_parser/**' \
	          ':baml_language/crates/baml_compiler_syntax/**' \
	          ':baml_language/crates/baml_compiler_lexer/**' \
	          ':baml_language/crates/baml_base/**' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Check if HIR code changed
	      id: check_hir
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':baml_language/crates/baml_compiler_hir/**' \
	          ':baml_language/crates/baml_workspace/**' \
	          ':baml_language/crates/baml_compiler_parser/**' \
	          ':baml_language/crates/baml_base/**' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Check if THIR code changed
	      id: check_thir
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':baml_language/crates/baml_thir/**' \
	          ':baml_language/crates/baml_compiler_hir/**' \
	          ':baml_language/crates/baml_base/**' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Check if codegen code changed
	      id: check_codegen
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':baml_language/crates/baml_compiler_emit/**' \
	          ':baml_language/crates/baml_thir/**' \
	          ':baml_language/crates/baml_base/**' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Check if docs changed
	      id: check_docs
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':fern/**' \
	          ':typescript/apps/ask-baml-client/**' \
	          ':typescript/apps/sage-backend/**' \
	          ':.github/workflows/docs.reusable.yaml' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Check if webview changed
	      id: check_webview
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':typescript2/app-vscode-webview/**' \
	          ':typescript2/pkg-playground/**' \
	          ':typescript2/pkg-proto/**' \
	          ':baml_language/crates/bridge_ctypes/types/**' \
	          ':.github/workflows/webview-tests.reusable.yaml' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Check if grammar changed
	      id: check_grammar
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':typescript2/pkg-grammar/**' \
	          ':typescript2/app-vscode-ext/syntaxes/**' \
	          ':typescript2/app-vscode-ext/package.json' \
	          ':typescript2/pnpm-lock.yaml' \
	          ':.github/actions/setup-node2/**' \
	          ':.github/workflows/ci.yaml' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    - name: Check if unsafe code changed
	      id: check_unsafe
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':baml_language/crates/bridge_ctypes/../bex_heap/**' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    # `**/*.proto` matches .proto files in any subdirectory of
	    # bridge_ctypes. The pattern must keep both wildcards: `*/.proto` would
	    # only match files literally named `.proto`, so the flag could never
	    # be raised.
	    - name: Check if proto sources changed
	      id: check_proto
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':baml_language/crates/bridge_ctypes/**/*.proto' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    # Includes the job's own inputs (workspace manifest with the
	    # loom/minstant dep versions + check-cfg list, the pinned toolchain,
	    # and this workflow file) so a change that can break the loom/miri
	    # build re-runs the gate. Cargo.lock is deliberately excluded: it
	    # churns on every dep bump and the cargo-tests job already covers the
	    # std halves of the suite.
	    - name: Check if profiling ring code changed
	      id: check_prof
	      env:
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	      run: |
	        if git diff --quiet "${MERGE_BASE}...HEAD" -- \
	          ':baml_language/crates/bex_events/**' \
	          ':baml_language/Cargo.toml' \
	          ':baml_language/rust-toolchain.toml' \
	          ':.github/workflows/ci.yaml' \
	        ; then
	          echo "changed=false" >> "$GITHUB_OUTPUT"
	        else
	          echo "changed=true" >> "$GITHUB_OUTPUT"
	        fi

	    # Decide whether perf benchmarks (CodSpeed) should run for this event.
	    # Policy: benchmarks run AFTER MERGE (push to main/canary) only when the
	    # merge actually touched baml_language/, and on manual dispatch. On PRs
	    # they are OPT-IN — they no longer run on every code change. A PR opts in
	    # by mentioning one of the perf markers in its title, body, OR any of its
	    # commit messages. merge_group is always skipped (the PR already had its
	    # chance to run).
	    - name: Check perf benchmark opt-in
	      id: check_perf
	      env:
	        EVENT_NAME: ${{ github.event_name }}
	        PR_TITLE: ${{ github.event.pull_request.title }}
	        PR_BODY: ${{ github.event.pull_request.body }}
	        MERGE_BASE: ${{ steps.merge_base.outputs.sha }}
	        PUSH_BEFORE: ${{ github.event.before }}
	        PUSH_AFTER: ${{ github.sha }}
	      run: |
	        # Any of these, case-insensitive, in the PR title/body or a commit
	        # message opts a PR into a perf run. Keep in sync with the instructions
	        # posted by the "Perf benchmarks (PR notice)" job below.
	        # (Extended-regex alternation: the bars must stay unescaped.)
	        PATTERN='RUN_CODSPEED=1|run-perf|/perf'

	        # After merge (push to canary/main): only run when the merge actually
	        # touched baml_language/. NOTE: we diff the pushed range (before..after)
	        # directly rather than reuse determine_changes' `code` flag — on a push
	        # the merge-base of HEAD and origin/<branch> collapses to HEAD, so that
	        # diff is always empty here. The push payload's before/after are the
	        # real merged range.
	        if [ "$EVENT_NAME" = "push" ]; then
	          if [ -z "$PUSH_BEFORE" ] || [ "$PUSH_BEFORE" = "0000000000000000000000000000000000000000" ]; then
	            # Branch creation / unknown previous tip — nothing to diff against,
	            # so run to be safe rather than silently skip.
	            echo "run=true" >> "$GITHUB_OUTPUT"
	            exit 0
	          fi
	          if git diff --quiet "$PUSH_BEFORE" "$PUSH_AFTER" -- ':baml_language/**'; then
	            echo "run=false" >> "$GITHUB_OUTPUT"
	          else
	            echo "run=true" >> "$GITHUB_OUTPUT"
	          fi
	          exit 0
	        fi

	        # Manual dispatch always runs.
	        if [ "$EVENT_NAME" = "workflow_dispatch" ]; then
	          echo "run=true" >> "$GITHUB_OUTPUT"
	          exit 0
	        fi

	        # Never run in the merge queue — it already ran (or was opted out) on
	        # the PR.
	        if [ "$EVENT_NAME" = "merge_group" ]; then
	          echo "run=false" >> "$GITHUB_OUTPUT"
	          exit 0
	        fi

	        # On PRs: opt in via the PR title/body or any commit message in the PR.
	        # grep reads from a here-string instead of a pipe: `grep -q` exits on
	        # its first match, and with pipefail (on for `shell: bash` steps) a
	        # producer killed by SIGPIPE would turn a successful match into a
	        # failed pipeline and silently drop the opt-in.
	        if grep -qiE "$PATTERN" <<<"${PR_TITLE}"$'\n'"${PR_BODY}"; then
	          echo "run=true" >> "$GITHUB_OUTPUT"
	          exit 0
	        fi
	        if [ -n "$MERGE_BASE" ]; then
	          # `|| true`: a failing git log means "no opt-in found", not a
	          # failed step.
	          commit_messages=$(git log --format='%B' "${MERGE_BASE}..HEAD" || true)
	          if grep -qiE "$PATTERN" <<<"$commit_messages"; then
	            echo "run=true" >> "$GITHUB_OUTPUT"
	            exit 0
	          fi
	        fi

	        echo "run=false" >> "$GITHUB_OUTPUT"

	# Lint checks
	prek:
	  name: "Pre-commit Checks"
	  runs-on: blacksmith-16vcpu-ubuntu-2404
	  needs: determine_changes
	  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
	  timeout-minutes: 20
	  steps:
	    - name: "Checkout Branch"
	      uses: useblacksmith/checkout@v1
	      with:
	        persist-credentials: false

	    # Install Rust toolchain BEFORE rust-cache so cache key uses correct Rust version
	    - name: "Install Rust toolchain"
	      run: rustup toolchain install
	      working-directory: baml_language

	    - uses: Swatinem/rust-cache@v2
	      with:
	        workspaces: "baml_language -> target"
	        # See the cache strategy explanation at the top of cargo-tests.reusable.yaml.
	        save-if: false
	        shared-key: "linux-cargo"

	    # prek hooks invoke cargo (from rustup), mise tasks that shell out
	    # to cargo, and python scripts that begin with `#!/usr/bin/env -S
	    # uv run --script` (e.g. validate-markdown). We don't
	    # need go/ruby/node/etc here, just the prek binary itself + a
	    # python/uv pair for the hook scripts.
	    - name: "Install mise"
	      uses: ./.github/actions/setup-mise
	      with:
	        install_args: "cargo:prek python uv"

	    - name: "Cache prek"
	      uses: actions/cache@v5
	      with:
	        path: ~/.cache/pre-commit
	        key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}

	    # Separate step so if fetch is slow, build is fast.
	    - name: "Fetch cargo dependencies"
	      run: cargo fetch
	      working-directory: baml_language

	    - name: "Run prek"
	      run: |
	        echo '```console' > "$GITHUB_STEP_SUMMARY"
	        # Enable color output for prek and remove it for the summary
	        # Use --hook-stage=manual to enable slower hooks that are skipped by default
	        #
	        # errexit and pipefail are active for `shell: bash` steps, so a
	        # failing prek would abort the script on the pipeline itself and
	        # leave the summary's code fence unclosed. Suspend errexit around
	        # the pipeline, capture prek's own status, then restore it.
	        set +e
	        SKIP=no-commit-to-branch prek run --all-files --show-diff-on-failure --color always --hook-stage manual | \
	          tee >(sed -E 's/\x1B\[([0-9]{1,2}(;[0-9]{1,2})*)?[mGK]//g' >> "$GITHUB_STEP_SUMMARY") >&1
	        exit_code="${PIPESTATUS[0]}"
	        set -e
	        echo '```' >> "$GITHUB_STEP_SUMMARY"
	        exit "$exit_code"

	# Validates the release metadata and the canary/nightly version computation
	# performed by scripts/baml-language-version.
	release-metadata:
	  name: "Release Metadata"
	  runs-on: blacksmith-4vcpu-ubuntu-2404
	  needs: determine_changes
	  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
	  timeout-minutes: 5
	  steps:
	    - name: "Checkout"
	      uses: useblacksmith/checkout@v1
	      with:
	        fetch-depth: 0
	        persist-credentials: false

	    - name: "Validate baml_language release metadata"
	      run: scripts/baml-language-version check

	    # The date is pinned through the environment and the same literal
	    # (20260522) is asserted in the embedded Python below — keep them in sync.
	    - name: "Validate canary/nightly version computation"
	      env:
	        BAML_LANGUAGE_VERSION_DATE: "20260522"
	      run: |
	        set -euo pipefail
	        canary="$(scripts/baml-language-version show)"
	        test "$(scripts/baml-language-version compute --channel canary)" = "$canary"
	        test "$(scripts/baml-language-version compute --channel canary --pypi)" = "$canary"

	        nightly="$(scripts/baml-language-version compute --channel nightly)"
	        pypi="$(scripts/baml-language-version compute --channel nightly --pypi)"
	        python3 - "$canary" "$nightly" "$pypi" <<'PY'
	        import re
	        import sys

	        canary, nightly, pypi = sys.argv[1:]
	        major, minor, patch = [int(part) for part in canary.split(".")]
	        # Nightly is expected to be the next patch release after canary.
	        base = f"{major}.{minor}.{patch + 1}"
	        match = re.fullmatch(rf"{re.escape(base)}-nightly\.20260522\.([a-z])", nightly)
	        if not match:
	            raise SystemExit(f"unexpected nightly version: {nightly}")

	        # The trailing letter (a, b, ...) maps to a zero-padded PyPI dev index.
	        index = ord(match.group(1)) - ord("a")
	        expected_pypi = f"{base}.dev20260522{index:02d}"
	        if pypi != expected_pypi:
	            raise SystemExit(f"unexpected nightly PyPI version: {pypi} != {expected_pypi}")
	        PY

	# Call reusable workflows
	# Runs when baml_language code changed, and always on main/canary.
	cargo-tests:
	  name: "Cargo Tests"
	  needs: determine_changes
	  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
	  uses: ./.github/workflows/cargo-tests.reusable.yaml
	  secrets: inherit

	# Reusable size-gate workflow; same gate as cargo-tests. Declares its own
	# token scopes instead of inheriting the workflow-level ones.
	size-gate:
	  name: "Size Gate"
	  needs: determine_changes
	  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
	  uses: ./.github/workflows/size-gate.reusable.yaml
	  secrets: inherit
	  permissions:
	    contents: read
	    pull-requests: write

	# Reusable wasm-pack test workflow; same gate as cargo-tests.
	wasm-pack-tests:
	  name: "WASM Pack Tests"
	  needs: determine_changes
	  if: needs.determine_changes.outputs.code == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
	  uses: ./.github/workflows/wasm-pack-tests.reusable.yaml
	  secrets: inherit

	# Reusable docs workflow. It has no `if:` here — it is always called and
	# receives the docs-changed flag and whether the ref is canary as inputs.
	docs:
	  name: "Docs"
	  needs: determine_changes
	  uses: ./.github/workflows/docs.reusable.yaml
	  with:
	    docs_changed: ${{ needs.determine_changes.outputs.docs }}
	    is_canary: ${{ github.ref == 'refs/heads/canary' }}
	  secrets: inherit
	  # NOTE(review): write-all grants every token scope; consider narrowing to
	  # what docs.reusable.yaml actually needs — confirm against that workflow.
	  permissions: write-all

	# Reusable webview test workflow; runs when webview inputs changed, and
	# always on main/canary.
	webview-tests:
	  name: "Webview Tests"
	  needs: determine_changes
	  if: needs.determine_changes.outputs.webview == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
	  uses: ./.github/workflows/webview-tests.reusable.yaml
	  secrets: inherit

	# Grammar snapshot tests for typescript2/pkg-grammar; runs when grammar
	# inputs changed, and always on main/canary.
	grammar-tests:
	  name: "Grammar Tests"
	  runs-on: blacksmith-4vcpu-ubuntu-2404
	  needs: determine_changes
	  if: needs.determine_changes.outputs.grammar == 'true' || github.ref == 'refs/heads/main' || github.ref == 'refs/heads/canary'
	  steps:
	    - name: "Checkout Branch"
	      uses: useblacksmith/checkout@v1
	      with:
	        persist-credentials: false

	    - name: "Setup Node.js for typescript2"
	      uses: ./.github/actions/setup-node2

	    - name: "Run grammar snapshot tests"
	      run: pnpm --filter @b/pkg-grammar test
	      working-directory: typescript2

	# Miri run over bex_heap. Currently switched off via `if: false`; the
	# intended gate is kept in the trailing comment on that line.
	miri-tests:
	  name: "Miri (unsafe code verification)"
	  runs-on: blacksmith-4vcpu-ubuntu-2404
	  needs: determine_changes
	  # TEMPORARILY DISABLED: miri tests timing out at 25 minutes, blocking CI/CD
	  # TODO: Re-enable once miri test performance is addressed
	  if: false # needs.determine_changes.outputs.unsafe == 'true'
	  timeout-minutes: 25
	  steps:
	    - name: "Checkout"
	      uses: useblacksmith/checkout@v1
	      with:
	        persist-credentials: false

	    - name: "Install Rust nightly and Miri"
	      run: |
	        rustup toolchain install nightly --component miri
	        rustup override set nightly
	      working-directory: baml_language

	    # Separate step so if fetch is slow, build is fast.
	    - name: "Fetch cargo dependencies"
	      run: cargo +nightly fetch
	      working-directory: baml_language

	    - name: "Run Miri tests on bex_heap"
	      run: cargo miri test -p bex_heap --lib
	      working-directory: baml_language

	# Loom + Miri verification for the lock-free profiling ring
	# (bex_events::prof). Scoped to the prof:: tests so it stays minutes-fast —
	# deliberately narrower than the (disabled) whole-crate miri-tests job
	# above, whose bex_heap runs were timing out.
	prof-concurrency:
	  name: "Profiling ring (loom + miri)"
	  runs-on: blacksmith-4vcpu-ubuntu-2404
	  needs: determine_changes
	  if: needs.determine_changes.outputs.prof == 'true'
	  timeout-minutes: 45
	  steps:
	    - name: "Checkout"
	      uses: useblacksmith/checkout@v1
	      with:
	        persist-credentials: false

	    # Install Rust toolchain BEFORE rust-cache so cache key uses correct Rust version.
	    - name: "Install Rust toolchain"
	      run: rustup toolchain install
	      working-directory: baml_language

	    - uses: Swatinem/rust-cache@v2
	      with:
	        workspaces: "baml_language -> target"
	        shared-key: "prof-concurrency"

	    - name: "Fetch cargo dependencies"
	      run: cargo fetch
	      working-directory: baml_language

	    # The model checker explores every interleaving (bounded at 3
	    # preemptions, set in the test harness) of the ring's
	    # producer/consumer/lifecycle protocols. The custom cfg name
	    # (baml_loom, not the conventional loom) keeps the flag from
	    # half-activating loom support in third-party deps (e.g. boxcar) that
	    # gate on cfg(loom) but need their own loom feature enabled to compile.
	    - name: "Loom model checking (bex_events::prof)"
	      run: 'cargo test -p bex_events --release --lib prof::'
	      working-directory: baml_language
	      env:
	        RUSTFLAGS: "--cfg baml_loom"
	        CARGO_TARGET_DIR: target/loom

	    - name: "Install Rust nightly and Miri"
	      run: rustup toolchain install nightly --component miri
	      working-directory: baml_language

	    # Miri checks the raw-pointer/UnsafeCell discipline of the same
	    # scenarios on real threads. Leaked rings are by design (&'static
	    # lifetime model); isolation is off for park_timeout/sleep in the
	    # stress tests.
	    - name: "Miri (bex_events::prof)"
	      run: 'cargo +nightly miri test -p bex_events --lib prof::'
	      working-directory: baml_language
	      env:
	        MIRIFLAGS: "-Zmiri-ignore-leaks -Zmiri-disable-isolation"

	# Regenerates every proto binding (Rust/Python, Go, Node/TypeScript) and
	# fails if the working tree then differs from what is committed.
	proto-sync:
	  name: "proto generated files sync"
	  runs-on: blacksmith-4vcpu-ubuntu-2404
	  needs: determine_changes
	  if: needs.determine_changes.outputs.proto == 'true'
	  timeout-minutes: 20
	  steps:
	    - name: "Checkout"
	      uses: useblacksmith/checkout@v1
	      with:
	        persist-credentials: false

	    # Install Rust toolchain BEFORE rust-cache so cache key uses correct Rust version.
	    - name: "Install Rust toolchain"
	      run: rustup toolchain install
	      working-directory: baml_language

	    - uses: Swatinem/rust-cache@v2
	      with:
	        workspaces: "baml_language -> target"
	        # See the cache strategy explanation at the top of cargo-tests.reusable.yaml.
	        save-if: false
	        shared-key: "linux-cargo"

	    # bridge_ctypes/build.rs needs `protoc`; bridge_go/build.sh needs
	    # `protoc` + `protoc-gen-go`; bridge_nodejs needs node + pnpm.
	    # The `cargo build -p bridge_ctypes` step uses cargo from rustup
	    # (not mise), so we don't need `go` itself here either.
	    - name: "Install mise"
	      uses: ./.github/actions/setup-mise
	      with:
	        install_args: "node npm:pnpm protoc protoc-gen-go"

	    # Separate step so if fetch is slow, build is fast.
	    - name: "Fetch cargo dependencies"
	      run: cargo fetch
	      working-directory: baml_language

	    # Rust (prost) + Python (protoc) — both driven by bridge_ctypes/build.rs.
	    # Writes Python pb2/pyi into baml_language/sdks/python/src/baml_core/cffi/v1/.
	    - name: "Generate Rust + Python proto bindings (bridge_ctypes)"
	      run: cargo build -p bridge_ctypes
	      working-directory: baml_language

	    # Go (protoc-gen-go) — writes into sdks/go/bridge_go/cffi/proto/baml_core/cffi/v1/.
	    - name: "Generate Go proto bindings (sdks/go/bridge_go)"
	      run: ./build.sh
	      working-directory: baml_language/sdks/go/bridge_go

	    # --ignore-workspace: install from bridge_nodejs's own pnpm-lock.yaml
	    # rather than the root workspace lockfile. The two resolve different
	    # protobufjs-cli versions, and pbjs codegen output (committed under
	    # typescript_src/proto/) must come from the version this package pins.
	    # --ignore-scripts: skip dependency build scripts (esbuild's binary
	    # fetch, protobufjs postinstall) — none are needed for pbjs/tsc/napi
	    # codegen, and pnpm errors on unapproved build scripts otherwise.
	    # Same flags as sdk_tests/crates/typescript_node/setup.sh.
	    - name: "Install Node SDK dependencies"
	      run: pnpm install --frozen-lockfile --ignore-workspace --ignore-scripts
	      working-directory: baml_language/sdks/nodejs/bridge_nodejs

	    # Node / TypeScript (protobufjs) — writes into sdks/nodejs/bridge_nodejs/typescript_src/proto/.
	    # build:debug also regenerates typescript2/pkg-proto/src/generated/ (ts-proto / buf).
	    - name: "Generate Node/TypeScript proto bindings (sdks/nodejs/bridge_nodejs)"
	      run: pnpm build:debug
	      working-directory: baml_language/sdks/nodejs/bridge_nodejs

	    - name: "Check generated proto files are in sync"
	      run: |
	        # Check for both modified tracked files and untracked files across every
	        # codegen output documented in baml_language/crates/bridge_ctypes/README.md.
	        PATHS=(
	          baml_language/sdks/nodejs/bridge_nodejs
	          baml_language/crates/bridge_ctypes
	          baml_language/sdks/go/bridge_go
	          baml_language/sdks/python/src/baml_core/cffi
	          typescript2/pkg-proto
	        )
	        README="baml_language/crates/bridge_ctypes/README.md"
	        STATUS=$(git status --porcelain -- "${PATHS[@]}")
	        if [ -n "$STATUS" ]; then
	          echo "::error::proto generated files are out of sync — consult ${README} for the regeneration commands and commit the resulting changes."
	          echo ""
	          echo "The following files are out of sync:"
	          echo "$STATUS"
	          echo ""
	          echo "===== ${README} ====="
	          cat "${README}"
	          echo "===== end of ${README} ====="
	          echo ""
	          git diff -- "${PATHS[@]}"
	          exit 1
	        fi
	        echo "All generated proto files are in sync."

	# Benchmarks are split into two jobs so the slow compile happens on a fast,
	# cheap machine while the actual measurement happens on a stable CodSpeed
	# macro runner.
	#
	#   benchmarks-build -> compiles the bench binaries (the part that used to
	#                       time out on the small CodSpeed runner)
	#   benchmarks-run   -> ONLY executes those prebuilt binaries on codspeed-macro
	#
	# CRITICAL — no double-build / flag mismatch:
	#   `cargo codspeed build` compiles each bench target with CodSpeed's own
	#   compilation flags and copies the resulting executables into
	#   target/codspeed/walltime/<pkg>/<bench>. `cargo codspeed run` then ONLY
	#   executes those copies — it never invokes the compiler. So the run job
	#   cannot accidentally rebuild with default flags; if the artifact is missing
	#   it errors ("No benchmarks found") instead of measuring the wrong binary.
	#
	# CRITICAL — ABI match:
	#   codspeed-macro is an ARM64 machine, so the binaries MUST be built on ARM64
	#   too. We build on the OLDEST Ubuntu we have an ARM runner for (22.04,
	#   glibc 2.35) so the binaries stay forward-compatible with whatever (newer
	#   or equal) glibc the macro runner ships.
	benchmarks-build:
	  name: "benchmarks build (baml)"
	  runs-on: blacksmith-8vcpu-ubuntu-2204-arm
	  needs: determine_changes
	  # Perf benchmarks run AFTER MERGE (push to main/canary, only when the merge
	  # touched baml_language/) and on manual dispatch. On PRs they are OPT-IN: a
	  # PR runs them only when its title/body or a commit message mentions a perf
	  # marker (RUN_CODSPEED=1, run-perf, /perf). See the `check_perf` step for the
	  # policy and the "Perf benchmarks (PR notice)" job for the comment PRs get
	  # when benchmarks are skipped. Skipped in merge_group (already settled on
	  # the PR).
	  if: needs.determine_changes.outputs.run_perf == 'true'
	  timeout-minutes: 30
	  steps:
	    - name: "Checkout Branch"
	      uses: actions/checkout@v6
	      with:
	        persist-credentials: false

	    # Install Rust toolchain BEFORE rust-cache so cache key uses correct Rust version
	    - name: "Install Rust toolchain"
	      run: rustup show
	      working-directory: baml_language

	    # Swatinem caches the compiled target/ for this build job's own speed
	    # (canary is the sole saver; PRs restore read-only). It is bench-specific
	    # on purpose — its only saver compiles just baml_tests benches, a thin
	    # seed not worth sharing with a general ARM job.
	    - uses: Swatinem/rust-cache@v2
	      with:
	        workspaces: "baml_language -> target"
	        save-if: ${{ github.ref == 'refs/heads/canary' }}
	        cache-all-crates: true
	        cache-workspace-crates: true
	        shared-key: "linux-arm-bench"

	    # Pin to the same major+minor as codspeed-divan-compat in Cargo.lock
	    # (4.7.0). cargo codspeed run aborts on a major-version mismatch.
	    - name: "Install cargo-codspeed"
	      uses: taiki-e/install-action@v2
	      with:
	        tool: cargo-codspeed@4.7.0

	    # Separate step so if fetch is slow, build is fast.
	    - name: "Fetch cargo dependencies"
	      run: cargo fetch
	      working-directory: baml_language

	    # Only runtime_benchmark — cache_profile is a macOS/kperf harness that's a
	    # no-op on Linux, so there's no reason to build or run it here.
	    - name: "Build benchmarks (walltime)"
	      run: cargo codspeed build -p baml_tests --bench runtime_benchmark -m walltime
	      working-directory: baml_language

	    - name: "Upload prebuilt benchmark binaries"
	      uses: actions/upload-artifact@v7
	      with:
	        name: codspeed-walltime-benches
	        path: baml_language/target/codspeed/walltime
	        if-no-files-found: error
	        retention-days: 1

	    # Hand the resolved cargo home to benchmarks-run via an ARTIFACT, not a
	    # cache. actions/cache addresses entries by (key, version) where version
	    # depends on the runner's compression tooling, so a cache saved on this
	    # blacksmith runner is invisible to the codspeed-macro run job even with an
	    # identical key (verified: same key 4f01266e…, "Cache not found"). Artifacts
	    # are content-addressed by name and transfer cleanly across runners.
	    # benchmarks-run untars this into ~/.cargo so its offline `cargo metadata`
	    # has every dep locally — no ~24-min cold git re-clone.
	    # Exclude registry/src (extracted dep sources — the bulk of ~/.cargo). The
	    # run job only does offline `cargo metadata`, which needs the registry
	    # index + git checkouts, not the unpacked sources, so this keeps the
	    # artifact (and the run job's download/untar) small.
	    - name: "Pack cargo home for benchmarks-run"
	      run: tar -C "$HOME" --exclude='.cargo/registry/src' -cf cargo-home.tar .cargo/registry .cargo/git
	    - name: "Upload cargo home"
	      uses: actions/upload-artifact@v7
	      with:
	        name: codspeed-cargo-home
	        path: cargo-home.tar
	        if-no-files-found: error
	        retention-days: 1

	# Executes the bench binaries produced by benchmarks-build on the
	# codspeed-macro runner; nothing is compiled in this job.
	benchmarks-run:
	  name: "benchmarks instrumented (baml)"
	  runs-on: codspeed-macro
	  needs:
	    - determine_changes
	    - benchmarks-build
	  # Same gate as benchmarks-build — see determine_changes' `check_perf` step.
	  if: needs.determine_changes.outputs.run_perf == 'true'
	  timeout-minutes: 30
	  steps:
	    - name: "Checkout Branch"
	      uses: actions/checkout@v6
	      with:
	        persist-credentials: false

	    # `cargo codspeed run` needs `cargo metadata` to locate the benches and
	    # their working directories. It does NOT compile anything here.
	    - name: "Install Rust toolchain"
	      run: rustup show
	      working-directory: baml_language

	    # Restore the resolved cargo home that benchmarks-build packed, so this
	    # job's offline `cargo metadata` has every dep locally instead of cold
	    # re-cloning git deps on the bare codspeed-macro runner (~24 min). Comes
	    # via artifact, not cache — see the build job for why cache misses here.
	    - name: "Download cargo home"
	      uses: actions/download-artifact@v7
	      with:
	        name: codspeed-cargo-home
	        path: /tmp/cargo-home
	    - name: "Unpack cargo home into ~/.cargo"
	      run: tar -C "$HOME" -xf /tmp/cargo-home/cargo-home.tar

	    - name: "Install cargo-codspeed"
	      uses: taiki-e/install-action@v2
	      with:
	        tool: cargo-codspeed@4.7.0

	    - name: "Download prebuilt benchmark binaries"
	      uses: actions/download-artifact@v7
	      with:
	        name: codspeed-walltime-benches
	        path: baml_language/target/codspeed/walltime

	    # actions/upload-artifact drops the Unix executable bit; restore it so the
	    # binaries can be exec'd by `cargo codspeed run`.
	    - name: "Restore executable bit on bench binaries"
	      run: chmod -R +x baml_language/target/codspeed/walltime

	    - name: "Run benchmarks (walltime)"
	      uses: CodSpeedHQ/action@v4
	      # Benchmarks are advisory — never block a merge on a measurement blip.
	      continue-on-error: true
	      env:
	        # The cargo home is restored above, so `cargo metadata` has every dep
	        # locally. Force offline so it physically cannot stall on the network
	        # (no index update, no git fetch) — turns the old ~24-min metadata
	        # phase into seconds. Fails fast and loud if the cache ever misses,
	        # instead of silently re-cloning for half an hour.
	        CARGO_NET_OFFLINE: "true"
	        # Adaptive sampling: cap each bench at ~2s of wall time instead of
	        # CodSpeed walltime's fixed 100 samples/bench. divan then runs as many
	        # samples as fit the budget — ~100 for cheap benches (full statistical
	        # power kept), down to 1 for the heavy O(n²)/1M-iter workloads (whose
	        # run-to-run variance is already tiny, so a single sample suffices).
	        #
	        # WHY: at fixed 100 samples the suite blew past the 30-min timeout. The
	        # long pole was compute::bubble_sort_5k — ~14s/sample on the (≈5×
	        # slower than local) codspeed-macro runner × 100 = ~23min for ONE
	        # bench. With this cap the whole 36-bench suite finishes in ~2 min.
	        # Measured locally: fixed-100 bubble_sort alone >90s; with the budget
	        # the full suite ran in 80s. Tune the budget here, not in code.
	        DIVAN_MAX_TIME: "2"
	      with:
	        mode: walltime
	        # Executes ONLY the prebuilt binaries downloaded above; no compiler runs.
	        #
	        # Every bench is a `vm_speedtest_*` function auto-generated from the
	        # cross-language workload corpus under tools/speedtest/workloads/*.md
	        # (see baml_tests/build.rs). They measure pure VM execution: the BAML
	        # is compiled and the tokio runtime is built once, OUTSIDE the measured
	        # region (see bench_vm_main), so only `main()` is timed. Sleep-based
	        # workloads are excluded at build time.
	        #
	        # The `vm_speedtest` filter selects exactly that suite; DIVAN_MAX_TIME
	        # (above) bounds each bench so the pass stays well under the timeout.
	        run: cd baml_language && cargo codspeed run -m walltime 'vm_speedtest'
	        token: ${{ secrets.CODSPEED_TOKEN }}

	# Leaves (and keeps up to date) a single sticky comment on each PR explaining
	# the perf-benchmark policy: either confirming benchmarks were triggered, or —
	# when they were skipped — telling the author exactly how to opt in. Runs on
	# every PR event so the same comment is edited in place (no spam) and flips
	# between the two states as the author edits the PR / pushes commits.
	perf-pr-notice:
	  name: "Perf benchmarks (PR notice)"
	  runs-on: blacksmith-4vcpu-ubuntu-2404
	  needs: determine_changes
	  # Pull requests from forks are excluded by this condition.
	  if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.fork == false
	  permissions:
	    pull-requests: write
	  steps:
	    - name: "Comment perf opt-in instructions"
	      uses: actions/github-script@v7
	      env:
	        RUN_PERF: ${{ needs.determine_changes.outputs.run_perf }}
	      with:
	        script: |
	          // Hidden marker so we find & update our own comment instead of
	          // posting a new one on every push.
	          const marker = '<!-- perf-benchmarks-pr-notice -->';
	          const triggered = process.env.RUN_PERF === 'true';
	          const body = triggered
	            ? [
	                marker,
	                '### 🏎️ Performance benchmarks are running for this PR',
	                '',
	                'CodSpeed perf benchmarks were triggered because this PR opted in. ' +
	                  'Results will appear in the CodSpeed check / dashboard once they finish.',
	              ].join('\n')
	            : [
	                marker,
	                '### ⏭️ Performance benchmarks were skipped',
	                '',
	                'Perf benchmarks (CodSpeed) are opt-in on pull requests — they no ' +
	                  'longer run on every push. They always run automatically after merge ' +
	                  'to `canary`/`main`.',
	                '',
	                'To run them on this PR, do any of the following, then push a commit ' +
	                  '(or re-run CI):',
	                '',
	                '- Add `RUN_CODSPEED=1` to the PR description, or',
	                '- Include `run-perf` or `/perf` in the PR title or any commit message.',
	              ].join('\n');

	          const { owner, repo } = context.repo;
	          const issue_number = context.issue.number;
	          // Walk every comment page so an existing notice is found even on
	          // long threads.
	          const comments = await github.paginate(github.rest.issues.listComments, {
	            owner, repo, issue_number,
	          });
	          const existing = comments.find((c) => c.body && c.body.includes(marker));
	          if (existing) {
	            await github.rest.issues.updateComment({ owner, repo, comment_id: existing.id, body });
	          } else {
	            await github.rest.issues.createComment({ owner, repo, issue_number, body });
	          }

	# CI Failure Alert - Required status check using "skipped = success" pattern
	# See: https://devopsdirective.com/posts/2025/08/github-actions-required-checks-for-conditional-jobs/
	#
	# This job exploits GitHub's behavior where skipped jobs report as "Success".
	# - When all tests pass (or are intentionally skipped): this job SKIPS → reports SUCCESS
	# - When any test fails or is cancelled: this job RUNS and FAILS → blocks merge
	#
	# Configure "CI - BAML Language / CI-v2 Failure Alert" as a required status
	# check. The check name comes from this job's `name:` below, so keep the two
	# in sync when renaming.
	ci-failure-alert:
	  name: "CI-v2 Failure Alert"
	  runs-on: blacksmith-4vcpu-ubuntu-2404
	  needs:
	    - prek
	    - release-metadata
	    - cargo-tests
	    - wasm-pack-tests
	    - docs
	    - webview-tests
	    - grammar-tests
	    # - miri-tests # TEMPORARILY DISABLED: miri tests timing out
	    - prof-concurrency
	    - proto-sync
	    # NOTE: size-gate is intentionally NOT a required gate. It is an
	    # informational signal (PR comment + daily baseline auto-refresh), not
	    # a merge blocker: a size bump must never block the merge queue or force
	    # a full CI re-run. Drift is caught by the daily refresh PR and by
	    # reviewers eyeballing the size-gate comment, not by failing CI here.
	  # Only run if something failed or was cancelled (otherwise skip → success)
	  if: ${{ failure() || cancelled() }}
	  steps:
	    - name: Report failure
	      run: |
	        # Write the whole report through one quoted redirect so the summary
	        # path is never word-split and the file is opened once.
	        {
	          echo "## ❌ CI Failed"
	          echo ""
	          echo "One or more required jobs failed or were cancelled."
	          echo ""
	          echo "| Job | Result |"
	          echo "|-----|--------|"
	          echo "| prek | ${{ needs.prek.result }} |"
	          echo "| release-metadata | ${{ needs.release-metadata.result }} |"
	          echo "| cargo-tests | ${{ needs.cargo-tests.result }} |"
	          echo "| wasm-pack-tests | ${{ needs.wasm-pack-tests.result }} |"
	          echo "| docs | ${{ needs.docs.result }} |"
	          echo "| webview-tests | ${{ needs.webview-tests.result }} |"
	          echo "| grammar-tests | ${{ needs.grammar-tests.result }} |"
	          # miri-tests row omitted while the job is TEMPORARILY DISABLED.
	          # Workflow expressions are expanded even inside shell comments, so
	          # the reference is spelled without delimiters here: when miri-tests
	          # returns to `needs`, add a row printing needs.miri-tests.result.
	          echo "| prof-concurrency | ${{ needs.prof-concurrency.result }} |"
	          echo "| proto-sync | ${{ needs.proto-sync.result }} |"
	          echo ""
	        } >> "$GITHUB_STEP_SUMMARY"
	        echo "::error::One or more CI jobs failed!"
	        # Non-zero exit is what turns this job red and blocks the merge.
	        exit 1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Preserve the ErrorContext cause chain across a defer on the unwind pa… #6092

Workflow file

Preserve the ErrorContext cause chain across a defer on the unwind pa… #6092

Uh oh!

Workflow file for this run