Skip to content

Nightly Compatibility #86

Nightly Compatibility

Nightly Compatibility #86

Workflow file for this run

# Nightly compatibility workflow: runs on a daily schedule and on demand.
name: Nightly Compatibility

on:
  schedule:
    # 03:00 UTC, every day.
    - cron: '0 3 * * *'
  # Manual trigger.
  workflow_dispatch:

# Permissions are deliberately not granted at workflow level; each job
# declares the minimal set it needs.

# FORGELM_OPERATOR is set so that future operator-identity checks (Phase 3
# hardening) do not raise on distroless runners where USER is unset.
env:
  FORGELM_OPERATOR: ci-smoke

jobs:
test-latest-deps:
  # Installs the runtime dependencies without version bounds, then runs
  # lint, the test suite and several CLI smoke checks, once per Python
  # version in the matrix.
  name: Test with latest dependencies
  runs-on: ubuntu-latest
  permissions:
    contents: read
  strategy:
    # Keep the other matrix leg running when one Python version fails.
    fail-fast: false
    matrix:
      python-version: ["3.11", "3.12"]
  steps:
    - uses: actions/checkout@v5
    - uses: actions/setup-python@v6
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install with latest deps (no upper bounds)
      # NOTE: intentionally unconstrained — designed to surface
      # upstream-API breakage early. The notify-failure job dedups
      # against existing 'nightly-failure' issues to bound noise (see
      # also: testing.md CI gates discussion). Do NOT add `|| true` here.
      #
      # This list MUST mirror ``pyproject.toml`` ``[project].dependencies``
      # because the next step (``pip install -e . --no-deps``) installs
      # no dependencies at all. When a new runtime dependency lands in
      # pyproject, add it here too or the matrix job will crash in
      # ``import forgelm`` before any test runs. The comment above the
      # install command names the packages it is meant to cover.
      run: |
        pip install --upgrade pip
        # Core: torch / transformers / peft / datasets / accelerate /
        # trl / pydantic / pyyaml / tensorboard / huggingface_hub /
        # requests / requests-toolbelt (the last needed by
        # forgelm._http for HostHeaderSSLAdapter; SSRF DNS-rebinding
        # hardening, issue #14, v0.6.0).
        pip install torch transformers peft datasets accelerate trl \
          pydantic pyyaml tensorboard huggingface_hub requests requests-toolbelt
        # Test and lint tooling used by the later steps.
        pip install pytest pytest-cov ruff
    - name: Install ForgeLM (editable, no deps — already installed above)
      run: pip install -e . --no-deps
    - name: Show dependency versions
      # Prints the resolved version of each core package so a failing
      # run shows which upstream releases were actually installed.
      run: |
        python -c "
        import importlib
        for pkg in ['torch', 'transformers', 'peft', 'datasets', 'accelerate', 'trl', 'pydantic']:
            mod = importlib.import_module(pkg)
            print(f'{pkg}: {getattr(mod, \"__version__\", \"unknown\")}')
        "
    - name: Lint check
      run: ruff check .
    - name: Run tests
      run: pytest tests/ -q --tb=short
    - name: CLI smoke test
      run: |
        forgelm --version
        forgelm --config config_template.yaml --dry-run
        forgelm --config config_template.yaml --dry-run --output-format json
    - name: Quickstart templates smoke test
      run: |
        # Every bundled template must render to a valid YAML that
        # passes pydantic validation. Catches template drift the
        # moment a config schema changes.
        #
        # Note: smoke-tests 4 of 5 templates. `domain-expert` is BYOD
        # (no bundled dataset), so `quickstart --dry-run` rejects it
        # without `--dataset PATH`. It's covered separately by the
        # pytest unit `test_domain_expert_intentionally_has_no_bundled_data`.
        forgelm quickstart --list
        for tpl in customer-support code-assistant medical-qa-tr grpo-math; do
          echo "=== Quickstart dry-run: $tpl ==="
          forgelm quickstart "$tpl" --dry-run --output "/tmp/qs-$tpl.yaml"
          forgelm --config "/tmp/qs-$tpl.yaml" --dry-run
        done
    - name: Forbid git+https in notebooks
      run: |
        # Notebooks must install ForgeLM from PyPI with a pinned version,
        # never from a git+https URL. Closure plan Faz 5 — git+https makes
        # Colab installs depend on a specific branch HEAD, breaking
        # reproducibility and confusing users when main moves ahead of
        # the latest released wheel.
        #
        # grep exits 0 on a match, 1 on no match and 2 on an error (for
        # example a missing notebooks/ directory or an unreadable file).
        # An error is treated as a failure too, so the gate cannot pass
        # without the notebooks actually having been scanned.
        rc=0
        grep -rn 'git+https://github.com/HodeTech/ForgeLM' notebooks/ || rc=$?
        if [ "$rc" -eq 0 ]; then
          echo "::error::Notebooks must install from PyPI, not git+https"
          exit 1
        elif [ "$rc" -ne 1 ]; then
          echo "::error::grep failed (exit $rc) while scanning notebooks/"
          exit 1
        fi
    - name: Ingestion + audit smoke test (Phase 11)
      run: |
        # Minimal end-to-end: TXT in → JSONL out → audit report out.
        # Plain TXT path doesn't need the [ingestion] extra; the audit
        # module is pure stdlib. Catches CLI wiring drift without paying
        # for pypdf / langdetect installs.
        mkdir -p /tmp/p11
        echo "Article 10 governs data quality." > /tmp/p11/sample.txt
        echo "Section two: representativeness, traceability, bias review." > /tmp/p11/sample2.txt
        forgelm ingest /tmp/p11/ --recursive --output /tmp/p11/out.jsonl
        # Both outputs must exist and be non-empty.
        test -s /tmp/p11/out.jsonl
        forgelm --data-audit /tmp/p11/out.jsonl --output /tmp/p11/audit/
        test -s /tmp/p11/audit/data_audit_report.json
wheel-install-smoke:
  # ------------------------------------------------------------------
  # The only check that notices broken package_data globs. An editable
  # install always hides the problem: `pip install -e .` resolves
  # forgelm/templates/* via Path(__file__).parent no matter what
  # setuptools would really copy into the wheel, so a bad
  # [tool.setuptools.package-data] entry stays invisible until a real
  # user runs `pip install forgelm` from PyPI and hits a missing-asset
  # FileNotFoundError on `forgelm quickstart`.
  #
  # Kept as its own job (instead of extra steps in test-latest-deps)
  # because:
  #   1. Building a wheel and creating a fresh venv has nothing to do
  #      with the "latest deps still resolve" axis, and costs ~1 minute
  #      that the matrix should not pay twice.
  #   2. A failure here points unambiguously at packaging, not at a
  #      transitive dependency bump that broke training.
  #   3. The wheel is deliberately installed WITHOUT --no-deps so the
  #      install path matches what end-users hit.
  # ------------------------------------------------------------------
  name: Wheel install smoke test
  permissions:
    contents: read
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5
    - uses: actions/setup-python@v6
      with:
        python-version: '3.11'
    - name: Build wheel
      run: |
        set -euo pipefail
        pip install --upgrade pip build
        python -m build --wheel
    - name: Install wheel into a fresh venv and run quickstart from /tmp
      run: |
        set -euo pipefail
        # A clean venv keeps anything in the checkout's cwd from leaking in.
        venv_bin=/tmp/wheel-test/bin
        python -m venv /tmp/wheel-test
        "$venv_bin/pip" install --upgrade pip

        # Expand the wheel glob explicitly; anything other than exactly
        # one forgelm-*.whl makes the test environment ambiguous.
        shopt -s nullglob
        candidates=(dist/forgelm-*.whl)
        if [ "${#candidates[@]}" -ne 1 ]; then
          echo "Expected exactly one forgelm wheel in dist/; found ${#candidates[@]}: ${candidates[*]:-(none)}" >&2
          exit 1
        fi
        wheel="${candidates[0]}"
        echo "Installing: $wheel"
        "$venv_bin/pip" install "$wheel"

        # Work from /tmp so the source tree is NOT on sys.path: every
        # template asset has to come from the wheel's site-packages
        # copy rather than from the checkout.
        cd /tmp
        echo "=== forgelm quickstart --list ==="
        "$venv_bin/forgelm" quickstart --list | tee /tmp/qs-list.txt

        # Each registered template has to show up in the listing.
        for tpl in customer-support code-assistant domain-expert medical-qa-tr grpo-math; do
          grep -q "$tpl" /tmp/qs-list.txt || {
            echo "MISSING template '$tpl' from quickstart --list output" >&2
            exit 1
          }
        done

        echo "=== forgelm quickstart customer-support --dry-run ==="
        "$venv_bin/forgelm" quickstart customer-support \
          --dry-run --output /tmp/wheel-qs.yaml
        # The dry-run must have written the rendered config file.
        [ -f /tmp/wheel-qs.yaml ] || {
          echo "Quickstart did not materialize /tmp/wheel-qs.yaml" >&2
          exit 1
        }
        echo "Wheel-install smoke test passed."
test-min-deps:
  # Runs the test suite on Python 3.10 against exact-pinned versions of
  # the dependencies listed in the install step.
  name: Test with minimum supported versions
  permissions:
    contents: read
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5
    - uses: actions/setup-python@v6
      with:
        # Quoted so YAML does not read the version as the float 3.1.
        python-version: '3.10'
    - name: Install minimum dependency versions
      # The pins below MUST track the documented minimums in
      # ``pyproject.toml`` ``[project].dependencies``: the following
      # step (``pip install -e . --no-deps``) installs no dependencies
      # itself. Whenever pyproject gains a runtime dependency, add it
      # here as well, otherwise the job dies in ``import forgelm``
      # before a single test runs.
      #
      # NOTE(review): the latest-deps job also installs transformers,
      # datasets, accelerate, huggingface_hub and requests explicitly;
      # none of them is pinned here, so they are only present if pip
      # pulls them in transitively — confirm that is intended.
      run: |
        pip install --upgrade pip
        # All pins go to pip in one invocation; anything not listed is
        # left for pip to resolve as a transitive dependency.
        pip install \
          torch==2.3.0 \
          trl==0.12.0 \
          peft==0.11.0 \
          pydantic==2.0.0 \
          pyyaml==6.0.1 \
          tensorboard==2.15.0 \
          requests-toolbelt==1.0.0
        pip install pytest pytest-cov
    - name: Install ForgeLM
      run: pip install -e . --no-deps
    - name: Run tests
      run: pytest tests/ -q --tb=short
supply-chain-security:
  # ------------------------------------------------------------------
  # Wave 4 / Faz 23: supply-chain security gate aligned with
  # ISO 27001 / SOC 2 Type II.
  #
  # Both scans run against the latest published dependencies, so a CVE
  # is caught as soon as its advisory drops, even before the team
  # re-bumps versions:
  #
  #   - pip-audit (transitive CVEs against the OSV / GHSA databases)
  #   - bandit    (static analysis on forgelm/ production code only)
  #
  # Severity policy (per tools/check_pip_audit.py + tools/check_bandit.py):
  #   HIGH → fail; MEDIUM → ::warning::; LOW → silent.
  #   (CRITICAL findings are reported by pip-audit at HIGH tier — same gate.)
  # Operators get the same tooling locally with
  # `pip install forgelm[security]`.
  # ------------------------------------------------------------------
  name: Supply-chain security (pip-audit + bandit)
  permissions:
    contents: read
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v5
    - uses: actions/setup-python@v6
      with:
        python-version: '3.11'
        cache: pip
    - name: Install ForgeLM with [security] extra
      run: |
        pip install --upgrade pip
        pip install -e ".[security]"
    - name: pip-audit (transitive CVE scan)
      run: |
        # pip-audit records every finding; tools/check_pip_audit.py
        # then applies ForgeLM's severity policy (HIGH → fail,
        # MEDIUM → ::warning::, UNKNOWN → fail closed) to the JSON.
        # The `|| true` discards pip-audit's own exit status so that
        # the checker script alone decides whether the step passes.
        #
        # Project-internal CVE suppressions live in
        # tools/pip_audit_ignores.yaml; --ignores is opt-in so
        # standalone deployer invocations of check_pip_audit.py
        # inherit nothing (per docs/reference/supply_chain_security.md).
        # Adding an entry to that file is a security-policy change;
        # see the file header for the required schema (id, package,
        # reason, threat_model, verified_at, reevaluate_after).
        pip-audit --format json --output /tmp/pip-audit.json || true
        python3 tools/check_pip_audit.py /tmp/pip-audit.json \
          --ignores tools/pip_audit_ignores.yaml
    - name: bandit (static security analysis)
      run: |
        # Same scope/severity policy as ci.yml; the nightly cadence
        # catches newly-introduced HIGH issues even on long-lived
        # branches. As with pip-audit, bandit's exit status is
        # discarded and tools/check_bandit.py decides the outcome.
        bandit -c pyproject.toml -r forgelm/ -f json -o /tmp/bandit.json || true
        python3 tools/check_bandit.py /tmp/bandit.json
    - name: Upload security scan artifacts
      # Runs even after a failed scan step so the raw reports stay available.
      if: always()
      uses: actions/upload-artifact@v5
      with:
        name: supply-chain-scans
        path: |
          /tmp/pip-audit.json
          /tmp/bandit.json
notify-failure:
  # Runs only when at least one of the jobs listed under `needs` failed.
  # It comments on an already-open 'nightly-failure' issue if there is
  # one, and opens a new issue otherwise.
  name: Notify on failure
  needs:
    - test-latest-deps
    - test-min-deps
    - wheel-install-smoke
    - supply-chain-security
  if: failure()
  runs-on: ubuntu-latest
  permissions:
    contents: read  # github-script needs to read the repo
    issues: write  # required for creating failure notification issues
  steps:
    - name: Create issue on failure
      uses: actions/github-script@v8
      with:
        script: |
          // Current UTC date as YYYY-MM-DD.
          const isoDay = () => new Date().toISOString().split('T')[0];
          const { owner, repo } = context.repo;
          const runUrl = `${context.serverUrl}/${owner}/${repo}/actions/runs/${context.runId}`;

          const title = `Nightly CI failure — ${isoDay()}`;

          // At most one open issue carrying the dedup label is needed.
          const { data: openIssues } = await github.rest.issues.listForRepo({
            owner,
            repo,
            state: 'open',
            labels: 'nightly-failure',
            per_page: 1,
          });

          if (openIssues.length === 0) {
            // No open failure issue yet: open one.
            await github.rest.issues.create({
              owner,
              repo,
              title,
              labels: ['nightly-failure', 'bug'],
              body: `## Nightly CI Failure\n\nThe nightly compatibility test failed.\n\n**Run:** [View details](${runUrl})\n\nThis may indicate a breaking change in a dependency. Check the test logs for details.`,
            });
          } else {
            // An issue is already open: append a comment to it.
            await github.rest.issues.createComment({
              owner,
              repo,
              issue_number: openIssues[0].number,
              body: `Nightly CI failed again on ${isoDay()}.\n\n[View run](${runUrl})`,
            });
          }