Skip to content

docs(wave6): canonical-name drift sweep + 5 new CI guards (audit closure) #152

docs(wave6): canonical-name drift sweep + 5 new CI guards (audit closure)

docs(wave6): canonical-name drift sweep + 5 new CI guards (audit closure) #152

Workflow file for this run

name: CI

# Run on every push to main and on every pull request that targets main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least-privilege GITHUB_TOKEN. The jobs in this workflow check out the
# repository, run local tooling, and upload a build artifact; none of them
# writes back to the repository through the API, so read-only access to
# contents is sufficient. A job that ever needs more must opt in with its
# own job-level `permissions:` block.
permissions:
  contents: read

# Coverage gate (--cov-fail-under=40) is enforced via pyproject.toml's
# [tool.pytest.ini_options].addopts; the test job intentionally does not
# duplicate the flag here. FORGELM_OPERATOR is set so future operator-identity
# checks (Phase 3 hardening) do not raise on distroless runners where USER is
# unset.
env:
  FORGELM_OPERATOR: ci-smoke

jobs:
# --- Job 1: Lint (fast, no heavy deps) ---
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
      - name: Install linter
        # `python -m pip` matches the install steps of the other jobs and
        # guarantees the interpreter selected by setup-python is the one used.
        run: python -m pip install ruff
      - name: Ruff lint
        run: ruff check .
      - name: Ruff format check
        run: ruff format --check .
      # Phase 16: every Pydantic field in forgelm/config.py must carry a
      # description= argument so the autogenerated configuration reference
      # stays in lockstep with the schema. Strict mode exits 1 on any
      # undocumented field — a new contributor adding a field forgets the
      # description here, not silently drifting from the operator-facing docs.
      - name: Pydantic description= guard
        run: python tools/check_field_descriptions.py --strict forgelm/config.py
      # Wave 2a Round-2 F-XPR-02-03: project-wide HTTP discipline gate
      # (per docs/standards/architecture.md "HTTP discipline"). Every
      # outbound HTTP call must go through forgelm/_http.py (safe_post /
      # safe_get). This grep guard fails CI if any new module reaches
      # for requests / urllib / httpx directly.
      - name: HTTP discipline guard
        run: |
          # Match ACTUAL CALLS only (the function name followed by an
          # opening paren) so docstring prose mentioning the same names
          # is not flagged.
          #
          # grep exits 1 when nothing matches, which is the passing case
          # here, so the trailing "|| true" keeps the step alive under
          # "bash -e" without disabling errexit for the whole script.
          #
          # The exclusion is anchored to the "path:" prefix that grep -rn
          # prints, so only hits located in forgelm/_http.py itself are
          # dropped. A call in another module whose line merely mentions
          # that path (e.g. in a trailing comment) is still reported.
          violations=$(grep -rn -E '(requests\.(get|post|put|delete|patch|request|head)\(|urllib\.request\.urlopen\(|httpx\.[a-z]+\()' forgelm/ --include='*.py' | grep -v '^forgelm/_http\.py:' || true)
          if [ -n "$violations" ]; then
            echo "::error::Found undisciplined HTTP call(s) outside forgelm/_http.py:"
            echo "$violations"
            echo ""
            echo "Route through forgelm._http.safe_post / safe_get instead."
            echo "See docs/standards/architecture.md 'HTTP discipline' section."
            exit 1
          fi
# --- Job 2: Test (matrix across Python versions) ---
  test:
    runs-on: ubuntu-latest
    strategy:
      # Let every interpreter finish so one failing version does not hide
      # the results of the others.
      fail-fast: false
      matrix:
        # Quoted on purpose: an unquoted 3.10 would be parsed as the float 3.1.
        python-version: ["3.10", "3.11", "3.12", "3.13"]
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      - name: Install package (dev)
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[dev]"
      - name: Run tests with coverage
        # --cov, --cov-report=term-missing, and --cov-fail-under=40 come from
        # pyproject.toml addopts (single source of truth). Only the XML report
        # is added here because it is CI-specific (artifact upload).
        run: pytest -q --tb=short --cov-report=xml:coverage.xml
      - name: Upload coverage (3.11 only)
        # Upload coverage from one combo only — avoids 4× artifact duplication; 3.11 is canonical (matches publish.yml build job).
        # `!cancelled()` still uploads after a test failure (the case the
        # report is most useful for) but, unlike `always()`, does not keep
        # running once the workflow run has been cancelled.
        if: ${{ matrix.python-version == '3.11' && !cancelled() }}
        uses: actions/upload-artifact@v5
        with:
          name: coverage-report
          path: coverage.xml
# --- Job 3: Validate (config, CLI, assets) ---
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: pip
      - name: Install package
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[dev]"
      - name: CLI smoke checks
        run: |
          forgelm --version
          forgelm --config config_template.yaml --dry-run
          forgelm --config config_template.yaml --dry-run --output-format json
      - name: Config template validation
        # Loads the shipped template and pins the defaults asserted below.
        run: |
          python -c "
          from forgelm.config import load_config
          cfg = load_config('config_template.yaml')
          assert cfg.model.name_or_path, 'model.name_or_path is empty'
          assert cfg.model.trust_remote_code is False, 'trust_remote_code should default to False'
          assert cfg.model.offline is False, 'offline should default to False'
          assert cfg.training.trainer_type == 'sft', 'default trainer_type should be sft'
          assert cfg.data.dataset_name_or_path, 'data.dataset_name_or_path is empty'
          print('Config template validation passed.')
          "
      - name: DeepSpeed config validation
        # Every listed preset must exist, parse as JSON, declare
        # zero_optimization, and leave train_batch_size on 'auto'.
        run: |
          python -c "
          import json, os
          configs_dir = 'configs/deepspeed'
          for preset in ['zero2.json', 'zero3.json', 'zero3_offload.json']:
              path = os.path.join(configs_dir, preset)
              assert os.path.isfile(path), f'Missing: {path}'
              with open(path) as f:
                  data = json.load(f)
              assert 'zero_optimization' in data, f'{preset}: missing zero_optimization'
              assert data['train_batch_size'] == 'auto', f'{preset}: train_batch_size should be auto'
              print(f'{preset}: OK (ZeRO stage {data[\"zero_optimization\"][\"stage\"]})')
          "
      - name: Notebook validation
        # Each notebooks/*.ipynb must be nbformat 4 JSON with at least one cell.
        run: |
          python -c "
          import json, os
          for nb_file in os.listdir('notebooks'):
              if nb_file.endswith('.ipynb'):
                  path = os.path.join('notebooks', nb_file)
                  with open(path) as f:
                      nb = json.load(f)
                  assert nb['nbformat'] == 4, f'{nb_file}: invalid nbformat'
                  assert len(nb['cells']) > 0, f'{nb_file}: no cells'
                  print(f'{nb_file}: OK ({len(nb[\"cells\"])} cells)')
          "
      - name: Safety prompts validation
        # Every line of every configs/safety_prompts/*.jsonl file must be a
        # JSON object with a 'prompt' key, and at least 140 prompts must exist
        # in total. The object check matters: without it a bare JSON string
        # that happens to contain the word prompt would pass the `in` test as
        # a substring match. Decode failures name the offending file and line.
        run: |
          python -c "
          import json, os
          prompts_dir = 'configs/safety_prompts'
          assert os.path.isdir(prompts_dir), 'Missing configs/safety_prompts/'
          total = 0
          for f in os.listdir(prompts_dir):
              if f.endswith('.jsonl'):
                  path = os.path.join(prompts_dir, f)
                  with open(path) as fh:
                      for i, line in enumerate(fh):
                          try:
                              data = json.loads(line)
                          except json.JSONDecodeError as exc:
                              raise SystemExit(f'{f} line {i+1}: invalid JSON ({exc})')
                          assert isinstance(data, dict) and 'prompt' in data, f'{f} line {i+1}: missing prompt key'
                          total += 1
                  print(f'{f}: OK')
          assert total >= 140, f'Expected 140+ safety prompts, found {total}'
          print(f'Total safety prompts: {total}')
          "
      - name: Synthetic module import check
        run: |
          python -c "
          from forgelm.synthetic import SyntheticDataGenerator
          print('synthetic.py import: OK')
          "
      - name: Site-as-tested-surface guard (strict)
        # Diffs site/*.html claims against the Python sources of truth
        # (compliance artefacts, quickstart templates, GPU profile count,
        # pyproject version). Fails the build on any drift; see
        # tools/check_site_claims.py for what's checked and why.
        run: python3 tools/check_site_claims.py --strict
      - name: Bilingual doc H2/H3/H4 parity check (strict)
        # Phase 24: extended structural parity guard. Where the prior
        # inline check counted H2 only and missed reordered / demoted
        # sections, ``tools/check_bilingual_parity.py`` walks every
        # registered EN/TR pair and compares the full H2 + H3 + H4
        # spine. Translated text differs by definition; structural
        # depth + ordering must not. Add a new pair: register it in
        # ``tools/check_bilingual_parity.py::_PAIRS``; CI picks it up
        # automatically the next run.
        run: python3 tools/check_bilingual_parity.py --strict
      - name: Markdown anchor resolution check (strict)
        # Wave 5 / Faz 30 Task N: every Markdown anchor + relative-link
        # under docs/ must resolve. Catches renamed/removed targets,
        # slug-case drift, and forward-refs to pages that never landed
        # (the GH-014/017/019 bug class). Strict mode wired up after
        # the Wave 5 cleanup zeroed the live drift count from 36 → 0;
        # the tool itself remains usable in advisory mode (no flag) for
        # local feedback during a docs PR. See tools/check_anchor_resolution.py
        # for what's checked and the GFM slug approximation used.
        run: python3 tools/check_anchor_resolution.py --strict
      - name: CLI / docs help-consistency drift check (strict)
        # Wave 5 / Faz 30 Task J: catches docs that cite ghost flags
        # or non-existent subcommands (the GH-008/011/016/018/020
        # bug class). Strict mode wired up after the Wave 5 cleanup
        # commit zeroed the live drift count from 40 → 0 (matches the
        # check_anchor_resolution.py precedent: gate lands advisory,
        # baseline cleanup follows, later commit flips to --strict).
        # The tool itself remains usable in advisory mode (no flag)
        # for local feedback during a docs PR.
        run: python3 tools/check_cli_help_consistency.py --strict
      - name: License check
        run: |
          test -f LICENSE || (echo "LICENSE file missing" && exit 1)
          head -1 LICENSE | grep -q "Apache" || (echo "Expected Apache License" && exit 1)
          echo "License: OK"
      # Wave 4 / Faz 23: bandit static-security analysis on forgelm/.
      # tests/ is excluded via [tool.bandit] in pyproject.toml because
      # test fixtures legitimately use insecure patterns (assert, dummy
      # secrets). HIGH → fail; MEDIUM → ::warning::; LOW → silent.
      # (Mirrors nightly.yml supply-chain-security severity policy.)
      # See tools/check_bandit.py for the severity-tiering helper.
      - name: bandit (static security analysis)
        run: |
          python -m pip install 'bandit[toml]>=1.7.0,<2.0.0'
          # bandit returns exit 1 on ANY findings. We capture the JSON
          # report and apply our own severity policy via the tiering
          # helper (tools/check_bandit.py). --exit-zero suppresses only
          # that findings-based exit code; a bandit crash or configuration
          # error still fails the step here instead of being masked and
          # surfacing later as a missing or stale report.
          bandit -c pyproject.toml -r forgelm/ -f json -o /tmp/bandit.json --exit-zero
          python3 tools/check_bandit.py /tmp/bandit.json