# Page chrome from the GitHub Actions run view, kept as comments so the
# file parses as YAML:
#   Merge pull request #64 from HodeTech/docs/post-pr63-doc-audit #253
#   Workflow file for this run

name: CI

# Run on every push to main and on every pull request targeting main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least-privilege GITHUB_TOKEN: the jobs in this workflow only check out the
# repository, run local tooling and upload a build artifact, so read access
# to repository contents is sufficient.
permissions:
  contents: read

# Coverage gate (--cov-fail-under=40) is enforced via pyproject.toml's
# [tool.pytest.ini_options].addopts; the test job intentionally does not
# duplicate the flag here. FORGELM_OPERATOR is set so future operator-identity
# checks (Phase 3 hardening) do not raise on distroless runners where USER is
# unset.
env:
  FORGELM_OPERATOR: ci-smoke

jobs:
# --- Job 1: Lint (fast, no heavy deps) ---
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
      - name: Install linter
        run: pip install ruff
      - name: Ruff lint
        run: ruff check .
      - name: Ruff format check
        run: ruff format --check .
      # Phase 16: every Pydantic field in forgelm/config.py must carry a
      # description= argument so the hand-maintained configuration reference
      # (docs/reference/configuration.md + -tr.md mirror) always has
      # authoritative field text to mirror. Strict mode exits 1 on any
      # undocumented field — a new contributor adding a field forgets the
      # description here, not silently drifting from the operator-facing docs.
      - name: Pydantic description= guard
        run: python tools/check_field_descriptions.py --strict forgelm/config.py
      # Wave 2a Round-2 F-XPR-02-03: project-wide HTTP discipline gate
      # (per docs/standards/architecture.md "HTTP discipline"). Every
      # outbound HTTP call must go through forgelm/_http.py (safe_post /
      # safe_get). This guard fails CI if any new module reaches for
      # requests / urllib / httpx directly.
      # F-P8-C-19: promoted from an inline grep (which missed
      # requests.Session()/aliased-import/whitespace-before-paren forms
      # and had no own test) to a tested tool with its own
      # tests/test_check_http_discipline.py.
      - name: HTTP discipline guard
        run: python tools/check_http_discipline.py
# --- Job 2: Test (matrix across Python versions) ---
  test:
    runs-on: ubuntu-latest
    strategy:
      # Let every interpreter finish even when one of them fails.
      fail-fast: false
      matrix:
        # Quoted so that "3.10" is not parsed as the float 3.1.
        python-version:
          - "3.10"
          - "3.11"
          - "3.12"
          - "3.13"
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      - name: Install package (dev)
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[dev]"
      - name: Run tests with coverage
        # pyproject.toml addopts is the single source of truth for --cov,
        # --cov-report=term-missing and --cov-fail-under=40. The XML report
        # is the only CI-specific addition (it feeds the artifact upload).
        run: pytest -q --tb=short --cov-report=xml:coverage.xml
      - name: Upload coverage (3.11 only)
        # One matrix combination uploads the report, which avoids 4x artifact
        # duplication; 3.11 is canonical (matches publish.yml build job).
        # always() keeps the upload running when the test step failed.
        if: matrix.python-version == '3.11' && always()
        uses: actions/upload-artifact@v5
        with:
          name: coverage-report
          path: coverage.xml
# --- Job 3: Validate (config, CLI, assets) ---
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: pip
      - name: Install package
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[dev]"
      - name: CLI smoke checks
        run: |
          forgelm --version
          forgelm --config config_template.yaml --dry-run
          forgelm --config config_template.yaml --dry-run --output-format json
      - name: Config template validation
        # Loads the shipped template and pins the defaults it must carry.
        run: |
          python -c "
          from forgelm.config import load_config
          cfg = load_config('config_template.yaml')
          assert cfg.model.name_or_path, 'model.name_or_path is empty'
          assert cfg.model.trust_remote_code is False, 'trust_remote_code should default to False'
          assert cfg.model.offline is False, 'offline should default to False'
          assert cfg.training.trainer_type == 'sft', 'default trainer_type should be sft'
          assert cfg.data.dataset_name_or_path, 'data.dataset_name_or_path is empty'
          print('Config template validation passed.')
          "
      - name: DeepSpeed config validation
        # Each preset must exist, parse as JSON, define zero_optimization and
        # leave train_batch_size on 'auto'. Files are read as UTF-8 explicitly
        # rather than with the runner's locale default.
        run: |
          python -c "
          import json, os
          configs_dir = 'configs/deepspeed'
          for preset in ['zero2.json', 'zero3.json', 'zero3_offload.json']:
              path = os.path.join(configs_dir, preset)
              assert os.path.isfile(path), f'Missing: {path}'
              with open(path, encoding='utf-8') as f:
                  data = json.load(f)
              assert 'zero_optimization' in data, f'{preset}: missing zero_optimization'
              assert data['train_batch_size'] == 'auto', f'{preset}: train_batch_size should be auto'
              print(f'{preset}: OK (ZeRO stage {data[\"zero_optimization\"][\"stage\"]})')
          "
      - name: Notebook validation
        # Every notebooks/*.ipynb must be nbformat 4 with at least one cell.
        # The listing is sorted so the log order is stable between runs, and
        # notebooks are read as UTF-8 explicitly.
        run: |
          python -c "
          import json, os
          for nb_file in sorted(os.listdir('notebooks')):
              if nb_file.endswith('.ipynb'):
                  path = os.path.join('notebooks', nb_file)
                  with open(path, encoding='utf-8') as f:
                      nb = json.load(f)
                  assert nb['nbformat'] == 4, f'{nb_file}: invalid nbformat'
                  assert len(nb['cells']) > 0, f'{nb_file}: no cells'
                  print(f'{nb_file}: OK ({len(nb[\"cells\"])} cells)')
          "
      - name: Safety prompts validation
        # Every line of every configs/safety_prompts/*.jsonl file must be a
        # JSON object with a 'prompt' key, and at least 140 prompts must exist
        # in total. Files are visited in sorted order and read as UTF-8.
        run: |
          python -c "
          import json, os
          prompts_dir = 'configs/safety_prompts'
          assert os.path.isdir(prompts_dir), 'Missing configs/safety_prompts/'
          total = 0
          for f in sorted(os.listdir(prompts_dir)):
              if f.endswith('.jsonl'):
                  path = os.path.join(prompts_dir, f)
                  with open(path, encoding='utf-8') as fh:
                      for i, line in enumerate(fh):
                          data = json.loads(line)
                          assert 'prompt' in data, f'{f} line {i+1}: missing prompt key'
                          total += 1
                  print(f'{f}: OK')
          assert total >= 140, f'Expected 140+ safety prompts, found {total}'
          print(f'Total safety prompts: {total}')
          "
      - name: Synthetic module import check
        run: |
          python -c "
          from forgelm.synthetic import SyntheticDataGenerator
          print('synthetic.py import: OK')
          "
      - name: Site-as-tested-surface guard (strict)
        # Diffs site/*.html claims against the Python sources of truth
        # (compliance artefacts, quickstart templates, GPU profile count,
        # pyproject version). Fails the build on any drift; see
        # tools/check_site_claims.py for what's checked and why.
        run: python3 tools/check_site_claims.py --strict
      - name: Doc numerical-claims drift check (strict)
        # F-P8-C-06 (W1/H5): re-derives the canonical counts (secret
        # families, trainer types, quickstart templates, webhook events)
        # from the Python sources of truth and fails the build when a doc
        # asserts a stale number — e.g. the "five webhook events" prose that
        # drifted after the vocabulary grew to eight. Wired once H5's 5->8
        # doc-drift fix made the guard green at HEAD.
        run: python3 tools/check_doc_numerical_claims.py --strict
      - name: Bilingual doc H2/H3/H4 parity check (strict)
        # Phase 24: extended structural parity guard. Where the prior
        # inline check counted H2 only and missed reordered / demoted
        # sections, ``tools/check_bilingual_parity.py`` walks every
        # registered EN/TR pair and compares the full H2 + H3 + H4
        # spine. Translated text differs by definition; structural
        # depth + ordering must not. Add a new pair: register it in
        # ``tools/check_bilingual_parity.py::_PAIRS``; CI picks it up
        # automatically the next run.
        run: python3 tools/check_bilingual_parity.py --strict
      - name: Markdown anchor resolution check (strict)
        # Wave 5 / Faz 30 Task N: every Markdown anchor + relative-link
        # under docs/ must resolve. Catches renamed/removed targets,
        # slug-case drift, and forward-refs to pages that never landed
        # (the GH-014/017/019 bug class). Strict mode wired up after
        # the Wave 5 cleanup zeroed the live drift count from 36 → 0;
        # the tool itself remains usable in advisory mode (no flag) for
        # local feedback during a docs PR. See tools/check_anchor_resolution.py
        # for what's checked and the GFM slug approximation used.
        run: python3 tools/check_anchor_resolution.py --strict
      - name: CLI / docs help-consistency drift check (strict)
        # Wave 5 / Faz 30 Task J: catches docs that cite ghost flags
        # or non-existent subcommands (the GH-008/011/016/018/020
        # bug class). Strict mode wired up after the Wave 5 cleanup
        # commit zeroed the live drift count from 40 → 0 (matches the
        # check_anchor_resolution.py precedent: gate lands advisory,
        # baseline cleanup follows, later commit flips to --strict).
        # The tool itself remains usable in advisory mode (no flag)
        # for local feedback during a docs PR.
        run: python3 tools/check_cli_help_consistency.py --strict
      - name: Unguarded sys.modules.pop guard
        # v0.5.7 round-4 absorption: the v0.5.7 round-3 review traced
        # 35 spurious full-suite failures to three test sites that
        # popped torch / numpy from sys.modules without restoring
        # them, half-loading torch._C for every later test in the
        # pytest session. This guard fails CI on any new
        # ``sys.modules.pop("<heavy-module>")`` /
        # ``del sys.modules["<heavy-module>"]`` site, steering authors
        # to ``monkeypatch.delitem`` (auto-restores on teardown).
        run: python3 tools/check_no_unguarded_sys_modules_pop.py
      - name: Audit-event catalog drift check (strict)
        # W0/C7 (full-project review): the append-only audit log is a
        # public compliance contract (EU AI Act Art. 12). This guard
        # cross-checks every dotted audit event emitted in forgelm/
        # (log_event / _audit_event / event= / _EVT_* constants) against
        # the canonical table in docs/reference/audit_event_catalog.md, in
        # both directions — an undocumented emit OR a ghost catalog row
        # fails CI. Previously unwired: six pipeline.* stage events drifted
        # into the code with zero tripwire. See
        # tools/check_audit_event_catalog.py.
        run: python3 tools/check_audit_event_catalog.py --strict
      - name: TR cross-links prefer the TR mirror (strict)
        # W1/H11 (F-P8-C-04): a docs/**/*-tr.md page must route its in-prose
        # cross-references to the Turkish sibling when a <stem>-tr.md mirror
        # exists — a Turkish operator following a 'Bkz.'/'See also' link must
        # stay in Turkish, not silently land on the English page. Neither
        # check_anchor_resolution (link resolves) nor check_bilingual_parity
        # (heading spine) catches this. The **Ayna:** backlink line is exempt.
        # 62 leaks across 19 files at HEAD were swept to zero in H11.
        run: python3 tools/check_tr_links_prefer_mirror.py --strict
      - name: Library-API doc ↔ __all__ drift check (strict)
        # W1/H11 (F-P8-C-07): forgelm.__all__ must match the symbol roster in
        # docs/reference/library_api_reference.md in both directions. Previously
        # unwired — a renamed/removed public symbol could drift from the doc.
        run: python3 tools/check_library_api_doc.py --strict
      - name: Bilingual fenced-code-block parity (strict)
        # W1/H11 (F-P8-C-13): the Wave 6 fenced-block-count + per-ordinal
        # YAML-key parity guard was an orphan (wired nowhere, no own test).
        # Catches TR code-block / YAML-key drift the spine guard cannot see.
        run: python3 tools/check_bilingual_code_blocks.py --strict
      - name: ForgeConfig YAML snippet validation (strict)
        # W1/H11 (F-P8-C-07): every ForgeConfig-shaped YAML snippet in the docs
        # must pass Pydantic validation. Previously unwired despite catching
        # real doc-vs-schema drift.
        run: python3 tools/check_yaml_snippets.py --strict
      - name: Site chrome (EN/TR translation) parity
        # W1/H11 (F-P8-C-07): the active-tier (EN<->TR) translation-key sets in
        # site/js/translations.js must stay in lockstep. Run WITHOUT --strict:
        # --strict additionally gates the deferred de/fr/es/zh tiers, which are
        # a known v0.6.x backlog the guard's own help text says is "NOT wired
        # into CI". Default mode enforces only the active-tier parity rule.
        run: python3 tools/check_site_chrome_parity.py
      - name: Module-size ceiling guard (strict)
        # W1/H11 (F-P8-C-07): no forgelm/ module may drift past the
        # architecture-doc ~1000-LOC sub-package-split ceiling. Previously
        # unwired — module-size regressions could merge with green CI.
        run: python3 tools/check_module_size.py --strict
      - name: Wizard defaults schema-sync (strict)
        # W1/H11 (F-P8-C-07): the wizard's shipped defaults JSON must match the
        # Pydantic schema source of truth. Was gauntlet-only (human discipline).
        # NOTE(review): the step is labelled "(strict)" but no --strict flag is
        # passed — confirm the tool fails on drift by default.
        run: python3 tools/check_wizard_defaults_sync.py
      - name: No public-tree refs into gitignored working-memory
        # W1/H11 (F-P8-C-07): public-tree files must not cite docs/analysis/ or
        # docs/marketing/. Was gauntlet-only (human discipline) until now.
        run: python3 tools/check_no_analysis_refs.py
      - name: Marketing-site version sync (strict)
        # W1/H11 (F-P8-C-07): the marketing site's displayed version must match
        # CHANGELOG's latest released header. Was gauntlet-only.
        run: python3 tools/update_site_version.py --check
      - name: Notebook forgelm pin lockstep (strict)
        # W2/M7 (F-P8-C-09): every notebooks/*.ipynb !pip install forgelm pin
        # must target a shipping wheel — the exact pyproject version or the
        # latest released CHANGELOG version (so onboarding notebooks never
        # point users at a pre-release rc). Previously unwired; the pins had
        # drifted two minors (0.5.7 vs released 0.7.0) undetected.
        run: python3 tools/check_notebook_pins.py --strict
      - name: User-manual self-contained link guard (strict)
        # Post-v0.7.0 cycle: the docs/usermanuals/ tree is the source of
        # truth for the static-site SPA viewer (site/usermanual.html +
        # site/js/guide.js). The viewer only resolves SPA hash-router
        # routes ``#/<section>/<page>`` and external HTTPS URLs.
        # Anything else — repo-relative ``../../../guides/...`` paths,
        # intra-manual ``../section/page.md`` paths, SPA routes that
        # point at a non-existent page — 404s when the user clicks.
        # This guard walks every *.md under docs/usermanuals/ and
        # fails the gate on any link that would break in the SPA.
        # See docs/standards/documentation.md "User-manual link
        # discipline" for the full ruleset.
        run: python3 tools/check_usermanual_self_contained.py --strict
      - name: License check
        # LICENSE must exist and its first line must mention "Apache".
        run: |
          test -f LICENSE || (echo "LICENSE file missing" && exit 1)
          head -1 LICENSE | grep -q "Apache" || (echo "Expected Apache License" && exit 1)
          echo "License: OK"
      # Wave 4 / Faz 23: bandit static-security analysis on forgelm/.
      # tests/ is excluded via [tool.bandit] in pyproject.toml because
      # test fixtures legitimately use insecure patterns (assert, dummy
      # secrets). HIGH → fail; MEDIUM → ::warning::; LOW → silent.
      # (Mirrors nightly.yml supply-chain-security severity policy.)
      # See tools/check_bandit.py for the severity-tiering helper.
      - name: bandit (static security analysis)
        run: |
          python -m pip install 'bandit[toml]>=1.7.0,<2.0.0'
          # bandit exits 1 on ANY finding. --exit-zero suppresses only that
          # findings-based exit code, so a genuine bandit failure (bad
          # config, crash) still fails this step instead of being masked.
          # The JSON report is then judged by our own severity policy via
          # the tiering helper (tools/check_bandit.py).
          bandit -c pyproject.toml -r forgelm/ -f json -o /tmp/bandit.json --exit-zero
          python3 tools/check_bandit.py /tmp/bandit.json