Full-project-review remediation: Waves 1–4 (H2→N1, 78 commits) #245

Workflow file for this run

	name: CI

	# Run on every push to main and on every pull request targeting main.
	on:
	  push:
	    branches: [main]
	  pull_request:
	    branches: [main]

	# Least-privilege GITHUB_TOKEN: the jobs here only check out the repository
	# and run local tooling, so read-only access to contents is sufficient.
	# Widen this per job if a future step needs to write.
	permissions:
	  contents: read

	# Coverage gate (--cov-fail-under=40) is enforced via pyproject.toml's
	# [tool.pytest.ini_options].addopts; the test job intentionally does not
	# duplicate the flag here. FORGELM_OPERATOR is set so future operator-identity
	# checks (Phase 3 hardening) do not raise on distroless runners where USER is
	# unset.
	env:
	  FORGELM_OPERATOR: ci-smoke

	jobs:
	# --- Job 1: Lint (fast, no heavy deps) ---
	  lint:
	    runs-on: ubuntu-latest
	    steps:
	      - uses: actions/checkout@v5

	      - uses: actions/setup-python@v6
	        with:
	          python-version: "3.11"

	      - name: Install linter
	        run: pip install ruff

	      - name: Ruff lint
	        run: ruff check .

	      - name: Ruff format check
	        run: ruff format --check .

	      # Phase 16: every Pydantic field in forgelm/config.py must carry a
	      # description= argument so the hand-maintained configuration reference
	      # (docs/reference/configuration.md + -tr.md mirror) always has
	      # authoritative field text to mirror. Strict mode exits 1 on any
	      # undocumented field, so a contributor who adds a field without a
	      # description fails here instead of silently drifting from the
	      # operator-facing docs.
	      - name: Pydantic description= guard
	        run: python tools/check_field_descriptions.py --strict forgelm/config.py

	      # Wave 2a Round-2 F-XPR-02-03: project-wide HTTP discipline gate
	      # (per docs/standards/architecture.md "HTTP discipline"). Every
	      # outbound HTTP call must go through forgelm/_http.py (safe_post /
	      # safe_get); this guard fails CI if any new module reaches for
	      # requests / urllib / httpx directly.
	      # F-P8-C-19: promoted from an inline grep (which missed
	      # requests.Session()/aliased-import/whitespace-before-paren forms
	      # and had no own test) to a tested tool with its own
	      # tests/test_check_http_discipline.py.
	      - name: HTTP discipline guard
	        run: python tools/check_http_discipline.py

	# --- Job 2: Test (matrix across Python versions) ---
	  test:
	    runs-on: ubuntu-latest
	    strategy:
	      # Let every Python version finish even if one fails, so a single
	      # run shows the full compatibility picture.
	      fail-fast: false
	      matrix:
	        # Quoted so "3.10" stays a string instead of the float 3.1.
	        python-version: ["3.10", "3.11", "3.12", "3.13"]
	    steps:
	      - uses: actions/checkout@v5

	      - uses: actions/setup-python@v6
	        with:
	          python-version: ${{ matrix.python-version }}
	          cache: pip

	      - name: Install package (dev)
	        run: |
	          python -m pip install --upgrade pip
	          python -m pip install -e ".[dev]"

	      - name: Run tests with coverage
	        # --cov, --cov-report=term-missing, and --cov-fail-under=40 come from
	        # pyproject.toml addopts (single source of truth). Only the XML report
	        # is added here because it is CI-specific (artifact upload).
	        run: pytest -q --tb=short --cov-report=xml:coverage.xml

	      - name: Upload coverage (3.11 only)
	        # Upload coverage from one combo only — avoids 4× artifact
	        # duplication; 3.11 is canonical (matches publish.yml build job).
	        # always() keeps the upload running when an earlier step failed.
	        if: matrix.python-version == '3.11' && always()
	        uses: actions/upload-artifact@v5
	        with:
	          name: coverage-report
	          path: coverage.xml

	# --- Job 3: Validate (config, CLI, assets) ---
	  validate:
	    runs-on: ubuntu-latest
	    steps:
	      - uses: actions/checkout@v5

	      - uses: actions/setup-python@v6
	        with:
	          python-version: "3.11"
	          cache: pip

	      - name: Install package
	        run: |
	          python -m pip install --upgrade pip
	          python -m pip install -e ".[dev]"

	      - name: CLI smoke checks
	        run: |
	          forgelm --version
	          forgelm --config config_template.yaml --dry-run
	          forgelm --config config_template.yaml --dry-run --output-format json

	      - name: Config template validation
	        # Loads the shipped template and asserts the defaults it must carry.
	        run: |
	          python -c "
	          from forgelm.config import load_config
	          cfg = load_config('config_template.yaml')
	          assert cfg.model.name_or_path, 'model.name_or_path is empty'
	          assert cfg.model.trust_remote_code is False, 'trust_remote_code should default to False'
	          assert cfg.model.offline is False, 'offline should default to False'
	          assert cfg.training.trainer_type == 'sft', 'default trainer_type should be sft'
	          assert cfg.data.dataset_name_or_path, 'data.dataset_name_or_path is empty'
	          print('Config template validation passed.')
	          "

	      - name: DeepSpeed config validation
	        # Each preset must exist, parse as JSON, declare zero_optimization,
	        # and leave train_batch_size on 'auto'.
	        run: |
	          python -c "
	          import json, os
	          configs_dir = 'configs/deepspeed'
	          for preset in ['zero2.json', 'zero3.json', 'zero3_offload.json']:
	              path = os.path.join(configs_dir, preset)
	              assert os.path.isfile(path), f'Missing: {path}'
	              with open(path) as f:
	                  data = json.load(f)
	              assert 'zero_optimization' in data, f'{preset}: missing zero_optimization'
	              assert data['train_batch_size'] == 'auto', f'{preset}: train_batch_size should be auto'
	              print(f'{preset}: OK (ZeRO stage {data[\"zero_optimization\"][\"stage\"]})')
	          "

	      - name: Notebook validation
	        # Every notebooks/*.ipynb must be nbformat 4 and contain at least
	        # one cell.
	        run: |
	          python -c "
	          import json, os
	          for nb_file in os.listdir('notebooks'):
	              if nb_file.endswith('.ipynb'):
	                  path = os.path.join('notebooks', nb_file)
	                  with open(path) as f:
	                      nb = json.load(f)
	                  assert nb['nbformat'] == 4, f'{nb_file}: invalid nbformat'
	                  assert len(nb['cells']) > 0, f'{nb_file}: no cells'
	                  print(f'{nb_file}: OK ({len(nb[\"cells\"])} cells)')
	          "

	      - name: Safety prompts validation
	        # Every line of every *.jsonl file must be a JSON object with a
	        # 'prompt' key; at least 140 prompts must exist across all files.
	        run: |
	          python -c "
	          import json, os
	          prompts_dir = 'configs/safety_prompts'
	          assert os.path.isdir(prompts_dir), 'Missing configs/safety_prompts/'
	          total = 0
	          for f in os.listdir(prompts_dir):
	              if f.endswith('.jsonl'):
	                  path = os.path.join(prompts_dir, f)
	                  with open(path) as fh:
	                      for i, line in enumerate(fh):
	                          data = json.loads(line)
	                          assert 'prompt' in data, f'{f} line {i+1}: missing prompt key'
	                          total += 1
	                  print(f'{f}: OK')
	          assert total >= 140, f'Expected 140+ safety prompts, found {total}'
	          print(f'Total safety prompts: {total}')
	          "

	      - name: Synthetic module import check
	        run: |
	          python -c "
	          from forgelm.synthetic import SyntheticDataGenerator
	          print('synthetic.py import: OK')
	          "

	      - name: Site-as-tested-surface guard (strict)
	        # Diffs site/*.html claims against the Python sources of truth
	        # (compliance artefacts, quickstart templates, GPU profile count,
	        # pyproject version). Fails the build on any drift; see
	        # tools/check_site_claims.py for what's checked and why.
	        run: python3 tools/check_site_claims.py --strict

	      - name: Doc numerical-claims drift check (strict)
	        # F-P8-C-06 (W1/H5): re-derives the canonical counts (secret
	        # families, trainer types, quickstart templates, webhook events)
	        # from the Python sources of truth and fails the build when a doc
	        # asserts a stale number — e.g. the "five webhook events" prose that
	        # drifted after the vocabulary grew to eight. Wired once H5's 5->8
	        # doc-drift fix made the guard green at HEAD.
	        run: python3 tools/check_doc_numerical_claims.py --strict

	      - name: Bilingual doc H2/H3/H4 parity check (strict)
	        # Phase 24: extended structural parity guard. Where the prior
	        # inline check counted H2 only and missed reordered / demoted
	        # sections, ``tools/check_bilingual_parity.py`` walks every
	        # registered EN/TR pair and compares the full H2 + H3 + H4
	        # spine. Translated text differs by definition; structural
	        # depth + ordering must not. Add a new pair: register it in
	        # ``tools/check_bilingual_parity.py::_PAIRS``; CI picks it up
	        # automatically the next run.
	        run: python3 tools/check_bilingual_parity.py --strict

	      - name: Markdown anchor resolution check (strict)
	        # Wave 5 / Faz 30 Task N: every Markdown anchor + relative-link
	        # under docs/ must resolve. Catches renamed/removed targets,
	        # slug-case drift, and forward-refs to pages that never landed
	        # (the GH-014/017/019 bug class). Strict mode wired up after
	        # the Wave 5 cleanup zeroed the live drift count from 36 → 0;
	        # the tool itself remains usable in advisory mode (no flag) for
	        # local feedback during a docs PR. See tools/check_anchor_resolution.py
	        # for what's checked and the GFM slug approximation used.
	        run: python3 tools/check_anchor_resolution.py --strict

	      - name: CLI / docs help-consistency drift check (strict)
	        # Wave 5 / Faz 30 Task J: catches docs that cite ghost flags
	        # or non-existent subcommands (the GH-008/011/016/018/020
	        # bug class). Strict mode wired up after the Wave 5 cleanup
	        # commit zeroed the live drift count from 40 → 0 (matches the
	        # check_anchor_resolution.py precedent: gate lands advisory,
	        # baseline cleanup follows, later commit flips to --strict).
	        # The tool itself remains usable in advisory mode (no flag)
	        # for local feedback during a docs PR.
	        run: python3 tools/check_cli_help_consistency.py --strict

	      - name: Unguarded sys.modules.pop guard
	        # v0.5.7 round-4 absorption: the v0.5.7 round-3 review traced
	        # 35 spurious full-suite failures to three test sites that
	        # popped torch / numpy from sys.modules without restoring
	        # them, half-loading torch._C for every later test in the
	        # pytest session. This guard fails CI on any new
	        # ``sys.modules.pop("<heavy-module>")`` /
	        # ``del sys.modules["<heavy-module>"]`` site, steering authors
	        # to ``monkeypatch.delitem`` (auto-restores on teardown).
	        run: python3 tools/check_no_unguarded_sys_modules_pop.py

	      - name: Audit-event catalog drift check (strict)
	        # W0/C7 (full-project review): the append-only audit log is a
	        # public compliance contract (EU AI Act Art. 12). This guard
	        # cross-checks every dotted audit event emitted in forgelm/
	        # (log_event / _audit_event / event= / _EVT_* constants) against
	        # the canonical table in docs/reference/audit_event_catalog.md, in
	        # both directions — an undocumented emit OR a ghost catalog row
	        # fails CI. Previously unwired: six pipeline.* stage events drifted
	        # into the code with zero tripwire. See
	        # tools/check_audit_event_catalog.py.
	        run: python3 tools/check_audit_event_catalog.py --strict

	      - name: TR cross-links prefer the TR mirror (strict)
	        # W1/H11 (F-P8-C-04): a docs/*/-tr.md page must route its in-prose
	        # cross-references to the Turkish sibling when a <stem>-tr.md mirror
	        # exists — a Turkish operator following a 'Bkz.'/'See also' link must
	        # stay in Turkish, not silently land on the English page. Neither
	        # check_anchor_resolution (link resolves) nor check_bilingual_parity
	        # (heading spine) catches this. The Ayna: backlink line is exempt.
	        # 62 leaks across 19 files at HEAD were swept to zero in H11.
	        run: python3 tools/check_tr_links_prefer_mirror.py --strict

	      - name: Library-API doc ↔ __all__ drift check (strict)
	        # W1/H11 (F-P8-C-07): forgelm.__all__ must match the symbol roster in
	        # docs/reference/library_api_reference.md in both directions. Previously
	        # unwired — a renamed/removed public symbol could drift from the doc.
	        run: python3 tools/check_library_api_doc.py --strict

	      - name: Bilingual fenced-code-block parity (strict)
	        # W1/H11 (F-P8-C-13): the Wave 6 fenced-block-count + per-ordinal
	        # YAML-key parity guard was an orphan (wired nowhere, no own test).
	        # Catches TR code-block / YAML-key drift the spine guard cannot see.
	        run: python3 tools/check_bilingual_code_blocks.py --strict

	      - name: ForgeConfig YAML snippet validation (strict)
	        # W1/H11 (F-P8-C-07): every ForgeConfig-shaped YAML snippet in the docs
	        # must pass Pydantic validation. Previously unwired despite catching
	        # real doc-vs-schema drift.
	        run: python3 tools/check_yaml_snippets.py --strict

	      - name: Site chrome (EN/TR translation) parity
	        # W1/H11 (F-P8-C-07): the active-tier (EN<->TR) translation-key sets in
	        # site/js/translations.js must stay in lockstep. Run WITHOUT --strict:
	        # --strict additionally gates the deferred de/fr/es/zh tiers, which are
	        # a known v0.6.x backlog the guard's own help text says is "NOT wired
	        # into CI". Default mode enforces only the active-tier parity rule.
	        run: python3 tools/check_site_chrome_parity.py

	      - name: Module-size ceiling guard (strict)
	        # W1/H11 (F-P8-C-07): no forgelm/ module may drift past the
	        # architecture-doc ~1000-LOC sub-package-split ceiling. Previously
	        # unwired — module-size regressions could merge with green CI.
	        run: python3 tools/check_module_size.py --strict

	      - name: Wizard defaults schema-sync (strict)
	        # W1/H11 (F-P8-C-07): the wizard's shipped defaults JSON must match the
	        # Pydantic schema source of truth. Was gauntlet-only (human discipline).
	        # NOTE(review): the step name says "(strict)" but no --strict flag is
	        # passed — confirm the tool fails the build by default.
	        run: python3 tools/check_wizard_defaults_sync.py

	      - name: No public-tree refs into gitignored working-memory
	        # W1/H11 (F-P8-C-07): public-tree files must not cite docs/analysis/ or
	        # docs/marketing/. Was gauntlet-only (human discipline) until now.
	        run: python3 tools/check_no_analysis_refs.py

	      - name: Marketing-site version sync (strict)
	        # W1/H11 (F-P8-C-07): the marketing site's displayed version must match
	        # CHANGELOG's latest released header. Was gauntlet-only.
	        run: python3 tools/update_site_version.py --check

	      - name: Notebook forgelm pin lockstep (strict)
	        # W2/M7 (F-P8-C-09): every notebooks/*.ipynb !pip install forgelm pin
	        # must target a shipping wheel — the exact pyproject version or the
	        # latest released CHANGELOG version (so onboarding notebooks never
	        # point users at a pre-release rc). Previously unwired; the pins had
	        # drifted two minors (0.5.7 vs released 0.7.0) undetected.
	        run: python3 tools/check_notebook_pins.py --strict

	      - name: User-manual self-contained link guard (strict)
	        # Post-v0.7.0 cycle: the docs/usermanuals/ tree is the source of
	        # truth for the static-site SPA viewer (site/usermanual.html +
	        # site/js/guide.js). The viewer only resolves SPA hash-router
	        # routes ``#/<section>/<page>`` and external HTTPS URLs.
	        # Anything else — repo-relative ``../../../guides/...`` paths,
	        # intra-manual ``../section/page.md`` paths, SPA routes that
	        # point at a non-existent page — 404s when the user clicks.
	        # This guard walks every *.md under docs/usermanuals/ and
	        # fails the gate on any link that would break in the SPA.
	        # See docs/standards/documentation.md "User-manual link
	        # discipline" for the full ruleset.
	        run: python3 tools/check_usermanual_self_contained.py --strict

	      - name: License check
	        # LICENSE must exist and its first line must mention "Apache".
	        run: |
	          test -f LICENSE || (echo "LICENSE file missing" && exit 1)
	          head -1 LICENSE | grep -q "Apache" || (echo "Expected Apache License" && exit 1)
	          echo "License: OK"

	      # Wave 4 / Faz 23: bandit static-security analysis on forgelm/.
	      # tests/ is excluded via [tool.bandit] in pyproject.toml because
	      # test fixtures legitimately use insecure patterns (assert, dummy
	      # secrets). HIGH → fail; MEDIUM → ::warning::; LOW → silent.
	      # (Mirrors nightly.yml supply-chain-security severity policy.)
	      # See tools/check_bandit.py for the severity-tiering helper.
	      - name: bandit (static security analysis)
	        run: |
	          python -m pip install 'bandit[toml]>=1.7.0,<2.0.0'
	          # bandit returns exit 1 on ANY findings. We capture the JSON
	          # report and apply our own severity policy via the tiering
	          # helper (tools/check_bandit.py).
	          bandit -c pyproject.toml -r forgelm/ -f json -o /tmp/bandit.json || true
	          python3 tools/check_bandit.py /tmp/bandit.json

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Full-project-review remediation: Waves 1–4 (H2→N1, 78 commits) #245

Workflow file

Full-project-review remediation: Waves 1–4 (H2→N1, 78 commits) #245

Uh oh!

Workflow file for this run