Full-project-review remediation: Waves 1–4 (H2→N1, 78 commits) #245
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| name: CI | |
| on: | |
| push: | |
| branches: [main] | |
| pull_request: | |
| branches: [main] | |
| # Coverage gate (--cov-fail-under=40) is enforced via pyproject.toml's | |
| # [tool.pytest.ini_options].addopts; the test job intentionally does not | |
| # duplicate the flag here. FORGELM_OPERATOR is set so future operator-identity | |
| # checks (Phase 3 hardening) do not raise on distroless runners where USER is | |
| # unset. | |
| env: | |
| FORGELM_OPERATOR: ci-smoke | |
| jobs: | |
| # --- Job 1: Lint (fast, no heavy deps) --- | |
| lint: | |
| runs-on: ubuntu-latest | |
| steps: | |
| - uses: actions/checkout@v5 | |
| - uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.11" | |
| - name: Install linter | |
| run: pip install ruff | |
| - name: Ruff lint | |
| run: ruff check . | |
| - name: Ruff format check | |
| run: ruff format --check . | |
| # Wave 2a Round-2 F-XPR-02-03: project-wide HTTP discipline gate | |
| # (per docs/standards/architecture.md "HTTP discipline"). Every | |
| # outbound HTTP call must go through forgelm/_http.py (safe_post / | |
| # safe_get). The "HTTP discipline guard" step at the end of this job | |
| # fails CI if any new module reaches for requests / urllib / httpx directly. | |
| # Phase 16: every Pydantic field in forgelm/config.py must carry a | |
| # description= argument so the hand-maintained configuration reference | |
| # (docs/reference/configuration.md + -tr.md mirror) always has | |
| # authoritative field text to mirror. Strict mode exits 1 on any | |
| # undocumented field — a contributor who forgets the description on a | |
| # new field fails here instead of silently drifting from operator-facing docs. | |
| - name: Pydantic description= guard | |
| run: python tools/check_field_descriptions.py --strict forgelm/config.py | |
| # F-P8-C-19: this guard was promoted from an inline grep (which missed | |
| # requests.Session()/aliased-import/whitespace-before-paren forms | |
| # and had no test of its own) to a tested tool covered by | |
| # tests/test_check_http_discipline.py. | |
| - name: HTTP discipline guard | |
| run: python tools/check_http_discipline.py | |
| # --- Job 2: Test (matrix across Python versions) --- | |
| test: | |
| runs-on: ubuntu-latest | |
| strategy: | |
| # Let every Python version run to completion even when one fails, so a | |
| # version-specific break is not hidden by a cancelled sibling job. | |
| fail-fast: false | |
| matrix: | |
| python-version: ["3.10", "3.11", "3.12", "3.13"] | |
| steps: | |
| - uses: actions/checkout@v5 | |
| - uses: actions/setup-python@v6 | |
| with: | |
| python-version: ${{ matrix.python-version }} | |
| cache: pip | |
| - name: Install package (dev) | |
| run: | | |
| python -m pip install --upgrade pip | |
| python -m pip install -e ".[dev]" | |
| - name: Run tests with coverage | |
| # --cov, --cov-report=term-missing, and --cov-fail-under=40 come from | |
| # pyproject.toml addopts (single source of truth). Only the XML report | |
| # is added here because it is CI-specific (artifact upload). | |
| run: pytest -q --tb=short --cov-report=xml:coverage.xml | |
| - name: Upload coverage (3.11 only) | |
| # Upload coverage from one matrix entry only — avoids 4× artifact | |
| # duplication; 3.11 is canonical (matches publish.yml build job). | |
| # always() keeps this step running even when the test step failed. | |
| if: matrix.python-version == '3.11' && always() | |
| uses: actions/upload-artifact@v5 | |
| with: | |
| name: coverage-report | |
| path: coverage.xml | |
| # --- Job 3: Validate (config, CLI, assets) --- | |
| validate: | |
| runs-on: ubuntu-latest | |
| steps: | |
| - uses: actions/checkout@v5 | |
| - uses: actions/setup-python@v6 | |
| with: | |
| python-version: "3.11" | |
| cache: pip | |
| - name: Install package | |
| run: | | |
| python -m pip install --upgrade pip | |
| python -m pip install -e ".[dev]" | |
| - name: CLI smoke checks | |
| # Exercises the installed forgelm entry point: the version flag, then a | |
| # dry run of config_template.yaml in the default and JSON output formats. | |
| run: | | |
| forgelm --version | |
| forgelm --config config_template.yaml --dry-run | |
| forgelm --config config_template.yaml --dry-run --output-format json | |
| - name: Config template validation | |
| # Loads config_template.yaml through forgelm.config.load_config and | |
| # asserts the shipped values: non-empty model and dataset paths, | |
| # trust_remote_code and offline both False, trainer_type 'sft'. | |
| run: | | |
| python -c " | |
| from forgelm.config import load_config | |
| cfg = load_config('config_template.yaml') | |
| assert cfg.model.name_or_path, 'model.name_or_path is empty' | |
| assert cfg.model.trust_remote_code is False, 'trust_remote_code should default to False' | |
| assert cfg.model.offline is False, 'offline should default to False' | |
| assert cfg.training.trainer_type == 'sft', 'default trainer_type should be sft' | |
| assert cfg.data.dataset_name_or_path, 'data.dataset_name_or_path is empty' | |
| print('Config template validation passed.') | |
| " | |
| - name: DeepSpeed config validation | |
| # Each preset under configs/deepspeed/ (zero2, zero3, zero3_offload) | |
| # must exist, parse as JSON, contain a zero_optimization section, and | |
| # set train_batch_size to 'auto'. | |
| run: | | |
| python -c " | |
| import json, os | |
| configs_dir = 'configs/deepspeed' | |
| for preset in ['zero2.json', 'zero3.json', 'zero3_offload.json']: | |
| path = os.path.join(configs_dir, preset) | |
| assert os.path.isfile(path), f'Missing: {path}' | |
| with open(path) as f: | |
| data = json.load(f) | |
| assert 'zero_optimization' in data, f'{preset}: missing zero_optimization' | |
| assert data['train_batch_size'] == 'auto', f'{preset}: train_batch_size should be auto' | |
| print(f'{preset}: OK (ZeRO stage {data[\"zero_optimization\"][\"stage\"]})') | |
| " | |
| - name: Notebook validation | |
| # Every notebooks/*.ipynb must parse as JSON, declare nbformat 4, and | |
| # contain at least one cell. | |
| # NOTE(review): a notebooks/ directory with no .ipynb files passes | |
| # vacuously (no minimum count is asserted) — confirm that is intended. | |
| run: | | |
| python -c " | |
| import json, os | |
| for nb_file in os.listdir('notebooks'): | |
| if nb_file.endswith('.ipynb'): | |
| path = os.path.join('notebooks', nb_file) | |
| with open(path) as f: | |
| nb = json.load(f) | |
| assert nb['nbformat'] == 4, f'{nb_file}: invalid nbformat' | |
| assert len(nb['cells']) > 0, f'{nb_file}: no cells' | |
| print(f'{nb_file}: OK ({len(nb[\"cells\"])} cells)') | |
| " | |
| - name: Safety prompts validation | |
| # Every line of every configs/safety_prompts/*.jsonl must parse as JSON | |
| # and contain a 'prompt' key; at least 140 prompts in total are required. | |
| # NOTE(review): a blank line inside a .jsonl file makes json.loads raise | |
| # without naming the file — presumably intentional strictness; confirm. | |
| run: | | |
| python -c " | |
| import json, os | |
| prompts_dir = 'configs/safety_prompts' | |
| assert os.path.isdir(prompts_dir), 'Missing configs/safety_prompts/' | |
| total = 0 | |
| for f in os.listdir(prompts_dir): | |
| if f.endswith('.jsonl'): | |
| path = os.path.join(prompts_dir, f) | |
| with open(path) as fh: | |
| for i, line in enumerate(fh): | |
| data = json.loads(line) | |
| assert 'prompt' in data, f'{f} line {i+1}: missing prompt key' | |
| total += 1 | |
| print(f'{f}: OK') | |
| assert total >= 140, f'Expected 140+ safety prompts, found {total}' | |
| print(f'Total safety prompts: {total}') | |
| " | |
| - name: Synthetic module import check | |
| # Import-only smoke test: fails if SyntheticDataGenerator cannot be | |
| # imported from forgelm.synthetic. | |
| run: | | |
| python -c " | |
| from forgelm.synthetic import SyntheticDataGenerator | |
| print('synthetic.py import: OK') | |
| " | |
| - name: Site-as-tested-surface guard (strict) | |
| # Diffs site/*.html claims against the Python sources of truth | |
| # (compliance artefacts, quickstart templates, GPU profile count, | |
| # pyproject version). Fails the build on any drift; see | |
| # tools/check_site_claims.py for what's checked and why. | |
| run: python3 tools/check_site_claims.py --strict | |
| - name: Doc numerical-claims drift check (strict) | |
| # F-P8-C-06 (W1/H5): re-derives the canonical counts (secret | |
| # families, trainer types, quickstart templates, webhook events) | |
| # from the Python sources of truth and fails the build when a doc | |
| # asserts a stale number — e.g. the "five webhook events" prose that | |
| # drifted after the vocabulary grew to eight. Wired once H5's 5->8 | |
| # doc-drift fix made the guard green at HEAD. | |
| run: python3 tools/check_doc_numerical_claims.py --strict | |
| - name: Bilingual doc H2/H3/H4 parity check (strict) | |
| # Phase 24: extended structural parity guard. Where the prior | |
| # inline check counted H2 only and missed reordered / demoted | |
| # sections, ``tools/check_bilingual_parity.py`` walks every | |
| # registered EN/TR pair and compares the full H2 + H3 + H4 | |
| # spine. Translated text differs by definition; structural | |
| # depth + ordering must not. To add a new pair, register it in | |
| # ``tools/check_bilingual_parity.py::_PAIRS``; CI picks it up | |
| # automatically on the next run. | |
| run: python3 tools/check_bilingual_parity.py --strict | |
| - name: Markdown anchor resolution check (strict) | |
| # Wave 5 / Phase 30 Task N: every Markdown anchor + relative-link | |
| # under docs/ must resolve. Catches renamed/removed targets, | |
| # slug-case drift, and forward-refs to pages that never landed | |
| # (the GH-014/017/019 bug class). Strict mode wired up after | |
| # the Wave 5 cleanup zeroed the live drift count from 36 → 0; | |
| # the tool itself remains usable in advisory mode (no flag) for | |
| # local feedback during a docs PR. See tools/check_anchor_resolution.py | |
| # for what's checked and the GFM slug approximation used. | |
| run: python3 tools/check_anchor_resolution.py --strict | |
| - name: CLI / docs help-consistency drift check (strict) | |
| # Wave 5 / Phase 30 Task J: catches docs that cite ghost flags | |
| # or non-existent subcommands (the GH-008/011/016/018/020 | |
| # bug class). Strict mode wired up after the Wave 5 cleanup | |
| # commit zeroed the live drift count from 40 → 0 (matches the | |
| # check_anchor_resolution.py precedent: gate lands advisory, | |
| # baseline cleanup follows, later commit flips to --strict). | |
| # The tool itself remains usable in advisory mode (no flag) | |
| # for local feedback during a docs PR. | |
| run: python3 tools/check_cli_help_consistency.py --strict | |
| - name: Unguarded sys.modules.pop guard | |
| # v0.5.7 round-4 absorption: the v0.5.7 round-3 review traced | |
| # 35 spurious full-suite failures to three test sites that | |
| # popped torch / numpy from sys.modules without restoring | |
| # them, half-loading torch._C for every later test in the | |
| # pytest session. This guard fails CI on any new | |
| # ``sys.modules.pop("<heavy-module>")`` / | |
| # ``del sys.modules["<heavy-module>"]`` site, steering authors | |
| # to ``monkeypatch.delitem`` (auto-restores on teardown). | |
| run: python3 tools/check_no_unguarded_sys_modules_pop.py | |
| - name: Audit-event catalog drift check (strict) | |
| # W0/C7 (full-project review): the append-only audit log is a | |
| # public compliance contract (EU AI Act Art. 12). This guard | |
| # cross-checks every dotted audit event emitted in forgelm/ | |
| # (log_event / _audit_event / event= / _EVT_* constants) against | |
| # the canonical table in docs/reference/audit_event_catalog.md, in | |
| # both directions — an undocumented emit OR a ghost catalog row | |
| # fails CI. Previously unwired: six pipeline.* stage events drifted | |
| # into the code with zero tripwire. See | |
| # tools/check_audit_event_catalog.py. | |
| run: python3 tools/check_audit_event_catalog.py --strict | |
| - name: TR cross-links prefer the TR mirror (strict) | |
| # W1/H11 (F-P8-C-04): a docs/**/*-tr.md page must route its in-prose | |
| # cross-references to the Turkish sibling when a <stem>-tr.md mirror | |
| # exists — a Turkish operator following a 'Bkz.'/'See also' link must | |
| # stay in Turkish, not silently land on the English page. Neither | |
| # check_anchor_resolution (link resolves) nor check_bilingual_parity | |
| # (heading spine) catches this. The **Ayna:** backlink line is exempt. | |
| # 62 leaks across 19 files at HEAD were swept to zero in H11. | |
| run: python3 tools/check_tr_links_prefer_mirror.py --strict | |
| - name: Library-API doc ↔ __all__ drift check (strict) | |
| # W1/H11 (F-P8-C-07): forgelm.__all__ must match the symbol roster in | |
| # docs/reference/library_api_reference.md in both directions. Previously | |
| # unwired — a renamed/removed public symbol could drift from the doc. | |
| run: python3 tools/check_library_api_doc.py --strict | |
| - name: Bilingual fenced-code-block parity (strict) | |
| # W1/H11 (F-P8-C-13): the Wave 6 fenced-block-count + per-ordinal | |
| # YAML-key parity guard was an orphan (wired nowhere, no own test). | |
| # Catches TR code-block / YAML-key drift the spine guard cannot see. | |
| run: python3 tools/check_bilingual_code_blocks.py --strict | |
| - name: ForgeConfig YAML snippet validation (strict) | |
| # W1/H11 (F-P8-C-07): every ForgeConfig-shaped YAML snippet in the docs | |
| # must pass Pydantic validation. Previously unwired despite catching | |
| # real doc-vs-schema drift. | |
| run: python3 tools/check_yaml_snippets.py --strict | |
| - name: Site chrome (EN/TR translation) parity | |
| # W1/H11 (F-P8-C-07): the active-tier (EN<->TR) translation-key sets in | |
| # site/js/translations.js must stay in lockstep. Run WITHOUT --strict: | |
| # --strict additionally gates the deferred de/fr/es/zh tiers, which are | |
| # a known v0.6.x backlog the guard's own help text says is "NOT wired | |
| # into CI". Default mode enforces only the active-tier parity rule. | |
| run: python3 tools/check_site_chrome_parity.py | |
| - name: Module-size ceiling guard (strict) | |
| # W1/H11 (F-P8-C-07): no forgelm/ module may drift past the | |
| # architecture-doc ~1000-LOC sub-package-split ceiling. Previously | |
| # unwired — module-size regressions could merge with green CI. | |
| run: python3 tools/check_module_size.py --strict | |
| - name: Wizard defaults schema-sync (strict) | |
| # W1/H11 (F-P8-C-07): the wizard's shipped defaults JSON must match the | |
| # Pydantic schema source of truth. Was gauntlet-only (human discipline). | |
| # NOTE(review): the step name says "(strict)" but no --strict flag is | |
| # passed — confirm the tool fails on drift by default, or add the flag. | |
| run: python3 tools/check_wizard_defaults_sync.py | |
| - name: No public-tree refs into gitignored working-memory | |
| # W1/H11 (F-P8-C-07): public-tree files must not cite docs/analysis/ or | |
| # docs/marketing/. Was gauntlet-only (human discipline) until now. | |
| run: python3 tools/check_no_analysis_refs.py | |
| - name: Marketing-site version sync (strict) | |
| # W1/H11 (F-P8-C-07): the marketing site's displayed version must match | |
| # CHANGELOG's latest released header. Was gauntlet-only. | |
| run: python3 tools/update_site_version.py --check | |
| - name: Notebook forgelm pin lockstep (strict) | |
| # W2/M7 (F-P8-C-09): every notebooks/*.ipynb !pip install forgelm pin | |
| # must target a shipping wheel — the exact pyproject version or the | |
| # latest released CHANGELOG version (so onboarding notebooks never | |
| # point users at a pre-release rc). Previously unwired; the pins had | |
| # drifted two minors (0.5.7 vs released 0.7.0) undetected. | |
| run: python3 tools/check_notebook_pins.py --strict | |
| - name: User-manual self-contained link guard (strict) | |
| # Post-v0.7.0 cycle: the docs/usermanuals/ tree is the source of | |
| # truth for the static-site SPA viewer (site/usermanual.html + | |
| # site/js/guide.js). The viewer only resolves SPA hash-router | |
| # routes ``#/<section>/<page>`` and external HTTPS URLs. | |
| # Anything else — repo-relative ``../../../guides/...`` paths, | |
| # intra-manual ``../section/page.md`` paths, SPA routes that | |
| # point at a non-existent page — 404s when the user clicks. | |
| # This guard walks every *.md under docs/usermanuals/ and | |
| # fails the gate on any link that would break in the SPA. | |
| # See docs/standards/documentation.md "User-manual link | |
| # discipline" for the full ruleset. | |
| run: python3 tools/check_usermanual_self_contained.py --strict | |
| - name: License check | |
| run: | | |
| test -f LICENSE || (echo "LICENSE file missing" && exit 1) | |
| head -1 LICENSE | grep -q "Apache" || (echo "Expected Apache License" && exit 1) | |
| echo "License: OK" | |
| # Wave 4 / Phase 23: bandit static-security analysis on forgelm/. | |
| # tests/ is excluded via [tool.bandit] in pyproject.toml because | |
| # test fixtures legitimately use insecure patterns (assert, dummy | |
| # secrets). HIGH → fail; MEDIUM → ::warning::; LOW → silent. | |
| # (Mirrors nightly.yml supply-chain-security severity policy.) | |
| # See tools/check_bandit.py for the severity-tiering helper. | |
| - name: bandit (static security analysis) | |
| # NOTE(review): `|| true` below discards bandit's exit status entirely, | |
| # so a bandit crash looks the same as "findings present". The gate then | |
| # depends on tools/check_bandit.py failing on a missing or malformed | |
| # report — TODO confirm it does. | |
| run: | | |
| python -m pip install 'bandit[toml]>=1.7.0,<2.0.0' | |
| # bandit returns exit 1 on ANY findings. We capture the JSON | |
| # report and apply our own severity policy via the tiering | |
| # helper (tools/check_bandit.py). | |
| bandit -c pyproject.toml -r forgelm/ -f json -o /tmp/bandit.json || true | |
| python3 tools/check_bandit.py /tmp/bandit.json | |