# Merge pull request #56 from cemililik/development #227

# Workflow file for this run

	name: CI

	on:
	push:
	branches: [main]
	pull_request:
	branches: [main]

	# Coverage gate (--cov-fail-under=40) is enforced via pyproject.toml's
	# [tool.pytest.ini_options].addopts; the test job intentionally does not
	# duplicate the flag here. FORGELM_OPERATOR is set so future operator-identity
	# checks (Phase 3 hardening) do not raise on distroless runners where USER is
	# unset.
	env:
	FORGELM_OPERATOR: ci-smoke

	jobs:
	# --- Job 1: Lint (fast, no heavy deps) ---
	lint:
	runs-on: ubuntu-latest
	steps:
	- uses: actions/checkout@v5

	- uses: actions/setup-python@v6
	with:
	python-version: "3.11"

	- name: Install linter
	run: pip install ruff

	- name: Ruff lint
	run: ruff check .

	- name: Ruff format check
	run: ruff format --check .

	# Wave 2a Round-2 F-XPR-02-03: project-wide HTTP discipline gate
	# (per docs/standards/architecture.md "HTTP discipline"). Every
	# outbound HTTP call must go through forgelm/_http.py (safe_post /
	# safe_get). This grep guard fails CI if any new module reaches
	# for requests / urllib / httpx directly.
	# Phase 16: every Pydantic field in forgelm/config.py must carry a
	# description= argument so the autogenerated configuration reference
	# stays in lockstep with the schema. Strict mode exits 1 on any
	# undocumented field — a new contributor adding a field forgets the
	# description here, not silently drifting from the operator-facing docs.
	- name: Pydantic description= guard
	run: python tools/check_field_descriptions.py --strict forgelm/config.py

	- name: HTTP discipline guard
	run: \|
	set +e
	# Match ACTUAL CALLS only (the function name followed by an
	# opening paren) so docstring prose mentioning the same names
	# is not flagged.
	violations=$(grep -rn -E '(requests\.(get\|post\|put\|delete\|patch\|request\|head)\(\|urllib\.request\.urlopen\(\|httpx\.[a-z]+\()' forgelm/ --include='*.py' \| grep -v 'forgelm/_http.py')
	if [ -n "$violations" ]; then
	echo "::error::Found undisciplined HTTP call(s) outside forgelm/_http.py:"
	echo "$violations"
	echo ""
	echo "Route through forgelm._http.safe_post / safe_get instead."
	echo "See docs/standards/architecture.md 'HTTP discipline' section."
	exit 1
	fi

	# --- Job 2: Test (matrix across Python versions) ---
	test:
	runs-on: ubuntu-latest
	strategy:
	fail-fast: false
	matrix:
	python-version: ["3.10", "3.11", "3.12", "3.13"]
	steps:
	- uses: actions/checkout@v5

	- uses: actions/setup-python@v6
	with:
	python-version: ${{ matrix.python-version }}
	cache: pip

	- name: Install package (dev)
	run: \|
	python -m pip install --upgrade pip
	python -m pip install -e ".[dev]"

	- name: Run tests with coverage
	# --cov, --cov-report=term-missing, and --cov-fail-under=40 come from
	# pyproject.toml addopts (single source of truth). Only the XML report
	# is added here because it is CI-specific (artifact upload).
	run: pytest -q --tb=short --cov-report=xml:coverage.xml

	- name: Upload coverage (3.11 only)
	# Upload coverage from one combo only — avoids 4× artifact duplication; 3.11 is canonical (matches publish.yml build job).
	if: matrix.python-version == '3.11' && always()
	uses: actions/upload-artifact@v5
	with:
	name: coverage-report
	path: coverage.xml

	# --- Job 3: Validate (config, CLI, assets) ---
	validate:
	runs-on: ubuntu-latest
	steps:
	- uses: actions/checkout@v5

	- uses: actions/setup-python@v6
	with:
	python-version: "3.11"
	cache: pip

	- name: Install package
	run: \|
	python -m pip install --upgrade pip
	python -m pip install -e ".[dev]"

	- name: CLI smoke checks
	run: \|
	forgelm --version
	forgelm --config config_template.yaml --dry-run
	forgelm --config config_template.yaml --dry-run --output-format json

	- name: Config template validation
	run: \|
	python -c "
	from forgelm.config import load_config
	cfg = load_config('config_template.yaml')
	assert cfg.model.name_or_path, 'model.name_or_path is empty'
	assert cfg.model.trust_remote_code is False, 'trust_remote_code should default to False'
	assert cfg.model.offline is False, 'offline should default to False'
	assert cfg.training.trainer_type == 'sft', 'default trainer_type should be sft'
	assert cfg.data.dataset_name_or_path, 'data.dataset_name_or_path is empty'
	print('Config template validation passed.')
	"

	- name: DeepSpeed config validation
	run: \|
	python -c "
	import json, os
	configs_dir = 'configs/deepspeed'
	for preset in ['zero2.json', 'zero3.json', 'zero3_offload.json']:
	path = os.path.join(configs_dir, preset)
	assert os.path.isfile(path), f'Missing: {path}'
	with open(path) as f:
	data = json.load(f)
	assert 'zero_optimization' in data, f'{preset}: missing zero_optimization'
	assert data['train_batch_size'] == 'auto', f'{preset}: train_batch_size should be auto'
	print(f'{preset}: OK (ZeRO stage {data[\"zero_optimization\"][\"stage\"]})')
	"

	- name: Notebook validation
	run: \|
	python -c "
	import json, os
	for nb_file in os.listdir('notebooks'):
	if nb_file.endswith('.ipynb'):
	path = os.path.join('notebooks', nb_file)
	with open(path) as f:
	nb = json.load(f)
	assert nb['nbformat'] == 4, f'{nb_file}: invalid nbformat'
	assert len(nb['cells']) > 0, f'{nb_file}: no cells'
	print(f'{nb_file}: OK ({len(nb[\"cells\"])} cells)')
	"

	- name: Safety prompts validation
	run: \|
	python -c "
	import json, os
	prompts_dir = 'configs/safety_prompts'
	assert os.path.isdir(prompts_dir), 'Missing configs/safety_prompts/'
	total = 0
	for f in os.listdir(prompts_dir):
	if f.endswith('.jsonl'):
	path = os.path.join(prompts_dir, f)
	with open(path) as fh:
	for i, line in enumerate(fh):
	data = json.loads(line)
	assert 'prompt' in data, f'{f} line {i+1}: missing prompt key'
	total += 1
	print(f'{f}: OK')
	assert total >= 140, f'Expected 140+ safety prompts, found {total}'
	print(f'Total safety prompts: {total}')
	"

	- name: Synthetic module import check
	run: \|
	python -c "
	from forgelm.synthetic import SyntheticDataGenerator
	print('synthetic.py import: OK')
	"

	- name: Site-as-tested-surface guard (strict)
	# Diffs site/*.html claims against the Python sources of truth
	# (compliance artefacts, quickstart templates, GPU profile count,
	# pyproject version). Fails the build on any drift; see
	# tools/check_site_claims.py for what's checked and why.
	run: python3 tools/check_site_claims.py --strict

	- name: Bilingual doc H2/H3/H4 parity check (strict)
	# Phase 24: extended structural parity guard. Where the prior
	# inline check counted H2 only and missed reordered / demoted
	# sections, ``tools/check_bilingual_parity.py`` walks every
	# registered EN/TR pair and compares the full H2 + H3 + H4
	# spine. Translated text differs by definition; structural
	# depth + ordering must not. Add a new pair: register it in
	# ``tools/check_bilingual_parity.py::_PAIRS``; CI picks it up
	# automatically the next run.
	run: python3 tools/check_bilingual_parity.py --strict

	- name: Markdown anchor resolution check (strict)
	# Wave 5 / Faz 30 Task N: every Markdown anchor + relative-link
	# under docs/ must resolve. Catches renamed/removed targets,
	# slug-case drift, and forward-refs to pages that never landed
	# (the GH-014/017/019 bug class). Strict mode wired up after
	# the Wave 5 cleanup zeroed the live drift count from 36 → 0;
	# the tool itself remains usable in advisory mode (no flag) for
	# local feedback during a docs PR. See tools/check_anchor_resolution.py
	# for what's checked and the GFM slug approximation used.
	run: python3 tools/check_anchor_resolution.py --strict

	- name: CLI / docs help-consistency drift check (strict)
	# Wave 5 / Faz 30 Task J: catches docs that cite ghost flags
	# or non-existent subcommands (the GH-008/011/016/018/020
	# bug class). Strict mode wired up after the Wave 5 cleanup
	# commit zeroed the live drift count from 40 → 0 (matches the
	# check_anchor_resolution.py precedent: gate lands advisory,
	# baseline cleanup follows, later commit flips to --strict).
	# The tool itself remains usable in advisory mode (no flag)
	# for local feedback during a docs PR.
	run: python3 tools/check_cli_help_consistency.py --strict

	- name: Unguarded sys.modules.pop guard
	# v0.5.7 round-4 absorption: the v0.5.7 round-3 review traced
	# 35 spurious full-suite failures to three test sites that
	# popped torch / numpy from sys.modules without restoring
	# them, half-loading torch._C for every later test in the
	# pytest session. This guard fails CI on any new
	# ``sys.modules.pop("<heavy-module>")`` /
	# ``del sys.modules["<heavy-module>"]`` site, steering authors
	# to ``monkeypatch.delitem`` (auto-restores on teardown).
	run: python3 tools/check_no_unguarded_sys_modules_pop.py

	- name: User-manual self-contained link guard (strict)
	# Post-v0.7.0 cycle: the docs/usermanuals/ tree is the source of
	# truth for the static-site SPA viewer (site/usermanual.html +
	# site/js/guide.js). The viewer only resolves SPA hash-router
	# routes ``#/<section>/<page>`` and external HTTPS URLs.
	# Anything else — repo-relative ``../../../guides/...`` paths,
	# intra-manual ``../section/page.md`` paths, SPA routes that
	# point at a non-existent page — 404s when the user clicks.
	# This guard walks every *.md under docs/usermanuals/ and
	# fails the gate on any link that would break in the SPA.
	# See docs/standards/documentation.md "User-manual link
	# discipline" for the full ruleset.
	run: python3 tools/check_usermanual_self_contained.py --strict

	- name: License check
	run: \|
	test -f LICENSE \|\| (echo "LICENSE file missing" && exit 1)
	head -1 LICENSE \| grep -q "Apache" \|\| (echo "Expected Apache License" && exit 1)
	echo "License: OK"

	# Wave 4 / Faz 23: bandit static-security analysis on forgelm/.
	# tests/ is excluded via [tool.bandit] in pyproject.toml because
	# test fixtures legitimately use insecure patterns (assert, dummy
	# secrets). HIGH → fail; MEDIUM → ::warning::; LOW → silent.
	# (Mirrors nightly.yml supply-chain-security severity policy.)
	# See tools/check_bandit.py for the severity-tiering helper.
	- name: bandit (static security analysis)
	run: \|
	python -m pip install 'bandit[toml]>=1.7.0,<2.0.0'
	# bandit returns exit 1 on ANY findings. We capture the JSON
	# report and apply our own severity policy via the tiering
	# helper (tools/check_bandit.py).
	bandit -c pyproject.toml -r forgelm/ -f json -o /tmp/bandit.json \|\| true
	python3 tools/check_bandit.py /tmp/bandit.json

# Provide feedback

# Saved searches

# Use saved searches to filter your results more quickly

# Uh oh!

# Merge pull request #56 from cemililik/development #227

# Workflow file

# Merge pull request #56 from cemililik/development #227

# Uh oh!

# Workflow file for this run