release(v0.5.0): Phase 11 + 11.5 + 12 + 12.5 consolidated release #122
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Continuous integration workflow: runs on pushes to main and on pull
# requests that target main.
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least privilege for GITHUB_TOKEN: the jobs in this workflow check out the
# repository and run local commands; none of them push, comment, or publish.
permissions:
  contents: read

jobs:
# --- Job 1: Lint (fast, no heavy deps) ---
  # Only ruff is installed here (the package itself is not), so lint and
  # formatting problems are reported without a full dependency install.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5

      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"

      - name: Install linter
        run: pip install ruff

      # Rule violations and formatting drift are separate steps so the
      # failing category is visible directly in the job summary.
      - name: Ruff lint
        run: ruff check .

      - name: Ruff format check
        run: ruff format --check .
# --- Job 2: Test (matrix across Python versions) ---
  test:
    runs-on: ubuntu-latest
    strategy:
      # Let every interpreter run to completion: with the default
      # fail-fast behaviour one failing version cancels the others and
      # hides whether they pass.
      fail-fast: false
      matrix:
        python-version: ["3.10", "3.11", "3.12", "3.13"]
    steps:
      - uses: actions/checkout@v5

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip

      - name: Install package (dev)
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[dev]"

      # Writes a terminal summary with missing lines plus coverage.xml for
      # the upload step below.
      - name: Run tests with coverage
        run: pytest -q --tb=short --cov=forgelm --cov-report=term-missing --cov-report=xml:coverage.xml

      # One artifact per workflow run is enough, so only the 3.11 leg
      # uploads. The report is still uploaded when the test step failed,
      # but not when the run was cancelled.
      - name: Upload coverage (3.11 only)
        if: ${{ !cancelled() && matrix.python-version == '3.11' }}
        uses: actions/upload-artifact@v5
        with:
          name: coverage-report
          path: coverage.xml
# --- Job 3: Validate (config, CLI, assets) ---
  # Smoke-tests the installed CLI and checks the shipped config template,
  # DeepSpeed presets, notebooks, safety prompts, bilingual docs and license.
  #
  # The inline Python checks are passed to the interpreter through quoted
  # heredocs (<<'PY'), so the shell performs no expansion or unescaping on
  # the script text. Files are opened as UTF-8 explicitly instead of relying
  # on the runner's locale.
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5

      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: pip

      - name: Install package
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[dev]"

      - name: CLI smoke checks
        run: |
          forgelm --version
          forgelm --config config_template.yaml --dry-run
          forgelm --config config_template.yaml --dry-run --output-format json

      # Loads the template through the project's own loader and pins the
      # defaults asserted below.
      - name: Config template validation
        run: |
          python - <<'PY'
          from forgelm.config import load_config

          cfg = load_config('config_template.yaml')
          assert cfg.model.name_or_path, 'model.name_or_path is empty'
          assert cfg.model.trust_remote_code is False, 'trust_remote_code should default to False'
          assert cfg.model.offline is False, 'offline should default to False'
          assert cfg.training.trainer_type == 'sft', 'default trainer_type should be sft'
          assert cfg.data.dataset_name_or_path, 'data.dataset_name_or_path is empty'
          print('Config template validation passed.')
          PY

      # Each preset must exist, parse as JSON, contain zero_optimization and
      # leave train_batch_size set to 'auto'.
      - name: DeepSpeed config validation
        run: |
          python - <<'PY'
          import json, os

          configs_dir = 'configs/deepspeed'
          for preset in ['zero2.json', 'zero3.json', 'zero3_offload.json']:
              path = os.path.join(configs_dir, preset)
              assert os.path.isfile(path), f'Missing: {path}'
              with open(path, encoding='utf-8') as f:
                  data = json.load(f)
              assert 'zero_optimization' in data, f'{preset}: missing zero_optimization'
              # .get() so a missing key fails with the message below, not a bare KeyError.
              assert data.get('train_batch_size') == 'auto', f'{preset}: train_batch_size should be auto'
              print(f'{preset}: OK (ZeRO stage {data["zero_optimization"]["stage"]})')
          PY

      # Every .ipynb under notebooks/ must be nbformat 4 JSON with at least
      # one cell.
      - name: Notebook validation
        run: |
          python - <<'PY'
          import json, os

          for nb_file in sorted(os.listdir('notebooks')):
              if not nb_file.endswith('.ipynb'):
                  continue
              path = os.path.join('notebooks', nb_file)
              with open(path, encoding='utf-8') as f:
                  nb = json.load(f)
              assert nb['nbformat'] == 4, f'{nb_file}: invalid nbformat'
              assert len(nb['cells']) > 0, f'{nb_file}: no cells'
              print(f'{nb_file}: OK ({len(nb["cells"])} cells)')
          PY

      # Every non-blank line of every .jsonl file must be a JSON object with
      # a 'prompt' key, and at least 140 prompts must exist in total.
      - name: Safety prompts validation
        run: |
          python - <<'PY'
          import json, os

          prompts_dir = 'configs/safety_prompts'
          assert os.path.isdir(prompts_dir), 'Missing configs/safety_prompts/'
          total = 0
          for name in sorted(os.listdir(prompts_dir)):
              if not name.endswith('.jsonl'):
                  continue
              path = os.path.join(prompts_dir, name)
              with open(path, encoding='utf-8') as fh:
                  for lineno, line in enumerate(fh, start=1):
                      # A blank or trailing empty line is not a record; skip it
                      # instead of crashing in json.loads.
                      if not line.strip():
                          continue
                      data = json.loads(line)
                      assert 'prompt' in data, f'{name} line {lineno}: missing prompt key'
                      total += 1
              print(f'{name}: OK')
          assert total >= 140, f'Expected 140+ safety prompts, found {total}'
          print(f'Total safety prompts: {total}')
          PY

      - name: Synthetic module import check
        run: |
          python - <<'PY'
          from forgelm.synthetic import SyntheticDataGenerator
          print('synthetic.py import: OK')
          PY

      # English and Turkish documents must have the same number of lines
      # starting with '## '. All pairs are reported before the step fails.
      - name: Bilingual doc H2 parity check
        run: |
          python - <<'PY'
          import sys

          pairs = [
              ('docs/reference/configuration.md', 'docs/reference/configuration-tr.md'),
              ('docs/reference/usage.md', 'docs/reference/usage-tr.md'),
              ('docs/reference/distributed_training.md', 'docs/reference/distributed_training-tr.md'),
              ('docs/reference/data_preparation.md', 'docs/reference/data_preparation-tr.md'),
              ('docs/reference/architecture.md', 'docs/reference/architecture-tr.md'),
              ('docs/guides/ingestion.md', 'docs/guides/ingestion-tr.md'),
              ('docs/guides/data_audit.md', 'docs/guides/data_audit-tr.md'),
          ]


          def count_h2(path):
              """Return the number of lines in *path* that start with '## '."""
              with open(path, encoding='utf-8') as f:
                  return sum(1 for line in f if line.startswith('## '))


          failed = False
          for en, tr in pairs:
              en_count = count_h2(en)
              tr_count = count_h2(tr)
              status = 'OK' if en_count == tr_count else 'FAIL'
              print(f'{status}: {en} ({en_count} H2) vs {tr} ({tr_count} H2)')
              if en_count != tr_count:
                  failed = True
          if failed:
              sys.exit(1)
          PY

      # The LICENSE file must exist and its first line must mention Apache.
      - name: License check
        run: |
          test -f LICENSE || (echo "LICENSE file missing" && exit 1)
          head -1 LICENSE | grep -q "Apache" || (echo "Expected Apache License" && exit 1)
          echo "License: OK"