Skip to content

Merge pull request #18 from cemililik/development #127

Merge pull request #18 from cemililik/development

Merge pull request #18 from cemililik/development #127

Workflow file for this run

# CI pipeline: lint, multi-version tests, and repository validation.
name: CI

# Run on every push to main and on every pull request targeting main.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Least privilege: the jobs in this workflow only read the checked-out
# sources, so the GITHUB_TOKEN is restricted to read-only repository contents.
permissions:
  contents: read

jobs:
# --- Job 1: Lint (fast, no heavy deps) ---
  # Installs only ruff (the package itself is not installed), then runs the
  # lint rules and the formatter with --check.
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          # Quoted so the version stays a string rather than a float.
          python-version: '3.11'
      - name: Install linter
        run: |
          pip install ruff
      - name: Ruff lint
        run: |
          ruff check .
      - name: Ruff format check
        run: |
          ruff format --check .
# --- Job 2: Test (matrix across Python versions) ---
  # Runs the pytest suite with coverage on every interpreter in the matrix and
  # publishes the XML coverage report produced by the 3.11 leg.
  test:
    runs-on: ubuntu-latest
    strategy:
      # Let every interpreter finish: with the default fail-fast behaviour a
      # failure on one version cancels the others and hides version-specific
      # breakage.
      fail-fast: false
      matrix:
        # Quoted so "3.10" is not parsed as the float 3.1.
        python-version: ["3.10", "3.11", "3.12", "3.13"]
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      - name: Install package (dev)
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[dev]"
      - name: Run tests with coverage
        # Folded scalar: the lines below are joined into one command line.
        run: >-
          pytest -q --tb=short
          --cov=forgelm
          --cov-report=term-missing
          --cov-report=xml:coverage.xml
      - name: Upload coverage (3.11 only)
        # Upload even when the tests failed, but not when the run was
        # cancelled (always() would still run this step on cancellation).
        if: matrix.python-version == '3.11' && !cancelled()
        uses: actions/upload-artifact@v5
        with:
          name: coverage-report
          path: coverage.xml
# --- Job 3: Validate (config, CLI, assets) ---
  # Installs the package and sanity-checks the CLI, the config template, the
  # DeepSpeed presets, notebooks, safety prompts, bilingual docs, and license.
  # Every inline Python script opens files with an explicit UTF-8 encoding so
  # the checks do not depend on the runner's locale.
  validate:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"
          cache: pip
      - name: Install package
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[dev]"
      # The CLI must start and accept the shipped template in dry-run mode,
      # both with the default output and with JSON output.
      - name: CLI smoke checks
        run: |
          forgelm --version
          forgelm --config config_template.yaml --dry-run
          forgelm --config config_template.yaml --dry-run --output-format json
      # Load the template through the package's own loader and pin the
      # defaults asserted below.
      - name: Config template validation
        run: |
          python -c "
          from forgelm.config import load_config
          cfg = load_config('config_template.yaml')
          assert cfg.model.name_or_path, 'model.name_or_path is empty'
          assert cfg.model.trust_remote_code is False, 'trust_remote_code should default to False'
          assert cfg.model.offline is False, 'offline should default to False'
          assert cfg.training.trainer_type == 'sft', 'default trainer_type should be sft'
          assert cfg.data.dataset_name_or_path, 'data.dataset_name_or_path is empty'
          print('Config template validation passed.')
          "
      # Each preset must exist, parse as JSON, contain a zero_optimization
      # section, and leave train_batch_size set to 'auto'.
      - name: DeepSpeed config validation
        run: |
          python -c "
          import json, os
          configs_dir = 'configs/deepspeed'
          for preset in ['zero2.json', 'zero3.json', 'zero3_offload.json']:
              path = os.path.join(configs_dir, preset)
              assert os.path.isfile(path), f'Missing: {path}'
              with open(path, encoding='utf-8') as f:
                  data = json.load(f)
              assert 'zero_optimization' in data, f'{preset}: missing zero_optimization'
              assert data['train_batch_size'] == 'auto', f'{preset}: train_batch_size should be auto'
              print(f'{preset}: OK (ZeRO stage {data[\"zero_optimization\"][\"stage\"]})')
          "
      # Every .ipynb under notebooks/ must be nbformat 4 with at least one cell.
      - name: Notebook validation
        run: |
          python -c "
          import json, os
          for nb_file in os.listdir('notebooks'):
              if nb_file.endswith('.ipynb'):
                  path = os.path.join('notebooks', nb_file)
                  with open(path, encoding='utf-8') as f:
                      nb = json.load(f)
                  assert nb['nbformat'] == 4, f'{nb_file}: invalid nbformat'
                  assert len(nb['cells']) > 0, f'{nb_file}: no cells'
                  print(f'{nb_file}: OK ({len(nb[\"cells\"])} cells)')
          "
      # Every line of every .jsonl file must be a JSON object with a 'prompt'
      # key, and at least 140 prompts must exist in total.
      - name: Safety prompts validation
        run: |
          python -c "
          import json, os
          prompts_dir = 'configs/safety_prompts'
          assert os.path.isdir(prompts_dir), 'Missing configs/safety_prompts/'
          total = 0
          for f in os.listdir(prompts_dir):
              if f.endswith('.jsonl'):
                  path = os.path.join(prompts_dir, f)
                  with open(path, encoding='utf-8') as fh:
                      for i, line in enumerate(fh):
                          data = json.loads(line)
                          assert 'prompt' in data, f'{f} line {i+1}: missing prompt key'
                          total += 1
                  print(f'{f}: OK')
          assert total >= 140, f'Expected 140+ safety prompts, found {total}'
          print(f'Total safety prompts: {total}')
          "
      - name: Synthetic module import check
        run: |
          python -c "
          from forgelm.synthetic import SyntheticDataGenerator
          print('synthetic.py import: OK')
          "
      # Each document and its -tr counterpart must have the same number of
      # level-2 headings (lines starting with '## '); any mismatch fails the step.
      - name: Bilingual doc H2 parity check
        run: |
          python -c "
          import re, sys
          pairs = [
              ('docs/reference/configuration.md', 'docs/reference/configuration-tr.md'),
              ('docs/reference/usage.md', 'docs/reference/usage-tr.md'),
              ('docs/reference/distributed_training.md', 'docs/reference/distributed_training-tr.md'),
              ('docs/reference/data_preparation.md', 'docs/reference/data_preparation-tr.md'),
              ('docs/reference/architecture.md', 'docs/reference/architecture-tr.md'),
              ('docs/guides/ingestion.md', 'docs/guides/ingestion-tr.md'),
              ('docs/guides/data_audit.md', 'docs/guides/data_audit-tr.md'),
          ]
          failed = False
          for en, tr in pairs:
              with open(en, encoding='utf-8') as f:
                  en_count = sum(1 for line in f if re.match(r'^## ', line))
              with open(tr, encoding='utf-8') as f:
                  tr_count = sum(1 for line in f if re.match(r'^## ', line))
              status = 'OK' if en_count == tr_count else 'FAIL'
              print(f'{status}: {en} ({en_count} H2) vs {tr} ({tr_count} H2)')
              if en_count != tr_count:
                  failed = True
          if failed:
              sys.exit(1)
          "
      # LICENSE must exist and its first line must mention Apache.
      - name: License check
        run: |
          test -f LICENSE || (echo "LICENSE file missing" && exit 1)
          head -n 1 LICENSE | grep -q "Apache" || (echo "Expected Apache License" && exit 1)
          echo "License: OK"