Merge pull request #18 from cemililik/development #127

Workflow file for this run

	# CI workflow: defines the lint, test, and validate jobs below.
	name: CI

	# Run on pushes to main and on pull requests that target main.
	on:
	  push:
	    branches: [main]
	  pull_request:
	    branches: [main]

	jobs:
	# --- Job 1: Lint (fast, no heavy deps) ---
	  lint:
	    runs-on: ubuntu-latest
	    steps:
	      - uses: actions/checkout@v5

	      # One fixed interpreter; the version is quoted so YAML keeps it a string.
	      - uses: actions/setup-python@v6
	        with:
	          python-version: "3.11"

	      # Only ruff is installed here; the project package itself is not.
	      - name: Install linter
	        run: pip install ruff

	      - name: Ruff lint
	        run: ruff check .

	      # --check makes this a verification step rather than a reformat.
	      - name: Ruff format check
	        run: ruff format --check .

	# --- Job 2: Test (matrix across Python versions) ---
	  test:
	    runs-on: ubuntu-latest
	    strategy:
	      matrix:
	        # Quoted so "3.10" is not parsed as the float 3.1.
	        python-version: ["3.10", "3.11", "3.12", "3.13"]
	    steps:
	      - uses: actions/checkout@v5

	      # One interpreter per matrix entry, with pip caching enabled.
	      - uses: actions/setup-python@v6
	        with:
	          python-version: ${{ matrix.python-version }}
	          cache: pip

	      # Editable install with the "dev" extra.
	      - name: Install package (dev)
	        run: |
	          python -m pip install --upgrade pip
	          python -m pip install -e ".[dev]"

	      # Writes coverage.xml, which the upload step below publishes.
	      - name: Run tests with coverage
	        run: pytest -q --tb=short --cov=forgelm --cov-report=term-missing --cov-report=xml:coverage.xml

	      # Restricted to the 3.11 leg so only one artifact named coverage-report
	      # is produced; always() keeps the upload running after a failed test step.
	      - name: Upload coverage (3.11 only)
	        if: matrix.python-version == '3.11' && always()
	        uses: actions/upload-artifact@v5
	        with:
	          name: coverage-report
	          path: coverage.xml

	# --- Job 3: Validate (config, CLI, assets) ---
	  validate:
	    runs-on: ubuntu-latest
	    steps:
	      - uses: actions/checkout@v5

	      - uses: actions/setup-python@v6
	        with:
	          python-version: "3.11"
	          cache: pip

	      # Editable install with the "dev" extra; later steps import forgelm
	      # and call the forgelm CLI.
	      - name: Install package
	        run: |
	          python -m pip install --upgrade pip
	          python -m pip install -e ".[dev]"

	      # Runs the CLI against the shipped template in dry-run mode, with the
	      # default output and again with JSON output.
	      - name: CLI smoke checks
	        run: |
	          forgelm --version
	          forgelm --config config_template.yaml --dry-run
	          forgelm --config config_template.yaml --dry-run --output-format json

	      # Loads config_template.yaml through forgelm.config.load_config and
	      # asserts the values the template is expected to carry.
	      - name: Config template validation
	        run: |
	          python -c "
	          from forgelm.config import load_config
	          cfg = load_config('config_template.yaml')
	          assert cfg.model.name_or_path, 'model.name_or_path is empty'
	          assert cfg.model.trust_remote_code is False, 'trust_remote_code should default to False'
	          assert cfg.model.offline is False, 'offline should default to False'
	          assert cfg.training.trainer_type == 'sft', 'default trainer_type should be sft'
	          assert cfg.data.dataset_name_or_path, 'data.dataset_name_or_path is empty'
	          print('Config template validation passed.')
	          "

	      # Each listed preset must exist, parse as JSON, contain a
	      # zero_optimization section, and set train_batch_size to 'auto'.
	      - name: DeepSpeed config validation
	        run: |
	          python -c "
	          import json, os
	          configs_dir = 'configs/deepspeed'
	          for preset in ['zero2.json', 'zero3.json', 'zero3_offload.json']:
	              path = os.path.join(configs_dir, preset)
	              assert os.path.isfile(path), f'Missing: {path}'
	              with open(path) as f:
	                  data = json.load(f)
	              assert 'zero_optimization' in data, f'{preset}: missing zero_optimization'
	              assert data['train_batch_size'] == 'auto', f'{preset}: train_batch_size should be auto'
	              print(f'{preset}: OK (ZeRO stage {data[\"zero_optimization\"][\"stage\"]})')
	          "

	      # Every .ipynb under notebooks/ must be nbformat 4 with at least one cell.
	      - name: Notebook validation
	        run: |
	          python -c "
	          import json, os
	          for nb_file in os.listdir('notebooks'):
	              if nb_file.endswith('.ipynb'):
	                  path = os.path.join('notebooks', nb_file)
	                  with open(path) as f:
	                      nb = json.load(f)
	                  assert nb['nbformat'] == 4, f'{nb_file}: invalid nbformat'
	                  assert len(nb['cells']) > 0, f'{nb_file}: no cells'
	                  print(f'{nb_file}: OK ({len(nb[\"cells\"])} cells)')
	          "

	      # Every line of every .jsonl file must be a JSON object with a 'prompt'
	      # key; at least 140 prompts are required across all files.
	      - name: Safety prompts validation
	        run: |
	          python -c "
	          import json, os
	          prompts_dir = 'configs/safety_prompts'
	          assert os.path.isdir(prompts_dir), 'Missing configs/safety_prompts/'
	          total = 0
	          for f in os.listdir(prompts_dir):
	              if f.endswith('.jsonl'):
	                  path = os.path.join(prompts_dir, f)
	                  with open(path) as fh:
	                      for i, line in enumerate(fh):
	                          data = json.loads(line)
	                          assert 'prompt' in data, f'{f} line {i+1}: missing prompt key'
	                          total += 1
	                  print(f'{f}: OK')
	          assert total >= 140, f'Expected 140+ safety prompts, found {total}'
	          print(f'Total safety prompts: {total}')
	          "

	      - name: Synthetic module import check
	        run: |
	          python -c "
	          from forgelm.synthetic import SyntheticDataGenerator
	          print('synthetic.py import: OK')
	          "

	      # Counts lines starting with '## ' in each English / -tr document pair.
	      # All pairs are reported before the step exits non-zero on a mismatch.
	      - name: Bilingual doc H2 parity check
	        run: |
	          python -c "
	          import re, sys
	          pairs = [
	              ('docs/reference/configuration.md', 'docs/reference/configuration-tr.md'),
	              ('docs/reference/usage.md', 'docs/reference/usage-tr.md'),
	              ('docs/reference/distributed_training.md', 'docs/reference/distributed_training-tr.md'),
	              ('docs/reference/data_preparation.md', 'docs/reference/data_preparation-tr.md'),
	              ('docs/reference/architecture.md', 'docs/reference/architecture-tr.md'),
	              ('docs/guides/ingestion.md', 'docs/guides/ingestion-tr.md'),
	              ('docs/guides/data_audit.md', 'docs/guides/data_audit-tr.md'),
	          ]
	          failed = False
	          for en, tr in pairs:
	              with open(en) as f:
	                  en_count = sum(1 for line in f if re.match(r'^## ', line))
	              with open(tr) as f:
	                  tr_count = sum(1 for line in f if re.match(r'^## ', line))
	              status = 'OK' if en_count == tr_count else 'FAIL'
	              print(f'{status}: {en} ({en_count} H2) vs {tr} ({tr_count} H2)')
	              if en_count != tr_count:
	                  failed = True
	          if failed:
	              sys.exit(1)
	          "

	      # LICENSE must exist and its first line must contain "Apache".
	      - name: License check
	        run: |
	          test -f LICENSE || (echo "LICENSE file missing" && exit 1)
	          head -1 LICENSE | grep -q "Apache" || (echo "Expected Apache License" && exit 1)
	          echo "License: OK"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Merge pull request #18 from cemililik/development #127

Workflow file

Merge pull request #18 from cemililik/development #127

Uh oh!

Workflow file for this run