diff --git a/.github/workflows/package-entries-check.yml b/.github/workflows/package-entries-check.yml new file mode 100644 index 00000000..53bdb3a9 --- /dev/null +++ b/.github/workflows/package-entries-check.yml @@ -0,0 +1,43 @@ +--- +name: Package Entries Validation + +on: + push: + branches: + - main + tags: + - 'v*' + pull_request: + branches: + - main + paths: + - 'components/**' + - 'pipelines/**' + - 'pyproject.toml' + - 'uv.lock' + - 'scripts/validate_package_entries/**' + - '.github/workflows/package-entries-check.yml' + - '.github/actions/setup-python-ci/**' + +# Cancel in-progress runs when a new commit is pushed to the same PR +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + validate-package-entries: + runs-on: ubuntu-24.04 + + name: Validate Package Entries + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + + - name: Setup Python CI + uses: ./.github/actions/setup-python-ci + with: + python-version: 3.11 + + - name: Run package entries validation + run: uv run python -m scripts.validate_package_entries.validate_package_entries diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 93cfe27e..d3a0ba7f 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -660,6 +660,21 @@ pytest tests/ --cov=. --cov-report=html - **Dependencies**: Mock external services in unit tests; use real dependencies in local runner tests - **Cleanup**: Use provided fixtures to ensure proper test environment cleanup +### Package Validation + +The validation script ensures the `packages` list in `pyproject.toml` stays in sync with the actual +Python package structure. It discovers all packages in `components/` and `pipelines/` and compares +them with the declared packages in `pyproject.toml`. 
+ +Run the validation locally: + +```bash +uv run python -m scripts.validate_package_entries.validate_package_entries +``` + +If validation fails, update the `packages` list in `pyproject.toml` under `[tool.setuptools]` to +include any missing packages. The script will report exactly which packages are missing or extra. + ### Building Custom Container Images If your component uses a custom image, test the container build: diff --git a/pyproject.toml b/pyproject.toml index 084032a8..c9808f29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,14 +49,15 @@ packages = [ "kfp_components", "kfp_components.components", "kfp_components.components.training", - "kfp_components.components.evaluation", + "kfp_components.components.evaluation", "kfp_components.components.data_processing", + "kfp_components.components.data_processing.yoda_data_processor", "kfp_components.components.deployment", "kfp_components.pipelines", "kfp_components.pipelines.training", "kfp_components.pipelines.evaluation", "kfp_components.pipelines.data_processing", - "kfp_components.pipelines.deployment" + "kfp_components.pipelines.deployment", ] [tool.setuptools.package-dir] diff --git a/scripts/validate_package_entries/__init__.py b/scripts/validate_package_entries/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/validate_package_entries/tests/__init__.py b/scripts/validate_package_entries/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/scripts/validate_package_entries/tests/test_validate_package_entries.py b/scripts/validate_package_entries/tests/test_validate_package_entries.py new file mode 100644 index 00000000..a8e7936d --- /dev/null +++ b/scripts/validate_package_entries/tests/test_validate_package_entries.py @@ -0,0 +1,251 @@ +"""Unit tests for validate_package_entries.py.""" + +from pathlib import Path + +import pytest + +from ..validate_package_entries import ( + discover_packages, + read_pyproject_packages, + 
def _make_package(package_dir: Path) -> Path:
    """Create ``package_dir`` and mark it as a package with an empty ``__init__.py``."""
    package_dir.mkdir()
    (package_dir / "__init__.py").write_text("")
    return package_dir


def _mark_root_package(root: Path) -> None:
    """Drop an empty ``__init__.py`` into an already existing root directory."""
    (root / "__init__.py").write_text("")


def _write_pyproject(root: Path, content: str) -> None:
    """Write ``content`` verbatim to ``root/pyproject.toml``."""
    (root / "pyproject.toml").write_text(content)


@pytest.fixture
def components_training_structure(tmp_path: Path) -> Path:
    """Build a components/training package tree and return the directory that holds it."""
    components = _make_package(tmp_path / "components")
    _make_package(components / "training")
    return tmp_path


class TestDiscoverPackages:
    """Exercise package discovery against small on-disk trees."""

    def test_discover_root_package(self, tmp_path: Path):
        """A root-level __init__.py yields the root package."""
        _mark_root_package(tmp_path)

        assert "kfp_components" in discover_packages(tmp_path)

    def test_discover_components_packages(self, components_training_structure: Path):
        """Packages below components/ are reported."""
        found = discover_packages(components_training_structure)

        assert "kfp_components.components" in found
        assert "kfp_components.components.training" in found

    def test_discover_pipelines_packages(self, tmp_path: Path):
        """Packages below pipelines/ are reported."""
        pipelines = _make_package(tmp_path / "pipelines")
        _make_package(pipelines / "evaluation")

        found = discover_packages(tmp_path)

        assert "kfp_components.pipelines" in found
        assert "kfp_components.pipelines.evaluation" in found

    def test_skip_directories_without_init(self, tmp_path: Path):
        """A directory lacking __init__.py is not reported as a package."""
        components = _make_package(tmp_path / "components")
        # Plain directory: deliberately no __init__.py inside.
        (components / "no_init").mkdir()

        found = discover_packages(tmp_path)

        assert "kfp_components.components" in found
        assert "kfp_components.components.no_init" not in found

    def test_discover_nested_packages(self, components_training_structure: Path):
        """Packages nested below an existing package are reported too."""
        _make_package(components_training_structure / "components" / "training" / "nested")

        found = discover_packages(components_training_structure)

        assert "kfp_components.components" in found
        assert "kfp_components.components.training" in found
        assert "kfp_components.components.training.nested" in found


class TestReadPyprojectPackages:
    """Exercise parsing of the packages list from pyproject.toml."""

    def test_read_valid_packages(self, tmp_path: Path):
        """All declared packages are returned."""
        _write_pyproject(
            tmp_path,
            """
[build-system]
requires = ["setuptools", "wheel"]

[tool.setuptools]
packages = [
    "kfp_components",
    "kfp_components.components",
    "kfp_components.components.training",
]
""",
        )

        declared = read_pyproject_packages(tmp_path)

        assert "kfp_components" in declared
        assert "kfp_components.components" in declared
        assert "kfp_components.components.training" in declared

    def test_read_empty_packages_list(self, tmp_path: Path):
        """An empty packages list yields an empty set."""
        _write_pyproject(
            tmp_path,
            """
[build-system]
requires = ["setuptools", "wheel"]

[tool.setuptools]
packages = []
""",
        )

        assert read_pyproject_packages(tmp_path) == set()

    def test_missing_tool_setuptools_section(self, tmp_path: Path):
        """A file without [tool.setuptools] yields an empty set."""
        _write_pyproject(
            tmp_path,
            """
[build-system]
requires = ["setuptools", "wheel"]
""",
        )

        assert read_pyproject_packages(tmp_path) == set()

    def test_missing_packages_key(self, tmp_path: Path):
        """A [tool.setuptools] table without a packages key yields an empty set."""
        _write_pyproject(
            tmp_path,
            """
[build-system]
requires = ["setuptools", "wheel"]

[tool.setuptools]
package-dir = {"kfp_components" = "."}
""",
        )

        assert read_pyproject_packages(tmp_path) == set()


class TestValidatePackageEntries:
    """Exercise the comparison of discovered and declared packages."""

    def test_valid_sync(self, components_training_structure: Path):
        """Matching tree and declaration validate without errors."""
        root = components_training_structure
        _mark_root_package(root)
        _write_pyproject(
            root,
            """
[build-system]
requires = ["setuptools", "wheel"]

[tool.setuptools]
packages = [
    "kfp_components",
    "kfp_components.components",
    "kfp_components.components.training",
]
""",
        )

        is_valid, errors = validate_package_entries(root)

        assert is_valid
        assert len(errors) == 0

    def test_missing_packages(self, components_training_structure: Path):
        """A package on disk that is not declared is reported as missing."""
        root = components_training_structure
        _mark_root_package(root)
        _write_pyproject(
            root,
            """
[build-system]
requires = ["setuptools", "wheel"]

[tool.setuptools]
packages = [
    "kfp_components",
    "kfp_components.components",
    # Missing kfp_components.components.training
]
""",
        )

        is_valid, errors = validate_package_entries(root)

        assert not is_valid
        assert len(errors) == 1
        (only_error,) = errors
        assert "Missing packages" in only_error
        assert "kfp_components.components.training" in only_error

    def test_extra_packages(self, tmp_path: Path):
        """Declared packages with no directory on disk are reported as extra."""
        # Only the root package exists on disk.
        _mark_root_package(tmp_path)
        _write_pyproject(
            tmp_path,
            """
[build-system]
requires = ["setuptools", "wheel"]

[tool.setuptools]
packages = [
    "kfp_components",
    "kfp_components.components",
    "kfp_components.components.nonexistent",  # Extra package
]
""",
        )

        is_valid, errors = validate_package_entries(tmp_path)

        assert not is_valid
        assert len(errors) == 1
        (only_error,) = errors
        assert "Extra packages" in only_error
        assert "kfp_components.components" in only_error
        assert "kfp_components.components.nonexistent" in only_error

    def test_both_missing_and_extra(self, components_training_structure: Path):
        """Missing and extra packages each produce their own error entry."""
        root = components_training_structure
        _mark_root_package(root)
        _write_pyproject(
            root,
            """
[build-system]
requires = ["setuptools", "wheel"]

[tool.setuptools]
packages = [
    "kfp_components",
    "kfp_components.components",
    # Missing kfp_components.components.training
    "kfp_components.components.nonexistent",  # Extra package
]
""",
        )

        is_valid, errors = validate_package_entries(root)

        assert not is_valid
        assert len(errors) == 2
        assert any("Missing packages" in message for message in errors)
        assert any("Extra packages" in message for message in errors)
+1,164 @@ +#!/usr/bin/env python3 +"""Validate that package entries in pyproject.toml are up to date. + +This script discovers all Python packages in the components/ and pipelines/ +directories and ensures they are properly listed in pyproject.toml under +tool.setuptools.packages. + +Usage: + uv run python -m scripts.validate_package_entries.validate_package_entries +""" + +import argparse +import sys +import tomllib +from pathlib import Path + +from ..lib.discovery import get_repo_root + + +def _discover_recursive(directory: Path, base_package: str, packages: set[str]) -> None: + """Recursively discover packages in a directory. + + Args: + directory: Directory to search for packages. + base_package: Base package name (e.g., "kfp_components.components"). + packages: Set to add discovered packages to. + """ + if not directory.exists(): + return + + for item in directory.iterdir(): + # Skip test directories + if item.name == "tests": + continue + + if item.is_dir() and (item / "__init__.py").exists(): + package_name = f"{base_package}.{item.name}" + packages.add(package_name) + + # Recursively discover nested packages + _discover_recursive(item, package_name, packages) + + +def discover_packages(repo_root: Path) -> set[str]: + """Discover all Python packages in components/ and pipelines/ directories. + + Returns a set of package names in the format kfp_components.* based on + the package-dir mapping in pyproject.toml. 
+ """ + packages: set[str] = set() + + # Always include the root package + if (repo_root / "__init__.py").exists(): + packages.add("kfp_components") + + # Discover packages in components/ + components_dir = repo_root / "components" + if components_dir.exists() and (components_dir / "__init__.py").exists(): + packages.add("kfp_components.components") + _discover_recursive(components_dir, "kfp_components.components", packages) + + # Discover packages in pipelines/ + pipelines_dir = repo_root / "pipelines" + if pipelines_dir.exists() and (pipelines_dir / "__init__.py").exists(): + packages.add("kfp_components.pipelines") + _discover_recursive(pipelines_dir, "kfp_components.pipelines", packages) + + return packages + + +def read_pyproject_packages(repo_root: Path) -> set[str]: + """Read the packages list from pyproject.toml.""" + pyproject_path = repo_root / "pyproject.toml" + + try: + with open(pyproject_path, "rb") as f: + pyproject = tomllib.load(f) + except FileNotFoundError: + raise RuntimeError(f"pyproject.toml not found at {pyproject_path}") + except tomllib.TOMLDecodeError as e: + raise RuntimeError(f"Failed to parse pyproject.toml: {e}") from e + + tool_setuptools = pyproject.get("tool", {}).get("setuptools", {}) + packages = tool_setuptools.get("packages", []) + + if not isinstance(packages, list): + raise RuntimeError("tool.setuptools.packages must be a list") + + if not all(isinstance(p, str) for p in packages): + raise RuntimeError("All entries in tool.setuptools.packages must be strings") + + return set(packages) + + +def validate_package_entries(repo_root: Path | None = None) -> tuple[bool, list[str]]: + """Validate that package entries in pyproject.toml match discovered packages. 
+ + Returns: + (is_valid, error_messages) + """ + if repo_root is None: + repo_root = get_repo_root() + + discovered = discover_packages(repo_root) + declared = read_pyproject_packages(repo_root) + + errors: list[str] = [] + + # Find missing packages (discovered but not declared) + missing = discovered - declared + if missing: + errors.append( + f"Missing packages in pyproject.toml (found {len(missing)}):\n" + + "\n".join(f" - {pkg}" for pkg in sorted(missing)) + ) + + # Find extra packages (declared but not discovered) + extra = declared - discovered + if extra: + errors.append( + f"Extra packages in pyproject.toml (found {len(extra)}):\n" + + "\n".join(f" - {pkg}" for pkg in sorted(extra)) + ) + + is_valid = len(errors) == 0 + return is_valid, errors + + +def main() -> int: + """Main entry point.""" + parser = argparse.ArgumentParser( + description="Validate package entries in pyproject.toml", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Validate all packages + uv run python -m scripts.validate_package_entries.validate_package_entries + """, + ) + + parser.parse_args() + + try: + is_valid, errors = validate_package_entries() + + if is_valid: + print("✅ All package entries in pyproject.toml are up to date.") + return 0 + else: + print("❌ Package entries in pyproject.toml are out of sync:\n") + for error in errors: + print(error) + print( + "\nTo fix, update the 'packages' list in pyproject.toml under " + "[tool.setuptools] to match the discovered packages." + ) + return 1 + except Exception as e: + print(f"❌ Error: {e}", file=sys.stderr) + return 1 + + +if __name__ == "__main__": + sys.exit(main())