|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Check that all imports in the package are satisfied by declared dependencies. |
| 3 | +
|
| 4 | +This script scans all Python files in the pacc/ directory, extracts import statements, |
| 5 | +and verifies that third-party packages are declared in pyproject.toml dependencies. |
| 6 | +
|
| 7 | +Usage: |
| 8 | + python scripts/check_imports.py |
| 9 | +
|
| 10 | +Exit codes: |
| 11 | + 0 - All imports are satisfied |
| 12 | + 1 - Missing dependencies found |
| 13 | +""" |
| 14 | + |
| 15 | +import ast |
| 16 | +import sys |
| 17 | +from pathlib import Path |
| 18 | +from typing import Set |
| 19 | + |
# Top-level names of standard library modules.
#
# The literal list below is a static baseline covering Python 3.8-3.12,
# including modules that were removed along the way, so that the check gives
# the same answer regardless of which interpreter runs it.  On Python 3.10+
# the interpreter's own ``sys.stdlib_module_names`` is merged in as well, which
# picks up anything the hand-maintained list misses.
#
# NOTE: ``typing_extensions`` is deliberately NOT listed: it is a PyPI
# distribution, not part of the standard library, so importing it requires a
# declared dependency.
STDLIB_MODULES = {
    # A-B
    "abc", "aifc", "antigravity", "argparse", "array", "ast", "asynchat",
    "asyncio", "asyncore", "atexit", "audioop", "base64", "bdb", "binascii",
    "binhex", "bisect", "builtins", "bz2",
    # C
    "cProfile", "calendar", "cgi", "cgitb", "chunk", "cmath", "cmd",
    "code", "codecs", "codeop", "collections", "colorsys", "compileall",
    "concurrent", "configparser", "contextlib", "contextvars", "copy",
    "copyreg", "crypt", "csv", "ctypes", "curses",
    # D
    "dataclasses", "datetime", "dbm", "decimal", "difflib", "dis",
    "distutils", "doctest", "dummy_threading",
    # E
    "email", "encodings", "ensurepip", "enum", "errno",
    # F
    "faulthandler", "fcntl", "filecmp", "fileinput", "fnmatch",
    "formatter", "fractions", "ftplib", "functools",
    # G
    "gc", "genericpath", "getopt", "getpass", "gettext", "glob", "graphlib",
    "grp", "gzip",
    # H
    "hashlib", "heapq", "hmac", "html", "http",
    # I
    "idlelib", "imaplib", "imghdr", "imp", "importlib", "inspect", "io",
    "ipaddress", "itertools",
    # J
    "json",
    # K
    "keyword",
    # L
    "lib2to3", "linecache", "locale", "logging", "lzma",
    # M
    "mailbox", "mailcap", "marshal", "math", "mimetypes", "mmap",
    "modulefinder", "msilib", "msvcrt", "multiprocessing",
    # N
    "netrc", "nis", "nntplib", "nt", "ntpath", "nturl2path", "numbers",
    # O
    "opcode", "operator", "optparse", "os", "ossaudiodev",
    # P
    "parser", "pathlib", "pdb", "pickle", "pickletools", "pipes", "pkgutil",
    "platform", "plistlib", "poplib", "posix", "posixpath", "pprint",
    "profile", "pstats", "pty", "pwd", "py_compile", "pyclbr", "pydoc",
    "pydoc_data",
    # Q
    "queue", "quopri",
    # R
    "random", "re", "readline", "reprlib", "resource", "rlcompleter",
    "runpy",
    # S
    "sched", "secrets", "select", "selectors", "shelve", "shlex",
    "shutil", "signal", "site", "smtpd", "smtplib", "sndhdr", "socket",
    "socketserver", "spwd", "sqlite3", "sre_compile", "sre_constants",
    "sre_parse", "ssl", "stat", "statistics", "string", "stringprep",
    "struct", "subprocess", "sunau", "symbol", "symtable", "sys",
    "sysconfig", "syslog",
    # T
    "tabnanny", "tarfile", "telnetlib", "tempfile", "termios", "test",
    "textwrap", "this", "threading", "time", "timeit", "tkinter", "token",
    "tokenize", "tomllib", "trace", "traceback", "tracemalloc", "tty",
    "turtle", "turtledemo", "types", "typing",
    # U
    "unicodedata", "unittest", "urllib", "uu", "uuid",
    # V
    "venv",
    # W
    "warnings", "wave", "weakref", "webbrowser", "winreg", "winsound",
    "wsgiref",
    # X
    "xdrlib", "xml", "xmlrpc",
    # Z
    "zipapp", "zipfile", "zipimport", "zlib", "zoneinfo",
    # Private/internal
    "_thread", "__future__",
    # sys.stdlib_module_names only exists on Python 3.10+; fall back to the
    # static list alone on older interpreters.
} | set(getattr(sys, "stdlib_module_names", ()))
| 94 | + |
# Import name -> PyPI distribution name (lowercase).
#
# Most entries are distributions that install a module under their own name;
# those are generated from a tuple.  The hand-written entries are the cases
# where the importable name differs from the distribution name.
IMPORT_TO_PACKAGE = {
    "yaml": "pyyaml",
    **{
        same_name: same_name
        for same_name in (
            "chardet",
            "psutil",
            "aiohttp",
            "aiofiles",
            "pytest",
            "coverage",
            "mypy",
            "ruff",
            "bandit",
            "build",
            "twine",
            "tomli",
            "mkdocs",
        )
    },
    "git": "gitpython",
    "PIL": "pillow",
    "cv2": "opencv-python",
    "sklearn": "scikit-learn",
    "bs4": "beautifulsoup4",
}
| 117 | + |
| 118 | + |
def get_imports_from_file(filepath: Path) -> Set[str]:
    """Extract the top-level names of all absolutely imported modules in a file.

    Every ``import`` / ``from ... import`` statement anywhere in the file is
    considered (module level, inside functions, inside ``try`` blocks, ...).
    Relative imports (``from . import x``, ``from .pkg import y``) are ignored.

    Args:
        filepath: Path of the Python source file to inspect.

    Returns:
        The set of top-level module names, e.g. ``{"os", "yaml"}`` for
        ``import os.path`` and ``from yaml import safe_load``.  An empty set
        is returned (after printing a warning) when the file cannot be parsed.
    """
    try:
        # Give the parser the raw bytes rather than text decoded as UTF-8:
        # it then honours a byte-order mark or a PEP 263 coding declaration
        # itself, so legitimately non-UTF-8 sources are still scanned.
        with open(filepath, "rb") as f:
            tree = ast.parse(f.read(), filename=str(filepath))
    except (SyntaxError, ValueError):
        # ValueError covers undecodable content (UnicodeDecodeError is a
        # subclass) and embedded NUL bytes on interpreters that do not report
        # those as SyntaxError.  One unreadable file must not abort the scan.
        print(f" Warning: Could not parse {filepath}")
        return set()

    imports: Set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # "import a.b.c" -> "a"
            imports.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0:
            # level == 0 means an absolute import; relative ones are internal.
            imports.add(node.module.split(".")[0])

    return imports
| 140 | + |
| 141 | + |
def _requirement_name(requirement: str) -> str:
    """Return the lowercased distribution name that starts a requirement string.

    Handles the PEP 508 forms ``name``, ``name[extra]``, ``name>=1``,
    ``name (>=1)``, ``name; marker`` and ``name @ url``.  Returns ``""`` when
    the string does not start with a valid name.
    """
    import re

    # PEP 508 name: starts and ends with a letter or digit, with letters,
    # digits, ".", "_" and "-" allowed in between.
    match = re.match(r"\s*([A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)", requirement)
    return match.group(1).lower() if match else ""


def _load_pyproject(pyproject_path: Path):
    """Parse pyproject.toml with ``tomllib``/``tomli``.

    Returns the parsed mapping, or ``None`` when no TOML parser is importable
    (Python < 3.11 without ``tomli`` installed).
    """
    if sys.version_info >= (3, 11):
        import tomllib as toml_parser
    else:
        try:
            import tomli as toml_parser
        except ImportError:
            return None
    with open(pyproject_path, "rb") as f:
        return toml_parser.load(f)


def _dependency_names_without_toml(content: str) -> Set[str]:
    """Best-effort extraction of dependency names when no TOML parser exists.

    Only the first ``dependencies = [...]`` array that starts a line is read;
    optional dependency groups are NOT picked up by this fallback.
    """
    import re

    # Array body: ordinary characters, or complete quoted strings.  Treating
    # quoted strings as units keeps a "]" inside e.g. "requests[security]"
    # from being mistaken for the end of the array.
    array_body = r"((?:[^\]\"']|\"(?:[^\"\\]|\\.)*\"|'[^']*')*)"
    # Anchored to the start of a line so keys such as "dev-dependencies"
    # are not matched by accident.
    match = re.search(
        r"^[ \t]*dependencies\s*=\s*\[" + array_body + r"\]",
        content,
        re.MULTILINE,
    )
    if not match:
        return set()

    names = set()
    quoted = re.findall(r"\"((?:[^\"\\]|\\.)*)\"|'([^']*)'", match.group(1))
    for double_quoted, single_quoted in quoted:
        name = _requirement_name(double_quoted or single_quoted)
        if name:
            names.add(name)
    return names


def get_declared_dependencies(pyproject_path: Path) -> Set[str]:
    """Extract declared dependency names from pyproject.toml.

    Reads ``[project].dependencies`` and every group under
    ``[project.optional-dependencies]``.  Names are lowercased; version
    specifiers, extras, environment markers and direct-reference URLs are
    stripped.

    Args:
        pyproject_path: Location of the pyproject.toml file.

    Returns:
        The set of lowercased distribution names.  If the file cannot be read
        or parsed a warning is printed and whatever was collected so far
        (possibly nothing) is returned; no exception propagates to the caller.
    """
    dependencies: Set[str] = set()

    try:
        data = _load_pyproject(pyproject_path)
        if data is None:
            # No TOML parser available: fall back to regex scanning.
            return _dependency_names_without_toml(
                pyproject_path.read_text(encoding="utf-8")
            )

        project = data.get("project", {})
        requirement_groups = [project.get("dependencies", [])]
        requirement_groups.extend(project.get("optional-dependencies", {}).values())

        for group in requirement_groups:
            for requirement in group:
                name = _requirement_name(requirement)
                if name:
                    dependencies.add(name)

    except Exception as e:
        # Deliberately broad: this is a reporting tool and a malformed
        # pyproject.toml should produce a warning, not a traceback.
        print(f" Warning: Could not parse pyproject.toml: {e}")

    return dependencies
| 184 | + |
| 185 | + |
def _canonical_name(name: str) -> str:
    """Return *name* lowercased with runs of "-", "_" and "." collapsed to "-".

    This is the PEP 503 normalization under which e.g. ``typing_extensions``
    and ``typing-extensions`` denote the same distribution.
    """
    import re

    return re.sub(r"[-_.]+", "-", name).lower()


def main() -> None:
    """Check the imports used in pacc/ against pyproject.toml dependencies.

    Locates the project root as the parent of this script's directory, scans
    every ``*.py`` file under ``pacc/``, filters out standard library and
    ``pacc`` imports, and reports every remaining import whose distribution is
    not declared in pyproject.toml.

    Always terminates the process via ``sys.exit``: status 0 when every import
    is covered, status 1 when something is missing or when ``pacc/`` or
    ``pyproject.toml`` cannot be found.
    """
    script_dir = Path(__file__).parent
    project_root = script_dir.parent
    pacc_dir = project_root / "pacc"
    pyproject_path = project_root / "pyproject.toml"

    if not pacc_dir.exists():
        print(f"Error: pacc/ directory not found at {pacc_dir}")
        sys.exit(1)

    if not pyproject_path.exists():
        print(f"Error: pyproject.toml not found at {pyproject_path}")
        sys.exit(1)

    print("Checking imports against declared dependencies...")
    print(f" Package directory: {pacc_dir}")
    print(f" pyproject.toml: {pyproject_path}")
    print()

    # Collect every imported top-level name across the package.
    all_imports: Set[str] = set()
    python_files = list(pacc_dir.rglob("*.py"))
    print(f"Scanning {len(python_files)} Python files...")

    for pyfile in python_files:
        all_imports.update(get_imports_from_file(pyfile))

    # Keep only third-party names: drop the standard library and the package
    # itself.
    third_party_imports = {
        imp for imp in all_imports if imp not in STDLIB_MODULES and imp != "pacc"
    }

    print(f"Found {len(third_party_imports)} third-party imports: {sorted(third_party_imports)}")
    print()

    declared_deps = get_declared_dependencies(pyproject_path)

    print(f"Declared dependencies: {sorted(declared_deps)}")
    print()

    # Compare canonical names so that separator style ("_" vs "-" vs ".")
    # in either pyproject.toml or the import name cannot cause a false alarm.
    declared_canonical = {_canonical_name(dep) for dep in declared_deps}

    missing = []
    for imp in sorted(third_party_imports):
        # Translate the import name to its distribution name where the two
        # are known to differ; otherwise assume they are the same.
        package_name = IMPORT_TO_PACKAGE.get(imp, imp).lower()
        is_declared = (
            _canonical_name(package_name) in declared_canonical
            or _canonical_name(imp) in declared_canonical
        )
        if not is_declared:
            missing.append((imp, package_name))

    if missing:
        print("❌ MISSING DEPENDENCIES:")
        for imp, pkg in missing:
            print(f" - Import '{imp}' requires package '{pkg}'")
        print()
        print("Add these to pyproject.toml [project].dependencies")
        sys.exit(1)
    else:
        print("✅ All imports are satisfied by declared dependencies")
        sys.exit(0)


if __name__ == "__main__":
    main()
0 commit comments