class DependencyGraph:
    """Directed graph of package names used to order a package's dependencies.

    Every name is lower-cased on the way in, so lookups are case-insensitive.
    """

    def __init__(self):
        # {package_name: [dependency_names]}, all names lower-cased.
        self.graph = {}

    def add_package(self, package, dependencies):
        """Register *package* with its direct *dependencies*.

        Calling this again for the same package replaces its previous list.
        """
        self.graph[package.lower()] = [dep.lower() for dep in dependencies]

    def resolve_dependencies(self, root_package):
        """Return *root_package* and its transitive dependencies, dependencies first.

        The walk is an iterative depth-first search, so no recursion is used.
        Every reachable package appears exactly once and the root is always
        last.  An edge that points back to a package that is still being
        expanded (a cycle) is ignored, so cyclic graphs terminate; inside a
        cycle not every "dependency first" constraint can be honoured.

        Packages that were never registered through ``add_package`` are
        treated as having no dependencies.
        """
        root = root_package.lower()
        ordered = []
        done = set()  # packages already appended to ``ordered``
        on_path = set()  # packages being expanded, i.e. ancestors of the current one
        stack = [(root, False)]  # (package, already expanded?)

        while stack:
            current, expanded = stack.pop()

            if expanded:
                # Everything ``current`` depends on has been emitted by now.
                on_path.discard(current)
                done.add(current)
                ordered.append(current)
                continue

            if current in done or current in on_path:
                # Stale duplicate: the package was queued by one dependent and
                # has since been handled through another one.
                continue

            on_path.add(current)
            stack.append((current, True))
            # Only ancestors (``on_path``) count as cycles.  A package that is
            # merely queued as a pending sibling must still be pushed here,
            # otherwise it would be emitted after the package that needs it.
            # Reversed so dependencies are visited in declaration order.
            for dep in reversed(self.graph.get(current, ())):
                if dep not in done and dep not in on_path:
                    stack.append((dep, False))

        return ordered
def run_checker(self, filename):
    """
    Yield product information for the package described by a METADATA/PKG-INFO file.

    ``Name`` and ``Version`` are read from the first three lines produced by
    ``parse_strings`` for *filename*.  The named package is always looked up
    with ``get_vendor``, whether or not it is installed in the interpreter
    running the scan.  If it is installed there, the dependencies returned by
    ``_parse_dependencies`` that are installed as well are looked up too,
    using the versions of the installed copies.

    A file without a ``Name`` or ``Version`` line is logged at debug level
    and produces nothing.

    NOTE(review): dependency versions describe the scanning interpreter's
    environment, not the scanned tree -- confirm this is the intended source.
    """
    self.filename = filename
    lines = parse_strings(self.filename)
    lines = "\n".join(lines.splitlines()[:3])

    def canonical(name):
        # Normalised project name: case-insensitive, and runs of "-", "_"
        # and "." are interchangeable.  Applied to BOTH sides of a lookup.
        return re.sub(r"[-_.]+", "-", name).lower()

    try:
        product = search(compile(r"^Name: (.+)$", MULTILINE), lines).group(1)
        version = search(compile(r"^Version: (.+)$", MULTILINE), lines).group(1)

        # Report the scanned package itself first; this must not depend on
        # the package also being installed next to the scanner.
        purl = self.generate_purl(product)  # Original name for PURL
        vendor = self.get_vendor(purl, product, version)
        if vendor is not None:
            yield from vendor

        # Index the installed distributions once instead of scanning the
        # whole mapping for every lookup.  The first entry wins for names
        # that normalise identically; entries without a name are skipped.
        installed = {}
        for name, candidate in self._get_installed_packages().items():
            if name:
                installed.setdefault(canonical(name), candidate)

        dist = installed.get(canonical(product))
        dependencies = self._parse_dependencies(dist) if dist is not None else []

        for dep_name in dependencies:
            dep_dist = installed.get(canonical(dep_name))
            if dep_dist is None:
                # Dependency is declared but not installed here.
                continue
            dep_purl = self.generate_purl(dep_name)
            dep_vendor = self.get_vendor(dep_purl, dep_name, dep_dist.version)
            if dep_vendor is not None:
                yield from dep_vendor

    # There are packages with a METADATA file in them containing different
    # data from what the tool expects; search() then returns None.
    except AttributeError:
        self.logger.debug(f"{filename} is an invalid METADATA/PKG-INFO")
    self.logger.debug(f"Done scanning file: {filename}")
+ """ + return { + "python_version": f"{sys.version_info.major}.{sys.version_info.minor}", + "sys_platform": sys.platform, + "os_name": os.name, + "platform_machine": platform.machine(), + "platform_system": platform.system(), + "platform_release": platform.release(), + } + + def strip_path(path_element: str, scanned_dir: str) -> str: path = Path(path_element) return path.drive + path.root + os.path.relpath(path_element, scanned_dir) diff --git a/requirements.txt b/requirements.txt index e6d8e62c47..899e214076 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ defusedxml distro filetype>=1.2.0 gsutil -importlib_metadata>=3.6; python_version < "3.10" +importlib_metadata>=4.0; python_version < "3.10" importlib_resources; python_version < "3.9" jinja2>=2.11.3 jsonschema>=3.0.2 diff --git a/test/test_dependency_graph.py b/test/test_dependency_graph.py new file mode 100644 index 0000000000..70f79d8881 --- /dev/null +++ b/test/test_dependency_graph.py @@ -0,0 +1,69 @@ +# Copyright (C) 2022 Intel Corporation +# SPDX-License-Identifier: GPL-3.0-or-later + +from cve_bin_tool.dependency_graph import DependencyGraph + + +def test_dependency_order(): + """Test that dependencies are resolved in correct order""" + dg = DependencyGraph() + dg.add_package("A", ["B", "C"]) + dg.add_package("B", ["D"]) + dg.add_package("C", ["D", "E"]) + dg.add_package("D", []) + dg.add_package("E", []) + + order = dg.resolve_dependencies("A") + + # Verify dependencies come before their dependents + a_index = order.index("a") + b_index = order.index("b") + c_index = order.index("c") + d_index = order.index("d") + e_index = order.index("e") + + # D should come before B and C (its dependents) + assert d_index < b_index + assert d_index < c_index + + # E should come before C (its dependent) + assert e_index < c_index + + # B and C should come before A (their dependent) + assert b_index < a_index + assert c_index < a_index + + +def test_cycle_handling(): + """Test that circular 
def parse_dependencies(metadata):
    """Return the lower-cased names of the requirements that apply here.

    Reads the ``Requires-Dist`` entries of *metadata* (none when the key is
    absent) and drops every entry whose environment marker evaluates to false.
    """
    parsed = (Requirement(spec) for spec in metadata.get("Requires-Dist", []))
    return [
        requirement.name.lower()
        for requirement in parsed
        if not requirement.marker or requirement.marker.evaluate()
    ]
"packagea" in deps + assert "packagec" in deps + assert "packageb" not in deps diff --git a/test/test_python_parser.py b/test/test_python_parser.py new file mode 100644 index 0000000000..953ba48b3e --- /dev/null +++ b/test/test_python_parser.py @@ -0,0 +1,65 @@ +import importlib + +import pytest + +# Assume our parser is imported as follows: +from cve_bin_tool.parsers.python import PythonParser + + +class DummyLogger: + def debug(self, msg): + pass + + def error(self, msg): + pass + + +class DummyDB: + pass + + +@pytest.fixture +def parser(): + return PythonParser(DummyDB(), DummyLogger()) + + +def test_get_installed_packages(parser, monkeypatch): + # Create a dummy distribution object + class DummyDist: + def __init__(self, name, version, requires=None): + self.metadata = {"Name": name, "Version": version} + self.requires = requires or [] + + dummy_dists = [ + DummyDist("packageA", "1.0"), + DummyDist("packageB", "2.0", requires=["packageC; python_version >= '3.6'"]), + DummyDist("packageC", "3.0"), + ] + # Patch distributions on importlib.metadata directly. 
def test_parse_dependencies(parser):
    """_parse_dependencies keeps unmarked or satisfied requirements and drops the rest."""

    class DummyDist:
        """Minimal distribution exposing only ``requires``."""

        def __init__(self, requires):
            self.requires = requires

    # (requirement string, package name, expected to be reported?)
    cases = [
        ("packageD>=1.0", "packageD", True),  # no marker
        ("packageE; python_version >= '3.0'", "packageE", True),  # marker holds
        ("packageF; python_version < '2.0'", "packageF", False),  # marker fails
    ]

    for requirement, name, expected in cases:
        found = parser._parse_dependencies(DummyDist(requires=[requirement]))
        assert (name in found) == expected
def test_get_environment_context(self):
    """Every expected key is present in the dictionary from get_environment_context."""
    expected_keys = (
        "python_version",
        "sys_platform",
        "os_name",
        "platform_machine",
        "platform_system",
        "platform_release",
    )
    context = get_environment_context()
    for key in expected_keys:
        assert key in context, f"{key} not found in environment context"