Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
/pip-selfcheck.json
/tmp
/venv
/myenv
.Python
/include
/Include
Expand Down
314 changes: 314 additions & 0 deletions src/packagedcode/pypi.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,16 @@ def is_poetry_pyproject_toml(location):
return False


def is_uv_pyproject_toml(location):
    """
    Return True if the file at ``location`` is likely a uv pyproject.toml,
    e.g., if its text contains the "tool.uv" string.

    This is a plain text check: the file is not parsed as TOML, so any
    occurrence of "tool.uv" (including in a sub-table header such as
    ``[tool.uv.sources]``) counts as a match.
    """
    # TOML documents are UTF-8 encoded: do not rely on the platform default
    # encoding, which can fail on non-ASCII content.
    with open(location, 'r', encoding='utf-8') as file:
        return "tool.uv" in file.read()


class BasePoetryPythonLayout(BaseExtractedPythonLayout):
"""
Base class for poetry python projects.
Expand Down Expand Up @@ -832,6 +842,310 @@ def parse(cls, location, package_only=False):
yield models.PackageData.from_data(package_data, package_only)


def parse_dependency_requirement(requirement, scope='dependencies', is_runtime=True):
    """
    Parse a dependency requirement string and return a DependentPackage or None.

    Args:
        requirement: A requirement string (e.g., "requests>=2.0.0")
        scope: The dependency scope (e.g., 'dependencies', 'dev-dependencies')
        is_runtime: Whether this is a runtime dependency

    Returns:
        models.DependentPackage, or None when ``requirement`` is empty or
        when it cannot be parsed or converted.
    """
    # NOTE(review): the processing here is similar to
    # get_requires_dependencies; consider reusing it or sharing common helpers
    # to avoid code duplication.
    if not requirement:
        return None

    # Best effort on purpose: any failure while parsing or converting the
    # requirement results in None rather than an exception.
    try:
        req = Requirement(requirement)
        name = canonicalize_name(req.name)
        purl = PackageURL(type='pypi', name=name)

        is_pinned = False
        extracted_requirement = None

        # Use the public specifier set API (len and iteration) rather than
        # its private storage attribute.
        specifiers_set = req.specifier
        specifiers_count = len(specifiers_set)
        if specifiers_count:
            extracted_requirement = str(specifiers_set)
            if specifiers_count == 1:
                specifier = next(iter(specifiers_set))
                # a single exact-match specifier pins the version
                if specifier.operator in ('==', '==='):
                    is_pinned = True
                    purl = purl._replace(version=specifier.version)

        extra_data = {}
        is_optional = False
        marker = req.marker
        if marker:
            platform = get_python_version_os(marker)
            if platform:
                extra_data = platform
            # a marker that references an extra makes the dependency optional
            is_optional = bool(get_extra(marker))

        return models.DependentPackage(
            purl=purl.to_string(),
            scope=scope,
            is_runtime=is_runtime,
            is_optional=is_optional,
            is_pinned=is_pinned,
            is_direct=True,
            extracted_requirement=extracted_requirement,
            extra_data=extra_data or None,
        )
    except Exception:
        return None


class BaseUvPythonLayout(BaseExtractedPythonLayout):
    """
    Base class for uv python projects.
    """

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder):
        """
        Yield the items assembled from a pyproject.toml and its sibling
        uv.lock: a Package when the pyproject.toml data has a purl, the
        resources returned by walk_pypi, the dependencies, and the datafile
        resources themselves.

        ``resource`` is either the pyproject.toml or the uv.lock. A uv.lock
        without a sibling pyproject.toml only yields its own dependencies.
        """
        # A codebase made of a single file has no sibling to combine with:
        # use the default assembly and skip any sibling lookup.
        if codebase.has_single_resource:
            yield from models.DatafileHandler.assemble(package_data, resource, codebase, package_adder)
            return

        package_resource = None
        if resource.name == 'pyproject.toml':
            package_resource = resource
        elif resource.name == 'uv.lock' and resource.has_parent():
            # the first sibling pyproject.toml, if any
            package_resource = next(
                (r for r in resource.siblings(codebase) if r.name == 'pyproject.toml'),
                None,
            )

        if not package_resource:
            # we do not have a pyproject.toml
            yield from yield_dependencies_from_package_resource(resource)
            return

        assert len(package_resource.package_data) == 1, f'Invalid pyproject.toml for {package_resource.path}'
        pkg_data = package_resource.package_data[0]
        pkg_data = models.PackageData.from_dict(pkg_data)

        if pkg_data.purl:
            package = models.Package.from_package_data(
                package_data=pkg_data,
                datafile_path=package_resource.path,
            )
            package_uid = package.package_uid
            package.populate_license_fields()
            yield package

            root = package_resource.parent(codebase)
            if root:
                for pypi_res in cls.walk_pypi(resource=root, codebase=codebase):
                    if package_uid and package_uid not in pypi_res.for_packages:
                        package_adder(package_uid, pypi_res, codebase)
                    yield pypi_res

            yield package_resource

        else:
            # we have no package, so deps are not for a specific package uid
            package_uid = None

        # in all cases yield possible dependencies
        yield from yield_dependencies_from_package_data(pkg_data, package_resource.path, package_uid)

        # we yield this as we do not want this further processed
        yield package_resource

        for lock_file in package_resource.siblings(codebase):
            if lock_file.name == 'uv.lock':
                yield from yield_dependencies_from_package_resource(lock_file, package_uid)

                if package_uid and package_uid not in lock_file.for_packages:
                    package_adder(package_uid, lock_file, codebase)
                yield lock_file


class UvPyprojectTomlHandler(BaseUvPythonLayout):
    """
    Handle pyproject.toml files that are likely uv pyproject.toml files.
    """
    datasource_id = 'pypi_uv_pyproject_toml'
    path_patterns = ('*pyproject.toml',)
    default_package_type = 'pypi'
    default_primary_language = 'Python'
    description = 'Python UV pyproject.toml'
    documentation_url = 'https://docs.astral.sh/uv/'

    @classmethod
    def is_datafile(cls, location, filetypes=tuple()):
        """
        Return True if the file at location is likely a UV pyproject.toml file.
        """
        if super().is_datafile(location, filetypes=filetypes) is False:
            return False
        return is_uv_pyproject_toml(location)

    @staticmethod
    def _parse_requirements(requirements, scope, is_runtime, is_optional=False):
        """
        Return a list of dependency mappings built from an iterable of
        ``requirements`` strings, all using the same ``scope`` and
        ``is_runtime`` flag. Requirements that cannot be parsed are skipped.
        When ``is_optional`` is True, every dependency is flagged as optional.
        """
        dependencies = []
        for requirement in requirements or []:
            dependency = parse_dependency_requirement(
                requirement=requirement,
                scope=scope,
                is_runtime=is_runtime,
            )
            if not dependency:
                continue
            dependency_data = dependency.to_dict()
            if is_optional:
                dependency_data['is_optional'] = True
            dependencies.append(dependency_data)
        return dependencies

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Parse a UV pyproject.toml file and yield a PackageData.

        Dependencies are collected, in this order, from the "dependencies"
        of the [project] table, the "dev-dependencies" of the [tool.uv] table
        and each group of the [project.optional-dependencies] table.
        """
        with open(location, "rb") as fp:
            pyproject_data = tomllib.load(fp)

        project = pyproject_data.get('project', {})
        tool_uv = pyproject_data.get('tool', {}).get('uv', {})

        dependencies = []

        # Standard dependencies
        dependencies.extend(cls._parse_requirements(
            requirements=project.get('dependencies', []),
            scope='dependencies',
            is_runtime=True,
        ))

        # UV dev dependencies
        dependencies.extend(cls._parse_requirements(
            requirements=tool_uv.get('dev-dependencies', []),
            scope='dev-dependencies',
            is_runtime=False,
        ))

        # Extra dependencies (optional dependency groups): the group name is
        # used as the scope, and these are optional by definition.
        optional_dependencies = project.get('optional-dependencies', {})
        for group_name, group_deps in optional_dependencies.items():
            dependencies.extend(cls._parse_requirements(
                requirements=group_deps,
                scope=group_name,
                is_runtime=False,
                is_optional=True,
            ))

        extra_data = {}
        if tool_uv:
            extra_data['uv_config'] = tool_uv

        requires_python = project.get('requires-python')
        if requires_python:
            extra_data['python_version'] = requires_python

        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            primary_language=cls.default_primary_language,
            name=project.get('name'),
            version=project.get('version'),
            description=project.get('description'),
            extra_data=extra_data or None,
            dependencies=dependencies,
        )

        yield models.PackageData.from_data(package_data, package_only)


class UvLockHandler(BaseUvPythonLayout):
    """
    Handle uv.lock lockfiles.
    """
    datasource_id = 'pypi_uv_lock'
    path_patterns = ('*uv.lock',)
    default_package_type = 'pypi'
    default_primary_language = 'Python'
    description = 'Python UV lockfile'
    documentation_url = 'https://docs.astral.sh/uv/'

    @classmethod
    def _get_locked_package_dependencies(cls, package):
        """
        Return a list of dependency mappings for the "dependencies" of a
        single ``package`` entry of a lockfile. Entries that are neither a
        mapping nor a string, mappings without a name and strings that cannot
        be parsed are skipped.
        """
        dependencies = []
        for dep in package.get("dependencies") or []:
            if isinstance(dep, dict):
                # UV format: {name: "package-name", marker: "condition"}
                dep_name = dep.get('name')
                if not dep_name:
                    # a purl cannot be built without a name
                    continue
                purl = PackageURL(
                    type=cls.default_package_type,
                    name=dep_name,
                )
                dependency = models.DependentPackage(
                    purl=purl.to_string(),
                    extracted_requirement=dep.get('marker'),
                    scope="dependencies",
                    is_runtime=True,
                    is_optional=False,
                    is_direct=True,
                    is_pinned=False,
                )
                dependencies.append(dependency.to_dict())
            elif isinstance(dep, str):
                # Simple string dependency
                dependency = parse_dependency_requirement(
                    requirement=dep,
                    scope='dependencies',
                    is_runtime=True,
                )
                if dependency:
                    dependencies.append(dependency.to_dict())
        return dependencies

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Parse a uv.lock file and yield a single PackageData whose dependencies
        are the locked packages, each carrying its own resolved package data.
        Yield nothing if the lockfile lists no package.
        """
        with open(location, "rb") as fp:
            toml_data = tomllib.load(fp)

        packages = toml_data.get('package')
        if not packages:
            return

        # This is the version of the lockfile itself: keep it under its own
        # name so that it is never confused with a locked package version.
        lock_version = toml_data.get('version')
        requires_python = toml_data.get('requires-python')

        dependencies = []
        for package in packages:
            name = package.get('name')
            version = package.get('version')
            urls = get_pypi_urls(name, version)

            package_data = dict(
                datasource_id=cls.datasource_id,
                type=cls.default_package_type,
                primary_language=cls.default_primary_language,
                name=name,
                version=version,
                is_virtual=True,
                dependencies=cls._get_locked_package_dependencies(package),
                **urls,
            )
            resolved_package = models.PackageData.from_data(package_data, package_only)

            dependency = models.DependentPackage(
                purl=resolved_package.purl,
                extracted_requirement=None,
                scope=None,
                is_runtime=True,
                is_optional=False,
                is_direct=False,
                is_pinned=True,
                resolved_package=resolved_package.to_dict()
            )
            dependencies.append(dependency.to_dict())

        extra_data = {}
        extra_data['python_version'] = requires_python
        extra_data['lock_version'] = lock_version

        # NOTE(review): reviewers asked for more PackageData fields (including
        # the pypi URLs from get_pypi_urls) to be populated here. No name or
        # version is read for the lockfile itself in this code, so none are
        # set at this level - TODO confirm what else can be extracted.
        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            primary_language=cls.default_primary_language,
            extra_data=extra_data,
            dependencies=dependencies,
        )
        yield models.PackageData.from_data(package_data, package_only)


class PipInspectDeplockHandler(models.DatafileHandler):
datasource_id = 'pypi_inspect_deplock'
path_patterns = ('*pip-inspect.deplock',)
Expand Down
57 changes: 57 additions & 0 deletions tests/packagedcode/data/pypi/uv/attrs-uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading