diff --git a/tool/extract_deps.py b/tool/extract_deps.py
index 723b7e18..210b5c47 100644
--- a/tool/extract_deps.py
+++ b/tool/extract_deps.py
@@ -15,7 +15,7 @@
from pathlib import Path
import yaml
-from tool.tool_config import PNPM_LIST_COMMAND, get_cache_manager, YarnLockParser
+from tool.tool_config import PNPM_LIST_COMMAND, get_cache_manager, YarnLockParser, get_package_url, get_registry_url
cache_manager = get_cache_manager()
@@ -27,6 +27,52 @@
RESOLVE_PLUGINS_LOG = "/tmp/plugins.log"
+def build_tree_structure_with_links(paths, package_manager):
+ tree = {}
+ for path in paths:
+ current_level = tree
+ for node in path[:-1]:
+ label = f"[{node}]({get_package_url(node, package_manager)})"
+ if label not in current_level:
+ current_level[label] = {}
+ current_level = current_level[label]
+ return tree
+
+
+def format_tree_as_text(tree, target_package, package_manager, indent="", is_last_child=True):
+ if not tree:
+ return f"{indent}└── [{target_package}]({get_package_url(target_package, package_manager)})"
+
+ lines = []
+ items = list(tree.items())
+ for i, (label, subtree) in enumerate(items):
+ is_last = i == len(items) - 1
+ connector = "└──" if is_last else "├──"
+ lines.append(f"{indent}{connector} {label}")
+ child_indent = indent + (" " if is_last else "│ ")
+
+ if not subtree:
+ lines.append(f"{child_indent}└── [{target_package}]({get_package_url(target_package, package_manager)})")
+ else:
+ child_lines = format_tree_as_text(subtree, target_package, package_manager, child_indent, is_last)
+ lines.extend(child_lines if isinstance(child_lines, list) else [child_lines])
+ return lines
+
+
+def format_paths_for_markdown(paths, target_package, package_manager):
+ if not paths:
+ return ""
+
+ tree = build_tree_structure_with_links(paths, package_manager)
+ if not tree:
+        return f"1 path\n{target_package}\n"
+
+ tree_lines = format_tree_as_text(tree, target_package, package_manager)
+    tree_text = "\n".join(tree_lines)
+ summary_text = f"{len(paths)} path{'s' if len(paths) != 1 else ''}"
+    return f"{summary_text}\n{tree_text}\n"
+
+
def get_lockfile_hash(lockfile_content):
"""Generate a hash of the lockfile to detect changes"""
return hashlib.sha256(str(lockfile_content).encode()).hexdigest()
@@ -116,79 +162,145 @@ def extract_deps_from_pnpm_lockfile(repo_path, pnpm_lockfile_yaml):
def extract_deps_from_npm(repo_path, npm_lock_file):
"""
- Extract dependencies from a "package-lock.json" file.
+ Extract dependencies from an npm project using npm list command.
Args:
- repo_path (str): The project's source code repository.
- npm_lock_file (dict): The content of the npm lock file.
+ repo_path (str): The project's source code repository path.
+ npm_lock_file (str): The npm lock file path.
Returns:
dict: A dictionary containing the extracted dependencies and patches.
"""
- lock_file_json = json.loads(npm_lock_file)
- lockfile_hash = get_lockfile_hash(lock_file_json)
+ # Generate cache key based on repo path and project info
+ lockfile_hash = get_lockfile_hash(npm_lock_file)
if not lockfile_hash:
logging.error("No lockfile found in %s", repo_path)
return {"resolutions": [], "patches": []}
-
cached_deps = cache_manager.extracted_deps_cache.get_dependencies(repo_path, lockfile_hash)
if cached_deps:
logging.info(f"Using cached dependencies for {repo_path}")
return cached_deps
+
try:
+ # If we reach here, we need to resolve dependencies
+ current_dir = os.getcwd()
+ os.chdir(repo_path)
+ # Run npm list to get dependency tree
+ logging.info("Running npm list to extract dependencies...")
+ result = subprocess.run(
+ ["npm", "list", "--json", "--all", "--long", "--package-lock-only"],
+ cwd=repo_path,
+ capture_output=True,
+ text=True,
+ check=False, # Don't fail on warnings/missing peer deps
+ )
+ os.chdir(current_dir)
+
+ if result.returncode != 0 and result.returncode != 1:
+ # Return code 1 is common for missing peer deps, which is OK
+ logging.error(f"npm list failed with return code {result.returncode}")
+ logging.error(f"stderr: {result.stderr}")
+ return {"resolutions": [], "patches": [], "aliased_packages": {}}
+
+ npm_data = json.loads(result.stdout)
+ # Parse project name and version from npm list output
+ project_name = npm_data.get("name")
+ project_version = npm_data.get("version")
+
patches = []
pkg_name_with_resolution = set()
aliased_packages = {}
- deps_list_data = {}
+ parent_packages = {} # Maps package -> set of immediate parents
+ dependency_tree = {} # Maps package -> complete dependency info
- parent_packages = {}
- if lock_file_json.get("packages") and isinstance(lock_file_json["packages"], dict):
- for package_path, package_info in lock_file_json["packages"].items():
- if package_path.startswith("node_modules/"):
- package_name = package_path.split("/", 1)[1]
- if "node_modules" in package_name:
- package_name = package_name.split("node_modules/")[-1]
-
- resolution = package_name
- if package_info.get("version"):
- version = package_info["version"]
- # Handle npm aliases
- original_name = package_info.get("name")
- if original_name:
- logging.warning(f"Found npm alias for {original_name}@{version}")
- aliased_packages[f"{original_name}@{version}"] = package_name
- package_name = original_name
-
- resolution = f"{package_name}@{version}"
- pkg_name_with_resolution.add(resolution)
-
- if package_info.get("dependencies"):
- for dep_name, version in package_info["dependencies"].items():
- parent_packages.setdefault(f"{dep_name}@{version}", set()).add(resolution)
-
- deps_list_data = {
- "resolutions": list(
- {
- "info": info,
- "parent": list(parent_packages.get(info, set())),
- }
- for info in sorted(pkg_name_with_resolution)
- ),
- "patches": patches,
- "aliased_packages": aliased_packages,
- }
+ # Add root package
+ root_name = npm_data.get("name", project_name)
+ root_version = npm_data.get("version", project_version)
+ root_resolution = f"{root_name}@{root_version}"
+ pkg_name_with_resolution.add(root_resolution)
- cache_manager.extracted_deps_cache.cache_dependencies(repo_path, lockfile_hash, deps_list_data)
+ def process_dependencies(deps_dict, parent_resolution, current_path=None):
+ """Recursively process dependencies from npm list output"""
+ if not deps_dict:
+ return
- return deps_list_data
+ if current_path is None:
+ current_path = [parent_resolution]
- except (IOError, ValueError, KeyError) as e:
- logging.error(
- "An error occurred while extracting dependencies from package-lock.json: %s",
- str(e),
- )
+ for dep_name, dep_info in deps_dict.items():
+ if not isinstance(dep_info, dict):
+ continue
+
+ dep_version = dep_info.get("version")
+ if not dep_version:
+ continue
+
+ # Handle npm aliases (like "my-lodash": "npm:lodash@4.17.21")
+ original_name = dep_name
+ if dep_name != dep_info.get("name", dep_name):
+ real_name = dep_info.get("name", dep_name)
+ logging.info(f"Found npm alias: {dep_name} -> {real_name}@{dep_version}")
+ aliased_packages[f"{real_name}@{dep_version}"] = f"{dep_name}@{dep_version}"
+ original_name = real_name
+
+ dep_resolution = f"{original_name}@{dep_version}"
+ pkg_name_with_resolution.add(dep_resolution)
+
+ # Map this dependency to its immediate parent
+ parent_packages.setdefault(dep_resolution, set()).add(parent_resolution)
+
+ # Build the full path to this dependency
+ full_path = current_path + [dep_resolution]
+
+ # Store all paths to this dependency
+ if dep_resolution not in dependency_tree:
+ dependency_tree[dep_resolution] = {"paths": [], "immediate_parents": set()}
+
+ dependency_tree[dep_resolution]["paths"].append(full_path[:])
+ dependency_tree[dep_resolution]["immediate_parents"].add(parent_resolution)
- return {"resolutions": [], "patches": [], "aliased_packages": []}
+ # Check for patches (if using patch-package or similar)
+ if dep_info.get("patched"):
+ patches.append({"info": dep_resolution})
+
+ # Recursively process nested dependencies
+ if dep_info.get("dependencies"):
+ process_dependencies(dep_info["dependencies"], dep_resolution, full_path)
+
+ # Process all dependencies starting from root
+ if npm_data.get("dependencies"):
+ process_dependencies(npm_data["dependencies"], root_resolution)
+
+ deps_list_data = {
+ "resolutions": list(
+ {
+ "info": info,
+ "parent": format_paths_for_markdown(dependency_tree.get(info, {}).get("paths", []), info, "npm"),
+ }
+ for info in sorted(pkg_name_with_resolution)
+ ),
+ "patches": patches,
+ "aliased_packages": aliased_packages,
+ }
+
+ cache_manager.extracted_deps_cache.cache_dependencies(repo_path, lockfile_hash, deps_list_data)
+
+ logging.info(f"Extracted {len(pkg_name_with_resolution)} dependencies from npm list")
+ return deps_list_data
+
+ except subprocess.CalledProcessError as e:
+ os.chdir(current_dir)
+ logging.error(f"Error running npm list: {e}")
+ logging.error(f"stderr: {e.stderr}")
+ return {"resolutions": [], "patches": [], "aliased_packages": {}}
+ except json.JSONDecodeError as e:
+ os.chdir(current_dir)
+ logging.error(f"Error parsing npm list JSON output: {e}")
+ return {"resolutions": [], "patches": [], "aliased_packages": {}}
+ except Exception as e:
+ os.chdir(current_dir)
+ logging.error(f"Unexpected error in extract_deps_from_npm: {e}")
+ return {"resolutions": [], "patches": [], "aliased_packages": {}}
def extract_deps_from_yarn_berry(repo_path, yarn_lock_file):
@@ -698,9 +810,31 @@ def parse_mvn_plugin_logs(log_file):
for plugin in retrieved_plugins
]
+ dependency_tree = defaultdict(lambda: {"paths": [], "immediate_parents": set()})
+ pkg_name_with_resolution = set()
+
+ for dep in parsed_deps + parsed_plugins:
+ child = dep["info"]
+ parent = dep["parent"]
+ pkg_name_with_resolution.add(child)
+
+ if parent:
+ dependency_tree[child]["paths"].append([parent, child])
+ dependency_tree[child]["immediate_parents"].add(parent)
+ else:
+ # Root dependency
+ dependency_tree[child]["paths"].append([child])
+
# Create the result
deps_list_data = {
- "resolutions": list({item["info"]: item for item in parsed_plugins + parsed_deps}.values()),
+ "resolutions": list(
+ {
+ "info": info,
+ "parent": format_paths_for_markdown(dependency_tree.get(info, {}).get("paths", []), info, "maven"),
+ "command": command,
+ }
+ for info, command in {item["info"]: item["command"] for item in parsed_deps + parsed_plugins}.items()
+ ),
"patches": [],
}
diff --git a/tool/main.py b/tool/main.py
index a3f74c1a..8d521976 100644
--- a/tool/main.py
+++ b/tool/main.py
@@ -186,7 +186,6 @@ def get_lockfile(project_repo_name, release_version, package_manager):
try:
lockfile_name = LOOKING_FOR[package_manager]
logging.info(f"Getting {lockfile_name} for {project_repo_name}@{release_version}")
- logging.info(f"Package manager: {package_manager}")
except KeyError:
logging.error("Invalid package manager or lack of lockfile: %s", package_manager)
raise ValueError("Invalid package manager or lack of lockfile.")
@@ -230,6 +229,7 @@ def get_deps(folder_path, project_repo_name, release_version, package_manager):
deps_list_all = None
logging.info("Getting dependencies for %s@%s...", project_repo_name, release_version)
+ logging.info(f"Package manager: {package_manager}")
# if it is a pnpm monorepo
if package_manager == "pnpm":
@@ -252,8 +252,9 @@ def get_deps(folder_path, project_repo_name, release_version, package_manager):
patches_info = extract_deps.get_patches_info(project_repo_name, yarn_file)
elif package_manager == "npm":
+ repo_path = tool_config.clone_repo(project_repo_name, release_version)
npm_file, _, _ = get_lockfile(project_repo_name, release_version, package_manager)
- deps_list_all = extract_deps.extract_deps_from_npm(project_repo_name, npm_file)
+ deps_list_all = extract_deps.extract_deps_from_npm(repo_path, npm_file)
elif package_manager == "maven":
# Maven is more complex, because of child packages in the repo/pom; this requires to clone the whole repo
diff --git a/tool/report_static.py b/tool/report_static.py
index f705a0cd..6f7219aa 100644
--- a/tool/report_static.py
+++ b/tool/report_static.py
@@ -6,7 +6,7 @@
import subprocess
from datetime import datetime
import pandas as pd
-from tool.tool_config import DEFAULT_ENABLED_CHECKS
+from tool.tool_config import DEFAULT_ENABLED_CHECKS, get_package_url, get_registry_url
import logging
import re
@@ -145,7 +145,7 @@ def create_dataframe(data, deps_list, package_manager, enabled_checks, config):
"all_deprecated": package_data.get("package_info", {}).get("all_deprecated", None),
"signature_present": package_data.get("code_signature", {}).get("signature_present"),
"signature_valid": package_data.get("code_signature", {}).get("signature_valid"),
- "parent": f"`{package_data.get("parent", "-")}`",
+ "parent": package_data.get("parent", "-"),
"command": f"`{package_data.get("command", "-")}`",
"is_github": source_code_data.get("is_github", False),
"github_url": source_code_data.get("github_url", "Could not find repo from package registry"),
@@ -858,7 +858,7 @@ def write_summary(
md_file.write("\n")
break
- md_file.write("#### Ignored Smells\n\n")
+ md_file.write("\n#### Ignored Smells\n\n")
md_file.write("\nThe following smells were configured to be ignored in this project:\n\n")
for report in ignored_reports:
if ignored_reports[report]["enabled"]:
diff --git a/tool/tool_config.py b/tool/tool_config.py
index abcaaa99..bf476c03 100644
--- a/tool/tool_config.py
+++ b/tool/tool_config.py
@@ -1191,3 +1191,25 @@ def get_last_page_info(
logging.error(f"Failed after {max_retries} attempts: {e}")
return None
time.sleep(retry_delay * (attempt + 1))
+
+
+def get_package_url(package_name, package_manager):
+ if package_manager == "maven":
+ ga, v = package_name.split("@")
+ g, a = ga.split(":")
+ return f"https://central.sonatype.com/artifact/{g}/{a}/{v}"
+ elif package_manager in ["npm", "yarn-berry", "yarn-classic", "pnpm"]:
+ name_in_url = "/v/".join(package_name.rsplit("@", 1)) # replaces last occurrence of @ for /v/
+ return f"https://npmjs.com/package/{name_in_url}"
+ raise ValueError("Package Manager not supported for acquiring package URL.")
+
+
+def get_registry_url(package_name, package_manager):
+ if package_manager == "maven":
+ ga, v = package_name.split("@")
+ g, a = ga.split(":")
+ return f"https://central.sonatype.com/artifact/{g}/{a}/{v}"
+ elif package_manager in ["npm", "yarn-berry", "yarn-classic", "pnpm"]:
+        name_in_url = "/".join(package_name.rsplit("@", 1)) # replaces last occurrence of @ with /
+ return f"https://registry.npmjs.com/{name_in_url}"
+ raise ValueError("Package Manager not supported for acquiring registry URL.")