diff --git a/README.md b/README.md index 355191a2..2f0a3753 100644 --- a/README.md +++ b/README.md @@ -160,15 +160,17 @@ export GITHUB_API_TOKEN= ### Configuration You can set the tool's configuration through a JSON file, which can be then passed to the tool using the `--config` flag. -At the moment, we have configuration support to ignore smells for specific dependencies, as well as parents with specific parents. +At the moment, we have configuration support to: +- ignore smells for specific dependencies (`ignore`), as well as dependencies with specific parents (`ignore-if-parent`); +- provide hardcoded URLs (`revisions`) for both source code repositories (`source_code_url`) and tag/SHA locations (`source_code_version_url`). -The dependencies can be set either as an exact match or as a regex pattern. +The dependencies can be set either as an exact match or as a regex pattern (this applies only to ignoring smells). **Note that regular expressions don't behave the same as Unix match expressions**: e.g., `@types*` will match every string starting with `@type` and 0 or more `s` following it. For a Unix-like behavior, the equivalent regular expression would be `^@types/.*`. -You can either set "all" to ignore every check for the dependency or specify the checks you want to ignore. +To ignore smells, you can either set "all" to ignore every check for the dependency or specify the checks you want to ignore. 
-The possible specific [check options](https://github.com/chains-project/dirty-waters#smell-check-options) are as follows (note that **checks represented as "children" of another check are ignored if the parent one is**): +The possible specific [check options](https://github.com/chains-project/dirty-waters#smell-check-options) to ignore are as follows (note that **checks represented as "children" of another check are ignored if the parent one is**): - `"source_code"` - `"source_code_sha"` @@ -188,6 +190,12 @@ An example configuration file: }, "ignore-if-parent": { "^org.apache.maven.plugins:maven-release-plugin.*": "all" + }, + "revisions": { + "io.perfmark:perfmark-api@0.27.0": { + "source_code_url": "https://github.com/perfmark/perfmark", + "source_code_version_url": "https://github.com/perfmark/perfmark/tree/v0.27.0" + } } } ``` diff --git a/tool/main.py b/tool/main.py index eb102235..a3f74c1a 100644 --- a/tool/main.py +++ b/tool/main.py @@ -309,6 +309,7 @@ def static_analysis_all( folder_path, repo_url_info, package_manager, + config=config, check_match=check_match, enabled_checks=enabled_checks, ) diff --git a/tool/static_analysis.py b/tool/static_analysis.py index fdd57dab..91901c9e 100644 --- a/tool/static_analysis.py +++ b/tool/static_analysis.py @@ -316,7 +316,7 @@ def check_parent_scm(package): } -def check_source_code_by_version(package_name, version, repo_api, repo_link, simplified_path, package_manager): +def check_source_code_by_version(package_name, version, repo_api, repo_link, simplified_path, package_manager, config): def check_git_head_presence(package_name, version): # In NPM-based packages, the registry may contain a gitHead field in the package's metadata # Although it's not mandatory to have it, if it's present, it's the best way to check @@ -342,6 +342,26 @@ def check_git_head_presence(package_name, version): "tag_status_code": 404, "sha_status_code": 404, } + if ( + hardcoded_url := config.get("revisions", {}) + 
.get(f"{package_name}@{version}", {}) + .get("source_code_version_url", "") + ): + logging.info(f"Found hardcoded tag/SHA url {hardcoded_url} in config for package {package_name}") + if requests.get(hardcoded_url).status_code == requests.codes.ok: + logging.info(f"Hardcoded URL {hardcoded_url} exists") + return { + "exists": True, + "tag_version": version, + "is_sha": False, + "sha": None, + "url": hardcoded_url, + "message": "Hardcoded URL for source code revision set in config", + "status_code": 200, + } + else: + logging.warning(f"Hardcoded URL {hardcoded_url} does not exist") + if package_manager in ["yarn-berry", "yarn-classic", "pnpm", "npm"]: if git_head := check_git_head_presence(package_name, version): try: @@ -402,7 +422,7 @@ def check_git_head_presence(package_name, version): if existing_tag_format: existing_tag_format = existing_tag_format[0] release_tag_exists = True - release_tag_url = f"{repo_api}/git/ref/tags/{existing_tag_format}" + release_tag_url = f"{repo_link}/tree/{existing_tag_format}" message = f"Tag {existing_tag_format} is found in the repo" status_code_release_tag = 200 else: @@ -425,9 +445,14 @@ def check_git_head_presence(package_name, version): return source_code_info -def check_existence(package_name, repository, extract_message, package_manager, enabled_checks): +def check_existence(package_name, repository, extract_message, package_manager, config, enabled_checks): """Check if the package exists in the repository.""" - if "Could not find repository" in extract_message: + if hardcoded_url := config.get("revisions", {}).get(package_name, {}).get("source_code_url", ""): + logging.info( + f"Found hardcoded repository URL {hardcoded_url} for package {package_name}; repository was {repository}" + ) + repository = hardcoded_url + elif "Could not find repository" in extract_message: return {"is_github": False, "github_url": "No_repo_info_found"} elif "Not a GitHub repository" in extract_message: return {"is_github": False, "github_url": 
repository} @@ -495,7 +520,7 @@ def check_existence(package_name, repository, extract_message, package_manager, now_repo_url = None source_code_info = check_source_code_by_version( - package_full_name, version, repo_api, repo_link, simplified_path, package_manager + package_full_name, version, repo_api, repo_link, simplified_path, package_manager, config ) github_info = { @@ -674,7 +699,7 @@ def check_name_match(package_name, repository): def analyze_package_data( - package, repo_url, extract_message, pm, check_match=False, enabled_checks=DEFAULT_ENABLED_CHECKS + package, repo_url, extract_message, pm, config, check_match=False, enabled_checks=DEFAULT_ENABLED_CHECKS ): """ Analyze package data with configurable smell checks. @@ -684,6 +709,7 @@ def analyze_package_data( repo_url: Repository URL extract_message: Message from repository URL extraction - is it or not a GitHub repository pm: Package manager + config: Config dictionary check_match: Whether to check name matches enabled_checks: Dictionary of enabled smell checks """ @@ -761,7 +787,9 @@ def cached_analysis_matches_schema(cached_analysis, schema): if missing_checks.get("source_code"): update_package_info( - package_info, "source_code", check_existence(package, repo_url, extract_message, pm, enabled_checks) + package_info, + "source_code", + check_existence(package, repo_url, extract_message, pm, config, enabled_checks), ) if check_match and package_info.get("source_code") and package_info["source_code"].get("github_exists"): @@ -844,7 +872,7 @@ def disable_checks_from_config(package_name, parent, config, enabled_checks): return final_enabled_checks -def get_static_data(folder, packages_data, pm, check_match=False, enabled_checks=DEFAULT_ENABLED_CHECKS): +def get_static_data(folder, packages_data, pm, config, check_match=False, enabled_checks=DEFAULT_ENABLED_CHECKS): logging.info("Analyzing package static data...") package_all = {} errors = {} @@ -861,6 +889,7 @@ def get_static_data(folder, packages_data, 
pm, check_match=False, enabled_checks repo_url, extract_repo_url_message, pm, + config, check_match=check_match, enabled_checks=enabled_checks, ) diff --git a/tool/tool_config.py b/tool/tool_config.py index a1255c48..abcaaa99 100644 --- a/tool/tool_config.py +++ b/tool/tool_config.py @@ -1008,7 +1008,7 @@ def clone_repo(project_repo_name, release_version=None, blobless=False): DEFAULT_CONFIG_PATH = ".dirty-waters.json" -DEFAULT_CONFIG = {"ignore": {}} +DEFAULT_CONFIG = {"ignore": {}, "revisions": {}} def load_config(config_path=None):