Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,17 @@ export GITHUB_API_TOKEN=<your_token>
### Configuration

You can set the tool's configuration through a JSON file, which can be then passed to the tool using the `--config` flag.
At the moment, we have configuration support to ignore smells for specific dependencies, as well as parents with specific parents.
At the moment, we have configuration support to:
- ignore smells for specific dependencies (`ignore`), as well as dependencies with specific parents (`ignore-if-parent`);
- provide hardcoded URLs (`revisions`) for both source code repositories (`source_code_url`) and tag/SHA locations (`source_code_version_url`).

The dependencies can be set either as an exact match or as a regex pattern.
The dependencies can be set either as an exact match or as a regex pattern (regex patterns are only supported for ignoring smells).
**Note that regular expressions don't behave the same as Unix match expressions**: e.g., `@types*` will match every string starting with `@type` followed by zero or more `s` characters.
For a Unix-like behavior, the equivalent regular expression would be `^@types/.*`.

You can either set "all" to ignore every check for the dependency or specify the checks you want to ignore.
To ignore smells, you can either set "all" to ignore every check for the dependency or specify the checks you want to ignore.

The possible specific [check options](https://github.com/chains-project/dirty-waters#smell-check-options) are as follows (note that **checks represented as "children" of another check are ignored if the parent one is**):
The possible specific [check options](https://github.com/chains-project/dirty-waters#smell-check-options) to ignore are as follows (note that **checks represented as "children" of another check are ignored if the parent one is**):

- `"source_code"`
- `"source_code_sha"`
Expand All @@ -188,6 +190,12 @@ An example configuration file:
},
"ignore-if-parent": {
"^org.apache.maven.plugins:maven-release-plugin.*": "all"
},
"revisions": {
"io.perfmark:perfmark-api@0.27.0": {
"source_code_url": "https://github.com/perfmark/perfmark",
"source_code_version_url": "https://github.com/perfmark/perfmark/tree/v0.27.0"
}
}
}
```
Expand Down
1 change: 1 addition & 0 deletions tool/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ def static_analysis_all(
folder_path,
repo_url_info,
package_manager,
config=config,
check_match=check_match,
enabled_checks=enabled_checks,
)
Expand Down
45 changes: 37 additions & 8 deletions tool/static_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ def check_parent_scm(package):
}


def check_source_code_by_version(package_name, version, repo_api, repo_link, simplified_path, package_manager):
def check_source_code_by_version(package_name, version, repo_api, repo_link, simplified_path, package_manager, config):
def check_git_head_presence(package_name, version):
# In NPM-based packages, the registry may contain a gitHead field in the package's metadata
# Although it's not mandatory to have it, if it's present, it's the best way to check
Expand All @@ -342,6 +342,26 @@ def check_git_head_presence(package_name, version):
"tag_status_code": 404,
"sha_status_code": 404,
}
if (
hardcoded_url := config.get("revisions", {})
.get(f"{package_name}@{version}", {})
.get("source_code_version_url", "")
):
logging.info(f"Found hardcoded tag/SHA url {hardcoded_url} in config for package {package_name}")
if requests.get(hardcoded_url).status_code == requests.codes.ok:
logging.info(f"Hardcoded URL {hardcoded_url} exists")
return {
"exists": True,
"tag_version": version,
"is_sha": False,
"sha": None,
"url": hardcoded_url,
"message": "Hardcoded URL for source code revision set in config",
"status_code": 200,
}
else:
logging.warning(f"Hardcoded URL {hardcoded_url} does not exist")

if package_manager in ["yarn-berry", "yarn-classic", "pnpm", "npm"]:
if git_head := check_git_head_presence(package_name, version):
try:
Expand Down Expand Up @@ -402,7 +422,7 @@ def check_git_head_presence(package_name, version):
if existing_tag_format:
existing_tag_format = existing_tag_format[0]
release_tag_exists = True
release_tag_url = f"{repo_api}/git/ref/tags/{existing_tag_format}"
release_tag_url = f"{repo_link}/tree/{existing_tag_format}"
message = f"Tag {existing_tag_format} is found in the repo"
status_code_release_tag = 200
else:
Expand All @@ -425,9 +445,14 @@ def check_git_head_presence(package_name, version):
return source_code_info


def check_existence(package_name, repository, extract_message, package_manager, enabled_checks):
def check_existence(package_name, repository, extract_message, package_manager, config, enabled_checks):
"""Check if the package exists in the repository."""
if "Could not find repository" in extract_message:
if hardcoded_url := config.get("revisions", {}).get(package_name, {}).get("source_code_url", ""):
logging.info(
f"Found hardcoded repository URL {hardcoded_url} for package {package_name}; repository was {repository}"
)
repository = hardcoded_url
elif "Could not find repository" in extract_message:
return {"is_github": False, "github_url": "No_repo_info_found"}
elif "Not a GitHub repository" in extract_message:
return {"is_github": False, "github_url": repository}
Expand Down Expand Up @@ -495,7 +520,7 @@ def check_existence(package_name, repository, extract_message, package_manager,
now_repo_url = None

source_code_info = check_source_code_by_version(
package_full_name, version, repo_api, repo_link, simplified_path, package_manager
package_full_name, version, repo_api, repo_link, simplified_path, package_manager, config
)

github_info = {
Expand Down Expand Up @@ -674,7 +699,7 @@ def check_name_match(package_name, repository):


def analyze_package_data(
package, repo_url, extract_message, pm, check_match=False, enabled_checks=DEFAULT_ENABLED_CHECKS
package, repo_url, extract_message, pm, config, check_match=False, enabled_checks=DEFAULT_ENABLED_CHECKS
):
"""
Analyze package data with configurable smell checks.
Expand All @@ -684,6 +709,7 @@ def analyze_package_data(
repo_url: Repository URL
extract_message: Message from repository URL extraction - is it or not a GitHub repository
pm: Package manager
config: Config dictionary
check_match: Whether to check name matches
enabled_checks: Dictionary of enabled smell checks
"""
Expand Down Expand Up @@ -761,7 +787,9 @@ def cached_analysis_matches_schema(cached_analysis, schema):

if missing_checks.get("source_code"):
update_package_info(
package_info, "source_code", check_existence(package, repo_url, extract_message, pm, enabled_checks)
package_info,
"source_code",
check_existence(package, repo_url, extract_message, pm, config, enabled_checks),
)

if check_match and package_info.get("source_code") and package_info["source_code"].get("github_exists"):
Expand Down Expand Up @@ -844,7 +872,7 @@ def disable_checks_from_config(package_name, parent, config, enabled_checks):
return final_enabled_checks


def get_static_data(folder, packages_data, pm, check_match=False, enabled_checks=DEFAULT_ENABLED_CHECKS):
def get_static_data(folder, packages_data, pm, config, check_match=False, enabled_checks=DEFAULT_ENABLED_CHECKS):
logging.info("Analyzing package static data...")
package_all = {}
errors = {}
Expand All @@ -861,6 +889,7 @@ def get_static_data(folder, packages_data, pm, check_match=False, enabled_checks
repo_url,
extract_repo_url_message,
pm,
config,
check_match=check_match,
enabled_checks=enabled_checks,
)
Expand Down
2 changes: 1 addition & 1 deletion tool/tool_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,7 @@ def clone_repo(project_repo_name, release_version=None, blobless=False):


DEFAULT_CONFIG_PATH = ".dirty-waters.json"
DEFAULT_CONFIG = {"ignore": {}}
DEFAULT_CONFIG = {"ignore": {}, "revisions": {}}


def load_config(config_path=None):
Expand Down
Loading