From 8392c151978feaff9cfc442542dffdaa6cb2487b Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Tue, 25 Nov 2025 16:31:26 -0800 Subject: [PATCH 01/14] refactor: separate analysis --- Snakefile | 72 +++++++++++++++++++++++------------ config/config.yaml | 47 +++++++++++++++-------- spras/config/config.py | 85 ++---------------------------------------- spras/config/schema.py | 52 +++++++++++++++++++------- 4 files changed, 122 insertions(+), 134 deletions(-) diff --git a/Snakefile b/Snakefile index 5b9340ccc..31f0d279e 100644 --- a/Snakefile +++ b/Snakefile @@ -20,13 +20,16 @@ wildcard_constraints: # without declaration! _config.init_global(config) +def without_keys(d: dict, keys: list): + if set(keys) & set(d.keys()) != set(keys): raise RuntimeError(f"Keys {keys} not fully present in {list(d.keys())}!") + return {k: v for k, v in d.items() if k not in keys} + out_dir = _config.config.out_dir algorithm_params = _config.config.algorithm_params algorithm_directed = _config.config.algorithm_directed -pca_params = _config.config.pca_params -hac_params = _config.config.hac_params container_settings = _config.config.container_settings -include_aggregate_algo_eval = _config.config.analysis_include_evaluation_aggregate_algo +pca_params = without_keys(vars(_config.config.analysis.pca), ["evaluation", "include", "aggregate_per_algorithm"]) +hac_params = without_keys(vars(_config.config.analysis.hac), ["evaluation", "include", "aggregate_per_algorithm"]) # Return the dataset or gold_standard dictionary from the config file given the label def get_dataset(_datasets, label): @@ -71,55 +74,76 @@ def write_dataset_log(dataset, logfile): def make_final_input(wildcards): final_input = [] - if _config.config.analysis_include_summary: + if _config.config.analysis.summary.include: # add summary output file for each pathway # TODO: reuse in the future once we make summary work for mixed graphs. 
See https://github.com/Reed-CompBio/spras/issues/128 # final_input.extend(expand('{out_dir}{sep}{dataset}-{algorithm_params}{sep}summary.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) # add table summarizing all pathways for each dataset final_input.extend(expand('{out_dir}{sep}{dataset}-pathway-summary.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels)) - if _config.config.analysis_include_cytoscape: + if _config.config.analysis.cytoscape.include: final_input.extend(expand('{out_dir}{sep}{dataset}-cytoscape.cys',out_dir=out_dir,sep=SEP,dataset=dataset_labels)) - if _config.config.analysis_include_ml: + if _config.config.analysis.pca.include: final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}pca.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}pca-variance.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) - final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-vertical.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) - final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-clusters-vertical.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}pca-coordinates.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) - final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-horizontal.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) - final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-clusters-horizontal.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) - 
final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}ensemble-pathway.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) - final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}jaccard-matrix.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) - final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}jaccard-heatmap.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) - if _config.config.analysis_include_ml_aggregate_algo: + if _config.config.analysis.pca.aggregate_per_algorithm: final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-pca.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos)) final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-pca-variance.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos)) final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-pca-coordinates.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos)) + + if _config.config.analysis.hac.include: + final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-vertical.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) + final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-clusters-vertical.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) + final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-horizontal.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) + final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-clusters-horizontal.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) + + if _config.config.analysis.hac.aggregate_per_algorithm: 
final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-hac-vertical.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos)) final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-hac-clusters-vertical.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos)) final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-hac-horizontal.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos)) final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-hac-clusters-horizontal.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos)) + + if _config.config.analysis.ensemble.include: + final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}ensemble-pathway.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) + + if _config.config.analysis.ensemble.aggregate_per_algorithm: final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-ensemble-pathway.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms)) + + if _config.config.analysis.jaccard.include: + final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}jaccard-matrix.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) + final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}jaccard-heatmap.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params)) + + if _config.config.analysis.jaccard.aggregate_per_algorithm: final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-jaccard-matrix.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms)) final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-jaccard-heatmap.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms)) - if _config.config.analysis_include_evaluation: + if 
_config.config.analysis.evaluation.include: final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm_params=algorithms_with_params)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) - final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) - final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) - final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) - final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) + # dummy file final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}dummy-edge.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs)) - - if _config.config.analysis_include_evaluation_aggregate_algo: + + if _config.config.analysis.evaluation.aggregate_per_algorithm: final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms)) 
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms)) + + if _config.config.analysis.pca.evaluation.include: + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) + + if _config.config.analysis.pca.evaluation.aggregate_per_algorithm: final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) + + if _config.config.analysis.ensemble.evaluation.include: + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) + + if _config.config.analysis.ensemble.evaluation.aggregate_per_algorithm: final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes-per-algorithm-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) 
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes-per-algorithm-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) @@ -463,7 +487,7 @@ rule evaluation_per_algo_pr_per_pathways: run: node_table = Evaluation.from_file(input.node_gold_standard_file).node_table pr_df = Evaluation.node_precision_and_recall(input.pathways, node_table) - Evaluation.precision_and_recall_per_pathway(pr_df, output.node_pr_file, output.node_pr_png, include_aggregate_algo_eval) + Evaluation.precision_and_recall_per_pathway(pr_df, output.node_pr_file, output.node_pr_png, _config.config.analysis.evaluation.aggregate_per_algorithm) # Return pathway summary file per dataset def collect_summary_statistics_per_dataset(wildcards): @@ -511,7 +535,7 @@ rule evaluation_per_algo_pca_chosen: node_table = Evaluation.from_file(input.node_gold_standard_file).node_table pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir) pr_df = Evaluation.node_precision_and_recall(pca_chosen_pathways, node_table) - Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png, include_aggregate_algo_eval) + Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png, _config.config.analysis.pca.evaluation.aggregate_per_algorithm) # Return the dataset pickle file for a specific dataset def get_dataset_pickle_file(wildcards): @@ -554,7 +578,7 @@ rule evaluation_per_algo_ensemble_pr_curve: run: node_table = Evaluation.from_file(input.node_gold_standard_file).node_table node_ensembles_dict = Evaluation.edge_frequency_node_ensemble(node_table, input.ensemble_files, input.dataset_file) - Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, node_table, output.node_pr_curve_png, output.node_pr_curve_file, include_aggregate_algo_eval) + 
Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, node_table, output.node_pr_curve_png, output.node_pr_curve_file, _config.config.analysis.evaluation.aggregate_per_algorithm) rule evaluation_edge_dummy: input: diff --git a/config/config.yaml b/config/config.yaml index 11bac082a..b5927cdc2 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -205,33 +205,50 @@ analysis: # Create Cytoscape session file with all pathway graphs for each dataset cytoscape: include: true - # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset - ml: - # ml analysis per dataset + # The following analysis options also have an `aggregate_per_algorithm` option, + # which adds the respective analysis to an algorithm as a whole. + # This will only run if the adjacent `include` is true. + + # Principal component analysis of the pathway output files + pca: include: true - # adds ml analysis per algorithm output - # only runs for algorithms with multiple parameter combinations chosen aggregate_per_algorithm: true + evaluation: + include: true + aggregate_per_algorithm: true # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph labels: true - # 'ward', 'complete', 'average', 'single' - # if linkage: ward, must use metric: euclidean - linkage: 'ward' - # 'euclidean', 'manhattan', 'cosine' - metric: 'euclidean' # controls whether kernel density estimation (KDE) is computed and visualized on top of PCA plots. # the coordinates of the KDE maximum (kde_peak) are also saved to the PCA coordinates output file. # KDE needs to be run in order to select a parameter combination with PCA because the maximum kernel density is used # to pick the 'best' parameter combination. 
kde: true - # removes empty pathways from consideration in ml analysis (pca only) + # removes empty pathways from consideration in ml analysis remove_empty_pathways: false + # Hierarchical agglomerative clustering analysis of the pathway output files + hac: + include: true + aggregate_per_algorithm: true + evaluation: + include: true + aggregate_per_algorithm: true + # 'ward', 'complete', 'average', 'single' + # if linkage: ward, must use metric: euclidean + linkage: 'ward' + # 'euclidean', 'manhattan', 'cosine' + metric: 'euclidean' + # Ensembling pathway output + ensemble: + include: true + aggregate_per_algorithm: true + evaluation: + include: true + aggregate_per_algorithm: true evaluation: - # evaluation per dataset-goldstandard pair - # evaluation will not run unless ml include is set to true + # evaluation per dataset-goldstandard pair. + # This evaluation specifically generates precision-recall curves: + # to run evaluation on top of the other options, see the respective `evaluation` blocks under the other analyses. include: true - # adds evaluation per algorithm per dataset-goldstandard pair - # evaluation per algorithm will not run unless ml include and ml aggregate_per_algorithm are set to true aggregate_per_algorithm: true diff --git a/spras/config/config.py b/spras/config/config.py index cb19b2b1d..9572caa63 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -82,29 +82,7 @@ def __init__(self, raw_config: dict[str, Any]): # Deprecated. Previously a dict mapping algorithm names to a Boolean tracking whether they used directed graphs. 
self.algorithm_directed = None # A dict with the analysis settings - self.analysis_params = parsed_raw_config.analysis - # A dict with the evaluation settings - self.evaluation_params = self.analysis_params.evaluation - # A dict with the ML settings - self.ml_params = self.analysis_params.ml - # A Boolean specifying whether to run ML analysis for individual algorithms - self.analysis_include_ml_aggregate_algo = None - # A dict with the PCA settings - self.pca_params = None - # A dict with the hierarchical clustering settings - self.hac_params = None - # A Boolean specifying whether to run the summary analysis - self.analysis_include_summary = None - # A Boolean specifying whether to run the Cytoscape analysis - self.analysis_include_cytoscape = None - # A Boolean specifying whether to run the ML analysis - self.analysis_include_ml = None - # A Boolean specifying whether to run the Evaluation analysis - self.analysis_include_evaluation = None - # A Boolean specifying whether to run the ML per algorithm analysis - self.analysis_include_ml_aggregate_algo = None - # A Boolean specifying whether to run the evaluation per algorithm analysis - self.analysis_include_evaluation_aggregate_algo = None + self.analysis = parsed_raw_config.analysis self.process_config(parsed_raw_config) @@ -225,67 +203,12 @@ def process_algorithms(self, raw_config: RawConfig): f'(current length {self.hash_length}).') self.algorithm_params[alg.name][params_hash] = run_dict - def process_analysis(self, raw_config: RawConfig): - if not raw_config.analysis: - return - - # self.ml_params is a class, pca_params needs to be a dict. 
- self.pca_params = { - "components": self.ml_params.components, - "labels": self.ml_params.labels, - "kde": self.ml_params.kde, - "remove_empty_pathways": self.ml_params.remove_empty_pathways - } - - self.hac_params = { - "linkage": self.ml_params.linkage, - "metric": self.ml_params.metric - } - - self.analysis_include_summary = raw_config.analysis.summary.include - self.analysis_include_cytoscape = raw_config.analysis.cytoscape.include - self.analysis_include_ml = raw_config.analysis.ml.include - self.analysis_include_evaluation = raw_config.analysis.evaluation.include - - # Only run ML aggregate per algorithm if analysis include ML is set to True - if self.ml_params.aggregate_per_algorithm and self.analysis_include_ml: - self.analysis_include_ml_aggregate_algo = raw_config.analysis.ml.aggregate_per_algorithm - else: - self.analysis_include_ml_aggregate_algo = False - + def process_analysis(self): # Raises an error if Evaluation is enabled but no gold standard data is provided - if self.gold_standards == {} and self.analysis_include_evaluation: + if self.gold_standards == {} and self.analysis.evaluation.include: raise ValueError("Evaluation analysis cannot run as gold standard data not provided. 
" "Please set evaluation include to false or provide gold standard data.") - # Only run Evaluation if ML is set to True - if not self.analysis_include_ml: - self.analysis_include_evaluation = False - - # Only run Evaluation aggregate per algorithm if analysis include ML is set to True - if self.evaluation_params.aggregate_per_algorithm and self.analysis_include_evaluation: - self.analysis_include_evaluation_aggregate_algo = raw_config.analysis.evaluation.aggregate_per_algorithm - else: - self.analysis_include_evaluation_aggregate_algo = False - - # Only run Evaluation per algorithm if ML per algorithm is set to True - if not self.analysis_include_ml_aggregate_algo: - self.analysis_include_evaluation_aggregate_algo = False - - # Set kde to True if Evaluation is set to True - # When Evaluation is True, PCA is used to pick a single parameter combination for all algorithms with multiple - # parameter combinations and KDE is used to choose the parameter combination in the PC space - if self.analysis_include_evaluation and not self.pca_params["kde"]: - self.pca_params["kde"] = True - print("Setting kde to true; Evaluation analysis needs to run KDE for PCA-Chosen parameter selection.") - - # Set summary include to True if Evaluation is set to True - # When a PCA-chosen parameter set is chosen, summary statistics are used to resolve tiebreakers. 
- if self.analysis_include_evaluation and not self.analysis_include_summary: - self.analysis_include_summary = True - print("Setting summary include to true; Evaluation analysis needs to use summary statistics for PCA-Chosen parameter selection.") - - def process_config(self, raw_config: RawConfig): # Set up a few top-level config variables self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir @@ -295,4 +218,4 @@ def process_config(self, raw_config: RawConfig): self.process_datasets(raw_config) self.process_algorithms(raw_config) - self.process_analysis(raw_config) + self.process_analysis() diff --git a/spras/config/schema.py b/spras/config/schema.py index 8aa067a53..f3459a277 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -11,9 +11,10 @@ """ import re +import warnings from typing import Annotated, Optional -from pydantic import AfterValidator, BaseModel, ConfigDict +from pydantic import AfterValidator, BaseModel, ConfigDict, model_validator from spras.config.container_schema import ContainerSettings from spras.config.util import CaseInsensitiveEnum @@ -37,42 +38,65 @@ class CytoscapeAnalysis(BaseModel): # Note that CaseInsensitiveEnum is not pydantic: pydantic # has special support for enums, but we avoid the # pydantic-specific "model_config" key here for this reason. 
-class MlLinkage(CaseInsensitiveEnum): +class HacLinkage(CaseInsensitiveEnum): ward = 'ward' complete = 'complete' average = 'average' single = 'single' -class MlMetric(CaseInsensitiveEnum): +class HacMetric(CaseInsensitiveEnum): euclidean = 'euclidean' manhattan = 'manhattan' cosine = 'cosine' -class MlAnalysis(BaseModel): +class AggregateAnalysis(BaseModel): include: bool aggregate_per_algorithm: bool = False + + model_config = ConfigDict(extra='forbid') + + @model_validator(mode='after') + def check_aggregate_when_include(self): + if self.aggregate_per_algorithm and not self.include: + warnings.warn("aggregate_per_algorithm is set to True but include is set to False; setting aggregate_per_algorithm to False", stacklevel=2) + self.aggregate_per_algorithm = False + return self + +class EvaluationAnalysis(AggregateAnalysis): pass +class AggregateEvaluationAnalysis(AggregateAnalysis): + evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False) + + @model_validator(mode='after') + def check_include_when_evaluation_include(self): + if self.evaluation.include and not self.include: + warnings.warn("evaluation.include is set to True but include is set to False; setting evaluation.include to False", stacklevel=2) + self.evaluation.include = False + return self + +class PcaAnalysis(AggregateEvaluationAnalysis): components: int = 2 labels: bool = True kde: bool = False remove_empty_pathways: bool = False - linkage: MlLinkage = MlLinkage.ward - metric: MlMetric = MlMetric.euclidean - model_config = ConfigDict(extra='forbid') +class HacAnalysis(AggregateEvaluationAnalysis): + linkage: HacLinkage = HacLinkage.ward + metric: HacMetric = HacMetric.euclidean -class EvaluationAnalysis(BaseModel): - include: bool - aggregate_per_algorithm: bool = False - - model_config = ConfigDict(extra='forbid') +class EnsembleAnalysis(AggregateEvaluationAnalysis): pass +class JaccardAnalysis(AggregateAnalysis): pass class Analysis(BaseModel): summary: SummaryAnalysis = 
SummaryAnalysis(include=False) cytoscape: CytoscapeAnalysis = CytoscapeAnalysis(include=False) - ml: MlAnalysis = MlAnalysis(include=False) + pca: PcaAnalysis = PcaAnalysis(include=False) + hac: HacAnalysis = HacAnalysis(include=False) + jaccard: JaccardAnalysis = JaccardAnalysis(include=False) + ensemble: EnsembleAnalysis = EnsembleAnalysis(include=False) evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False) + """Enables PR curve evaluation.""" - model_config = ConfigDict(extra='forbid') + model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True) # The default length of the truncated hash used to identify parameter combinations From 9dd917e4e4c83cf756328b9a3b4dbcd5a49848b0 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Wed, 26 Nov 2025 00:52:45 +0000 Subject: [PATCH 02/14] docs: update --- config/egfr.yaml | 26 +++++++++++--- docker-wrappers/SPRAS/example_config.yaml | 41 ++++++++++++++++++++--- docs/tutorial/advanced.rst | 7 ++++ docs/tutorial/beginner.rst | 2 +- docs/tutorial/intermediate.rst | 18 ++++++---- test/analysis/input/config.yaml | 41 +++++++++++++++++++---- test/analysis/input/egfr.yaml | 31 ++++++++++++++--- 7 files changed, 141 insertions(+), 25 deletions(-) diff --git a/config/egfr.yaml b/config/egfr.yaml index 25e56ab25..60a145249 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -150,16 +150,34 @@ reconstruction_settings: locations: reconstruction_dir: output/egfr analysis: - cytoscape: - include: true summary: include: true - ml: + cytoscape: + include: true + pca: include: true aggregate_per_algorithm: true + evaluation: + include: true + aggregate_per_algorithm: true + components: 2 labels: true kde: true remove_empty_pathways: true - evaluation: + hac: include: true aggregate_per_algorithm: true + evaluation: + include: true + aggregate_per_algorithm: true + linkage: 'ward' + metric: 'euclidean' + ensemble: + include: true + aggregate_per_algorithm: true + evaluation: + include: true + 
aggregate_per_algorithm: true + evaluation: + include: false + aggregate_per_algorithm: false diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml index db1c2dbbf..d4a0a6a6c 100644 --- a/docker-wrappers/SPRAS/example_config.yaml +++ b/docker-wrappers/SPRAS/example_config.yaml @@ -138,18 +138,51 @@ analysis: include: true # Create Cytoscape session file with all pathway graphs for each dataset cytoscape: - include: false - # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset - ml: include: true + # The following analysis options also have an `aggregate_per_algorithm` option, + # which adds the respective analysis to an algorithm as a whole. + # This will only run if the adjacent `include` is true. + + # Principal component analysis of the pathway output files + pca: + include: true + aggregate_per_algorithm: true + evaluation: + include: true + aggregate_per_algorithm: true # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph labels: true + # controls whether kernel density estimation (KDE) is computed and visualized on top of PCA plots. + # the coordinates of the KDE maximum (kde_peak) are also saved to the PCA coordinates output file. + # KDE needs to be run in order to select a parameter combination with PCA because the maximum kernel density is used + # to pick the 'best' parameter combination. 
+ kde: true + # removes empty pathways from consideration in ml analysis + remove_empty_pathways: false + # Hierarchical agglomerative clustering analysis of the pathway output files + hac: + include: true + aggregate_per_algorithm: true + evaluation: + include: true + aggregate_per_algorithm: true # 'ward', 'complete', 'average', 'single' # if linkage: ward, must use metric: euclidean linkage: 'ward' # 'euclidean', 'manhattan', 'cosine' metric: 'euclidean' + # Ensembling pathway output + ensemble: + include: true + aggregate_per_algorithm: true + evaluation: + include: true + aggregate_per_algorithm: true evaluation: - include: false + # evaluation per dataset-goldstandard pair. + # This evaluation specifically generates precision-recall curves: + # to run evaluation on top of the other options, see the respective `evaluation` blocks under the other analyses. + include: true + aggregate_per_algorithm: true diff --git a/docs/tutorial/advanced.rst b/docs/tutorial/advanced.rst index 8f7e8b645..569733631 100644 --- a/docs/tutorial/advanced.rst +++ b/docs/tutorial/advanced.rst @@ -106,6 +106,13 @@ When gold standards are provided and evaluation is enabled (``include: true``), analysis: evaluation: include: true + # One could also enable + # evaluation for PCA and HAC, and ensembling. 
+ # For example, + pca: + include: true + evaluation: + include: true A gold standard dataset must include the following types of keys and files: diff --git a/docs/tutorial/beginner.rst b/docs/tutorial/beginner.rst index 9c8f7f236..43e265a49 100644 --- a/docs/tutorial/beginner.rst +++ b/docs/tutorial/beginner.rst @@ -199,7 +199,7 @@ Analysis include: true cytoscape: include: true - ml: + pca: include: true diff --git a/docs/tutorial/intermediate.rst b/docs/tutorial/intermediate.rst index 2e569e092..e39ab70cd 100644 --- a/docs/tutorial/intermediate.rst +++ b/docs/tutorial/intermediate.rst @@ -689,25 +689,31 @@ And the file ``egfr-omicsintegrator1-params-GUMLBDZ/pathway.txt`` contains the f MRE11_HUMAN RAD50_HUMAN 1 U -Step 3: Use ML post-analysis +Step 3: Use ML-related post-analysis ============================= -3.1 Adding ML post-analysis to the intermediate configuration +3.1 Adding ML-related post-analysis to the intermediate configuration ------------------------------------------------------------- -To enable the ML analysis, update the analysis section in your configuration file by setting ml to true. +To enable ML-related analysis, update the analysis section in your configuration file by setting your desired ML analyses to true. Your analysis section in the configuration file should look like this: .. code-block:: yaml analysis: - ml: + pca: + include: true + hac: + include: true + ensemble: + include: true + jaccard: + include: true ... (other parameters preset) -``ml`` will perform unsupervised analyses such as principal component analysis (PCA), hierarchical agglomerative clustering (HAC), ensembling, and jaccard similarity comparisons of the pathways. +These settings will perform principal component analysis (PCA), hierarchical agglomerative clustering (HAC), ensembling, and jaccard similarity comparisons of the pathways, respectively. -- The ``ml`` section includes configurable parameters that let you adjust the behavior of the analyses performed. 
+- These sections include configurable parameters that let you adjust the behavior of the analyses performed. With these updates, SPRAS will run the full set of unsupervised machine learning analyses across all outputs for a given dataset. diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml index 15a5572fa..871bd6c84 100644 --- a/test/analysis/input/config.yaml +++ b/test/analysis/input/config.yaml @@ -115,21 +115,50 @@ analysis: # Create Cytoscape session file with all pathway graphs for each dataset cytoscape: include: true - # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset - ml: - # ml analysis per dataset + # The following analysis options also have an `aggregate_per_algorithm` option, + # which adds the respective analysis to an algorithm as a whole. + # This will only run if the adjacent `include` is true. + + # Principal component analysis of the pathway output files + pca: include: false - # adds ml analysis per algorithm output - # only runs for algorithms with multiple parameter combinations chosen - aggregate_per_algorithm: true + aggregate_per_algorithm: false + evaluation: + include: false + aggregate_per_algorithm: false # specify how many principal components to calculate components: 2 # boolean to show the labels on the pca graph labels: true + # controls whether kernel density estimation (KDE) is computed and visualized on top of PCA plots. + # the coordinates of the KDE maximum (kde_peak) are also saved to the PCA coordinates output file. + # KDE needs to be run in order to select a parameter combination with PCA because the maximum kernel density is used + # to pick the 'best' parameter combination. 
+ kde: true + # removes empty pathways from consideration in ml analysis + remove_empty_pathways: false + # Hierarchical agglomerative clustering analysis of the pathway output files + hac: + include: false + aggregate_per_algorithm: false + evaluation: + include: false + aggregate_per_algorithm: false # 'ward', 'complete', 'average', 'single' # if linkage: ward, must use metric: euclidean linkage: 'ward' # 'euclidean', 'manhattan', 'cosine' metric: 'euclidean' + # Ensembling pathway output + ensemble: + include: false + aggregate_per_algorithm: true + evaluation: + include: false + aggregate_per_algorithm: false evaluation: + # evaluation per dataset-goldstandard pair. + # This evaluation specifically generates precision-recall curves: + # to run evaluation on top of the other options, see the respective `evaluation` blocks under the other analyses. include: false + aggregate_per_algorithm: false diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml index d26bded2d..cf4295e46 100644 --- a/test/analysis/input/egfr.yaml +++ b/test/analysis/input/egfr.yaml @@ -97,11 +97,34 @@ reconstruction_settings: locations: reconstruction_dir: output/egfr analysis: + summary: + include: true cytoscape: include: true - summary: + pca: + include: true + aggregate_per_algorithm: true + evaluation: + include: true + aggregate_per_algorithm: true + components: 2 + labels: true + kde: true + remove_empty_pathways: true + hac: include: true - ml: - include: false + aggregate_per_algorithm: true + evaluation: + include: true + aggregate_per_algorithm: true + linkage: 'ward' + metric: 'euclidean' + ensemble: + include: true + aggregate_per_algorithm: true + evaluation: + include: true + aggregate_per_algorithm: true evaluation: - include: false + include: true + aggregate_per_algorithm: true From 49d1f48382e34e74b906e176a4499d05f786ca02 Mon Sep 17 00:00:00 2001 From: "Tristan F." 
Date: Wed, 26 Nov 2025 03:25:32 +0000 Subject: [PATCH 03/14] test(config): fix --- config/config.yaml | 3 + config/egfr.yaml | 2 + spras/config/config.py | 2 +- spras/config/schema.py | 3 + test/test_config.py | 130 ++++++++++++----------------------------- 5 files changed, 47 insertions(+), 93 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index b5927cdc2..47aa35c00 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -246,6 +246,9 @@ analysis: evaluation: include: true aggregate_per_algorithm: true + # Jaccard pathway output + jaccard: + enable: true evaluation: # evaluation per dataset-goldstandard pair. # This evaluation specifically generates precision-recall curves: diff --git a/config/egfr.yaml b/config/egfr.yaml index 60a145249..b305db9b0 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -178,6 +178,8 @@ analysis: evaluation: include: true aggregate_per_algorithm: true + jaccard: + enable: false evaluation: include: false aggregate_per_algorithm: false diff --git a/spras/config/config.py b/spras/config/config.py index 9572caa63..3cd94dafc 100644 --- a/spras/config/config.py +++ b/spras/config/config.py @@ -129,7 +129,7 @@ def process_algorithms(self, raw_config: RawConfig): Keys in the parameter dictionary are strings """ prior_params_hashes = set() - self.algorithm_params = dict() + self.algorithm_params: dict[str, Any] = dict() self.algorithm_directed = dict() self.algorithms = raw_config.algorithms for alg in self.algorithms: diff --git a/spras/config/schema.py b/spras/config/schema.py index f3459a277..3e97c1721 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -71,6 +71,9 @@ def check_include_when_evaluation_include(self): if self.evaluation.include and not self.include: warnings.warn("evaluation.include is set to True but include is set to False; setting evaluation.include to False", stacklevel=2) self.evaluation.include = False + if self.evaluation.aggregate_per_algorithm and not 
self.aggregate_per_algorithm: + warnings.warn("evaluation.aggregate_per_algorithm is set to True but aggregate_per_algorithm is set to False; setting evaluation.aggregate_per_algorithm to False", stacklevel=2) + self.evaluation.aggregate_per_algorithm = False return self class PcaAnalysis(AggregateEvaluationAnalysis): diff --git a/test/test_config.py b/test/test_config.py index c8b05f3c5..70d7175fd 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -105,13 +105,32 @@ def get_test_config(): "summary": { "include": False }, - "ml": { + "pca": { "include": False, "aggregate_per_algorithm": False, + "evaluation": { + "include": False + } + }, + "hac": { + "include": False, + "aggregate_per_algorithm": False, + "evaluation": { + "include": False + } + }, + "ensemble": { + "include": False, + "evaluation": { + "include": False + } }, "cytoscape": { "include": False }, + "jaccard": { + "include": False + }, "evaluation": { "include": False, "aggregate_per_algorithm": False @@ -254,54 +273,21 @@ def test_config_values(self): value_test_util('boolArrTest', [{'flags': True, 'range': 1}, {'flags': False, 'range': 2}, {'flags': False, 'range': 1}, {'flags': True, 'range': 2}]) - @pytest.mark.parametrize("ml_include, eval_include, expected_ml, expected_eval", [ + @pytest.mark.parametrize("include, eval_include, expected_include, expected_eval", [ (True, True, True, True), (True, False, True, False), (False, True, False, False), (False, False, False, False) ]) - def test_eval_ml_coupling(self, ml_include, eval_include, expected_ml, expected_eval): - test_config = get_test_config() - test_config["analysis"]["ml"]["include"] = ml_include - test_config["analysis"]["evaluation"]["include"] = eval_include - config.init_global(test_config) - - assert config.config.analysis_include_ml == expected_ml - assert config.config.analysis_include_evaluation == expected_eval - - @pytest.mark.parametrize("ml_include, ml_agg_include, expected_ml, expected_ml_agg", [ - (True, True, 
True, True), - (True, False, True, False), - (False, True, False, False), - (False, False, False, False) - ]) - def test_ml_agg_algo_coupling(self, ml_include, ml_agg_include, expected_ml, expected_ml_agg): - test_config = get_test_config() - test_config["analysis"]["ml"]["include"] = ml_include - test_config["analysis"]["ml"]["aggregate_per_algorithm"] = ml_agg_include - config.init_global(test_config) - - assert config.config.analysis_include_ml == expected_ml - assert config.config.analysis_include_ml_aggregate_algo == expected_ml_agg - - @pytest.mark.parametrize("eval_include, agg_algo, expected_eval, expected_agg_algo", [ - (True, True, True, True), - (True, False, True, False), - (False, True, False, False), - (False, False, False, False), - ]) - def test_eval_agg_algo_coupling(self, eval_include, agg_algo, expected_eval, expected_agg_algo): + @pytest.mark.parametrize("analysis_type", ["pca", "hac", "ensemble"]) + def test_eval_pca_coupling(self, include, eval_include, expected_include, expected_eval, analysis_type): test_config = get_test_config() - test_config["analysis"]["ml"]["include"] = True - test_config["analysis"]["ml"]["aggregate_per_algorithm"] = True - - test_config["analysis"]["evaluation"]["include"] = eval_include - test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = agg_algo - + test_config["analysis"][analysis_type]["include"] = include + test_config["analysis"][analysis_type]["evaluation"]["include"] = eval_include config.init_global(test_config) - assert config.config.analysis_include_evaluation == expected_eval - assert config.config.analysis_include_evaluation_aggregate_algo == expected_agg_algo + assert vars(config.config.analysis)[analysis_type].include == expected_include + assert vars(config.config.analysis)[analysis_type].evaluation.include == expected_eval @pytest.mark.parametrize("ml_include, ml_agg, eval_include, eval_agg, expected_ml, expected_ml_agg, expected_eval, expected_eval_agg", [ (False, True, True, True, 
False, False, False, False), @@ -310,61 +296,21 @@ def test_eval_agg_algo_coupling(self, eval_include, agg_algo, expected_eval, exp (True, True, True, True, True, True, True, True), (True, False, False, False, True, False, False, False), ]) + @pytest.mark.parametrize("analysis_type", ["pca", "hac", "ensemble"]) def test_eval_ml_agg_algo_coupling(self, ml_include, ml_agg, eval_include, eval_agg, expected_ml, expected_ml_agg, - expected_eval, expected_eval_agg): - # the value of ml include and ml aggregate_per_algorithm can affect the value of evaluation include and + expected_eval, expected_eval_agg, analysis_type): + # the value of pca include and pca aggregate_per_algorithm can affect the value of evaluation include and # evaluation aggregate_per_algorithm test_config = get_test_config() - test_config["analysis"]["ml"]["include"] = ml_include - test_config["analysis"]["ml"]["aggregate_per_algorithm"] = ml_agg - test_config["analysis"]["evaluation"]["include"] = eval_include - test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = eval_agg + test_config["analysis"][analysis_type]["include"] = ml_include + test_config["analysis"][analysis_type]["aggregate_per_algorithm"] = ml_agg + test_config["analysis"][analysis_type]["evaluation"]["include"] = eval_include + test_config["analysis"][analysis_type]["evaluation"]["aggregate_per_algorithm"] = eval_agg config.init_global(test_config) - assert config.config.analysis_include_ml == expected_ml - assert config.config.analysis_include_ml_aggregate_algo == expected_ml_agg - assert config.config.analysis_include_evaluation == expected_eval - assert config.config.analysis_include_evaluation_aggregate_algo == expected_eval_agg - - @pytest.mark.parametrize("eval_include, kde, expected_eval, expected_kde", [ - (True, True, True, True), - (True, False, True, True), - (False, True, False, True), - (False, False, False, False), - ]) - def test_eval_kde_coupling(self, eval_include, kde, expected_eval, expected_kde): - 
test_config = get_test_config() - test_config["analysis"]["ml"]["include"] = True - # dealing with other coupling issue - test_config["analysis"]["summary"]["include"] = True - - test_config["analysis"]["ml"]["kde"] = kde - test_config["analysis"]["evaluation"]["include"] = eval_include - - config.init_global(test_config) - - assert config.config.analysis_include_evaluation == expected_eval - assert config.config.pca_params["kde"] == expected_kde - - @pytest.mark.parametrize("eval_include, summary_include, expected_eval, expected_summary", [ - (True, True, True, True), - (True, False, True, True), - (False, True, False, True), - (False, False, False, False), - ]) - def test_eval_summary_coupling(self, eval_include, summary_include, expected_eval, expected_summary): - test_config = get_test_config() - # dealing with other coupling issue - test_config["analysis"]["ml"]["include"] = True - test_config["analysis"]["ml"]["kde"] = True - - test_config["analysis"]["summary"]["include"] = summary_include - test_config["analysis"]["evaluation"]["include"] = eval_include - - config.init_global(test_config) - - assert config.config.analysis_include_evaluation == expected_eval - assert config.config.analysis_include_summary == expected_summary - + assert vars(config.config.analysis)[analysis_type].include == expected_ml, f"Include was not {expected_ml}!" + assert vars(config.config.analysis)[analysis_type].aggregate_per_algorithm == expected_ml_agg, f"Aggregate per algorithm was not {expected_ml_agg}!" + assert vars(config.config.analysis)[analysis_type].evaluation.include == expected_eval, f"evaluation include was not {expected_eval}!" + assert vars(config.config.analysis)[analysis_type].evaluation.aggregate_per_algorithm == expected_eval_agg, f"evaluation aggregate per algorithm was not {expected_eval_agg}!" From a5736bab7368239f959179f4fa559ee2e8850acd Mon Sep 17 00:00:00 2001 From: "Tristan F." 
Date: Wed, 26 Nov 2025 03:29:33 +0000 Subject: [PATCH 04/14] fix(config): enable -> include --- config/config.yaml | 2 +- config/egfr.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 47aa35c00..2671b0641 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -248,7 +248,7 @@ analysis: aggregate_per_algorithm: true # Jaccard pathway output jaccard: - enable: true + include: true evaluation: # evaluation per dataset-goldstandard pair. # This evaluation specifically generates precision-recall curves: diff --git a/config/egfr.yaml b/config/egfr.yaml index b305db9b0..cb318b5da 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -179,7 +179,7 @@ analysis: include: true aggregate_per_algorithm: true jaccard: - enable: false + include: false evaluation: include: false aggregate_per_algorithm: false From 28d11b14089f15206ca39e18e4cbf27d5ea51a75 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Wed, 26 Nov 2025 06:43:56 +0000 Subject: [PATCH 05/14] chore(test/analysis/input/egfr): disable other analyses --- test/analysis/input/egfr.yaml | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml index cf4295e46..d8a0c5c09 100644 --- a/test/analysis/input/egfr.yaml +++ b/test/analysis/input/egfr.yaml @@ -102,29 +102,29 @@ analysis: cytoscape: include: true pca: - include: true - aggregate_per_algorithm: true + include: false + aggregate_per_algorithm: false evaluation: - include: true - aggregate_per_algorithm: true + include: false + aggregate_per_algorithm: false components: 2 - labels: true - kde: true - remove_empty_pathways: true + labels: false + kde: false + remove_empty_pathways: false hac: - include: true - aggregate_per_algorithm: true + include: false + aggregate_per_algorithm: false evaluation: - include: true - aggregate_per_algorithm: true + include: false + 
aggregate_per_algorithm: false linkage: 'ward' metric: 'euclidean' ensemble: - include: true - aggregate_per_algorithm: true + include: false + aggregate_per_algorithm: false evaluation: - include: true - aggregate_per_algorithm: true + include: false + aggregate_per_algorithm: false evaluation: - include: true - aggregate_per_algorithm: true + include: false + aggregate_per_algorithm: false From 22ee171800e7513096c0b2bfe4f2a4ac1d3cdc82 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 1 Dec 2025 18:00:27 +0000 Subject: [PATCH 06/14] chore: drop hac evaluation section --- config/config.yaml | 3 --- config/egfr.yaml | 3 --- docker-wrappers/SPRAS/example_config.yaml | 3 --- spras/config/schema.py | 2 +- test/analysis/input/config.yaml | 3 --- test/analysis/input/egfr.yaml | 3 --- test/test_config.py | 9 +++------ 7 files changed, 4 insertions(+), 22 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 2671b0641..5d525e74f 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -231,9 +231,6 @@ analysis: hac: include: true aggregate_per_algorithm: true - evaluation: - include: true - aggregate_per_algorithm: true # 'ward', 'complete', 'average', 'single' # if linkage: ward, must use metric: euclidean linkage: 'ward' diff --git a/config/egfr.yaml b/config/egfr.yaml index cb318b5da..4c9186cd4 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -167,9 +167,6 @@ analysis: hac: include: true aggregate_per_algorithm: true - evaluation: - include: true - aggregate_per_algorithm: true linkage: 'ward' metric: 'euclidean' ensemble: diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml index d4a0a6a6c..4c354e657 100644 --- a/docker-wrappers/SPRAS/example_config.yaml +++ b/docker-wrappers/SPRAS/example_config.yaml @@ -165,9 +165,6 @@ analysis: hac: include: true aggregate_per_algorithm: true - evaluation: - include: true - aggregate_per_algorithm: true # 'ward', 'complete', 'average', 'single' # if 
linkage: ward, must use metric: euclidean linkage: 'ward' diff --git a/spras/config/schema.py b/spras/config/schema.py index 3e97c1721..43da8a100 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -82,7 +82,7 @@ class PcaAnalysis(AggregateEvaluationAnalysis): kde: bool = False remove_empty_pathways: bool = False -class HacAnalysis(AggregateEvaluationAnalysis): +class HacAnalysis(AggregateAnalysis): linkage: HacLinkage = HacLinkage.ward metric: HacMetric = HacMetric.euclidean diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml index 871bd6c84..35aa9766b 100644 --- a/test/analysis/input/config.yaml +++ b/test/analysis/input/config.yaml @@ -141,9 +141,6 @@ analysis: hac: include: false aggregate_per_algorithm: false - evaluation: - include: false - aggregate_per_algorithm: false # 'ward', 'complete', 'average', 'single' # if linkage: ward, must use metric: euclidean linkage: 'ward' diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml index d8a0c5c09..dcfd7ae84 100644 --- a/test/analysis/input/egfr.yaml +++ b/test/analysis/input/egfr.yaml @@ -114,9 +114,6 @@ analysis: hac: include: false aggregate_per_algorithm: false - evaluation: - include: false - aggregate_per_algorithm: false linkage: 'ward' metric: 'euclidean' ensemble: diff --git a/test/test_config.py b/test/test_config.py index 70d7175fd..573f702e1 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -114,10 +114,7 @@ def get_test_config(): }, "hac": { "include": False, - "aggregate_per_algorithm": False, - "evaluation": { - "include": False - } + "aggregate_per_algorithm": False }, "ensemble": { "include": False, @@ -279,7 +276,7 @@ def test_config_values(self): (False, True, False, False), (False, False, False, False) ]) - @pytest.mark.parametrize("analysis_type", ["pca", "hac", "ensemble"]) + @pytest.mark.parametrize("analysis_type", ["pca", "ensemble"]) def test_eval_pca_coupling(self, include, eval_include, expected_include, 
expected_eval, analysis_type): test_config = get_test_config() test_config["analysis"][analysis_type]["include"] = include @@ -296,7 +293,7 @@ def test_eval_pca_coupling(self, include, eval_include, expected_include, expect (True, True, True, True, True, True, True, True), (True, False, False, False, True, False, False, False), ]) - @pytest.mark.parametrize("analysis_type", ["pca", "hac", "ensemble"]) + @pytest.mark.parametrize("analysis_type", ["pca", "ensemble"]) def test_eval_ml_agg_algo_coupling(self, ml_include, ml_agg, eval_include, eval_agg, expected_ml, expected_ml_agg, expected_eval, expected_eval_agg, analysis_type): # the value of pca include and pca aggregate_per_algorithm can affect the value of evaluation include and From 32614806d0aead25c5a2b9fda0f23203e20073ac Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Mon, 1 Dec 2025 19:17:01 +0000 Subject: [PATCH 07/14] fix(snakemake): remove hac evaluation exclusion --- Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 31f0d279e..99937d5af 100644 --- a/Snakefile +++ b/Snakefile @@ -29,7 +29,7 @@ algorithm_params = _config.config.algorithm_params algorithm_directed = _config.config.algorithm_directed container_settings = _config.config.container_settings pca_params = without_keys(vars(_config.config.analysis.pca), ["evaluation", "include", "aggregate_per_algorithm"]) -hac_params = without_keys(vars(_config.config.analysis.hac), ["evaluation", "include", "aggregate_per_algorithm"]) +hac_params = without_keys(vars(_config.config.analysis.hac), ["include", "aggregate_per_algorithm"]) # Return the dataset or gold_standard dictionary from the config file given the label def get_dataset(_datasets, label): From eb68738b8b5b57c67777796d9c983cfcbf4f4445 Mon Sep 17 00:00:00 2001 From: "Tristan F." 
Date: Fri, 5 Dec 2025 06:56:27 +0000 Subject: [PATCH 08/14] fix: rename evaluation -> pca_chosen under pca --- config/config.yaml | 2 +- config/egfr.yaml | 2 +- docker-wrappers/SPRAS/example_config.yaml | 2 +- spras/config/schema.py | 40 +++++++++++++---------- test/analysis/input/config.yaml | 2 +- test/analysis/input/egfr.yaml | 2 +- test/test_config.py | 28 +++++++++------- 7 files changed, 45 insertions(+), 33 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 5d525e74f..4261776f4 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -240,7 +240,7 @@ analysis: ensemble: include: true aggregate_per_algorithm: true - evaluation: + pca_chosen: include: true aggregate_per_algorithm: true # Jaccard pathway output diff --git a/config/egfr.yaml b/config/egfr.yaml index 4c9186cd4..3db52d5bb 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -172,7 +172,7 @@ analysis: ensemble: include: true aggregate_per_algorithm: true - evaluation: + pca_chosen: include: true aggregate_per_algorithm: true jaccard: diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml index 4c354e657..5b7396a1f 100644 --- a/docker-wrappers/SPRAS/example_config.yaml +++ b/docker-wrappers/SPRAS/example_config.yaml @@ -174,7 +174,7 @@ analysis: ensemble: include: true aggregate_per_algorithm: true - evaluation: + pca_chosen: include: true aggregate_per_algorithm: true evaluation: diff --git a/spras/config/schema.py b/spras/config/schema.py index 43da8a100..5b7d02e4e 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -49,6 +49,12 @@ class HacMetric(CaseInsensitiveEnum): manhattan = 'manhattan' cosine = 'cosine' +def implies(source: bool, target: bool, source_str: str, target_str: str): + if target and not source: + warnings.warn(f"{target_str} is set to True but {source_str} is set to False; setting {target_str} to False", stacklevel=2) + return False + return target + class AggregateAnalysis(BaseModel): include: 
bool aggregate_per_algorithm: bool = False @@ -57,36 +63,36 @@ class AggregateAnalysis(BaseModel): @model_validator(mode='after') def check_aggregate_when_include(self): - if self.aggregate_per_algorithm and not self.include: - warnings.warn("aggregate_per_algorithm is set to True but include is set to False; setting aggregate_per_algorithm to False", stacklevel=2) - self.aggregate_per_algorithm = False + self.aggregate_per_algorithm = implies(self.include, self.aggregate_per_algorithm, "include", "aggregate_per_algorithm") return self class EvaluationAnalysis(AggregateAnalysis): pass -class AggregateEvaluationAnalysis(AggregateAnalysis): - evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False) - - @model_validator(mode='after') - def check_include_when_evaluation_include(self): - if self.evaluation.include and not self.include: - warnings.warn("evaluation.include is set to True but include is set to False; setting evaluation.include to False", stacklevel=2) - self.evaluation.include = False - if self.evaluation.aggregate_per_algorithm and not self.aggregate_per_algorithm: - warnings.warn("evaluation.aggregate_per_algorithm is set to True but aggregate_per_algorithm is set to False; setting evaluation.aggregate_per_algorithm to False", stacklevel=2) - self.evaluation.aggregate_per_algorithm = False - return self -class PcaAnalysis(AggregateEvaluationAnalysis): +class PcaAnalysis(AggregateAnalysis): components: int = 2 labels: bool = True kde: bool = False remove_empty_pathways: bool = False + pca_chosen: EvaluationAnalysis = EvaluationAnalysis(include=False) + + @model_validator(mode='after') + def check_include_when_evaluation_include(self): + self.pca_chosen.include = implies(self.include, self.pca_chosen.include, "include", "pca_chosen.include") + self.pca_chosen.aggregate_per_algorithm = implies(self.aggregate_per_algorithm, self.pca_chosen.aggregate_per_algorithm, "aggregate_per_algorithm", "pca_chosen.aggregate_per_algorithm") + return self class 
HacAnalysis(AggregateAnalysis): linkage: HacLinkage = HacLinkage.ward metric: HacMetric = HacMetric.euclidean -class EnsembleAnalysis(AggregateEvaluationAnalysis): pass +class EnsembleAnalysis(AggregateAnalysis): + evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False) + + @model_validator(mode='after') + def check_include_when_evaluation_include(self): + self.evaluation.include = implies(self.include, self.evaluation.include, "include", "evaluation.include") + self.evaluation.aggregate_per_algorithm = implies(self.aggregate_per_algorithm, self.evaluation.aggregate_per_algorithm, "aggregate_per_algorithm", "evaluation.aggregate_per_algorithm") + return self class JaccardAnalysis(AggregateAnalysis): pass class Analysis(BaseModel): diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml index 35aa9766b..403c9151a 100644 --- a/test/analysis/input/config.yaml +++ b/test/analysis/input/config.yaml @@ -150,7 +150,7 @@ analysis: ensemble: include: false aggregate_per_algorithm: true - evaluation: + pca_chosen: include: false aggregate_per_algorithm: false evaluation: diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml index dcfd7ae84..f06b08a93 100644 --- a/test/analysis/input/egfr.yaml +++ b/test/analysis/input/egfr.yaml @@ -119,7 +119,7 @@ analysis: ensemble: include: false aggregate_per_algorithm: false - evaluation: + pca_chosen: include: false aggregate_per_algorithm: false evaluation: diff --git a/test/test_config.py b/test/test_config.py index 573f702e1..7e82c866a 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -108,7 +108,7 @@ def get_test_config(): "pca": { "include": False, "aggregate_per_algorithm": False, - "evaluation": { + "pca_chosen": { "include": False } }, @@ -276,15 +276,18 @@ def test_config_values(self): (False, True, False, False), (False, False, False, False) ]) - @pytest.mark.parametrize("analysis_type", ["pca", "ensemble"]) - def test_eval_pca_coupling(self, include, 
eval_include, expected_include, expected_eval, analysis_type): + @pytest.mark.parametrize("analysis_type, evaluation_type", [ + ("pca", "pca_chosen"), + ("ensemble", "evaluation") + ]) + def test_eval_pca_coupling(self, include, eval_include, expected_include, expected_eval, analysis_type, evaluation_type): test_config = get_test_config() test_config["analysis"][analysis_type]["include"] = include - test_config["analysis"][analysis_type]["evaluation"]["include"] = eval_include + test_config["analysis"][analysis_type][evaluation_type]["include"] = eval_include config.init_global(test_config) assert vars(config.config.analysis)[analysis_type].include == expected_include - assert vars(config.config.analysis)[analysis_type].evaluation.include == expected_eval + assert vars(vars(config.config.analysis)[analysis_type])[evaluation_type].include == expected_eval @pytest.mark.parametrize("ml_include, ml_agg, eval_include, eval_agg, expected_ml, expected_ml_agg, expected_eval, expected_eval_agg", [ (False, True, True, True, False, False, False, False), @@ -293,21 +296,24 @@ def test_eval_pca_coupling(self, include, eval_include, expected_include, expect (True, True, True, True, True, True, True, True), (True, False, False, False, True, False, False, False), ]) - @pytest.mark.parametrize("analysis_type", ["pca", "ensemble"]) + @pytest.mark.parametrize("analysis_type, evaluation_type", [ + ("pca", "pca_chosen"), + ("ensemble", "evaluation") + ]) def test_eval_ml_agg_algo_coupling(self, ml_include, ml_agg, eval_include, eval_agg, expected_ml, expected_ml_agg, - expected_eval, expected_eval_agg, analysis_type): + expected_eval, expected_eval_agg, analysis_type, evaluation_type): # the value of pca include and pca aggregate_per_algorithm can affect the value of evaluation include and # evaluation aggregate_per_algorithm test_config = get_test_config() test_config["analysis"][analysis_type]["include"] = ml_include test_config["analysis"][analysis_type]["aggregate_per_algorithm"] = 
ml_agg - test_config["analysis"][analysis_type]["evaluation"]["include"] = eval_include - test_config["analysis"][analysis_type]["evaluation"]["aggregate_per_algorithm"] = eval_agg + test_config["analysis"][analysis_type][evaluation_type]["include"] = eval_include + test_config["analysis"][analysis_type][evaluation_type]["aggregate_per_algorithm"] = eval_agg config.init_global(test_config) assert vars(config.config.analysis)[analysis_type].include == expected_ml, f"Include was not {expected_ml}!" assert vars(config.config.analysis)[analysis_type].aggregate_per_algorithm == expected_ml_agg, f"Aggregate per algorithm was not {expected_ml_agg}!" - assert vars(config.config.analysis)[analysis_type].evaluation.include == expected_eval, f"evaluation include was not {expected_eval}!" - assert vars(config.config.analysis)[analysis_type].evaluation.aggregate_per_algorithm == expected_eval_agg, f"evaluation aggregate per algorithm was not {expected_eval_agg}!" + assert vars(vars(config.config.analysis)[analysis_type])[evaluation_type].include == expected_eval, f"evaluation include was not {expected_eval}!" + assert vars(vars(config.config.analysis)[analysis_type])[evaluation_type].aggregate_per_algorithm == expected_eval_agg, f"evaluation aggregate per algorithm was not {expected_eval_agg}!" From 2b87accfd9555c9dc7dfb879424b951af8a56fb4 Mon Sep 17 00:00:00 2001 From: "Tristan F." 
Date: Fri, 5 Dec 2025 07:05:49 +0000 Subject: [PATCH 09/14] chore: correct configs --- config/config.yaml | 4 ++-- config/egfr.yaml | 4 ++-- docker-wrappers/SPRAS/example_config.yaml | 4 ++-- test/analysis/input/config.yaml | 4 ++-- test/analysis/input/egfr.yaml | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index 4261776f4..ebadac917 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -213,7 +213,7 @@ analysis: pca: include: true aggregate_per_algorithm: true - evaluation: + pca_chosen: include: true aggregate_per_algorithm: true # specify how many principal components to calculate @@ -240,7 +240,7 @@ analysis: ensemble: include: true aggregate_per_algorithm: true - pca_chosen: + evaluation: include: true aggregate_per_algorithm: true # Jaccard pathway output diff --git a/config/egfr.yaml b/config/egfr.yaml index 3db52d5bb..ad30a1bf2 100644 --- a/config/egfr.yaml +++ b/config/egfr.yaml @@ -157,7 +157,7 @@ analysis: pca: include: true aggregate_per_algorithm: true - evaluation: + pca_chosen: include: true aggregate_per_algorithm: true components: 2 @@ -172,7 +172,7 @@ analysis: ensemble: include: true aggregate_per_algorithm: true - pca_chosen: + evaluation: include: true aggregate_per_algorithm: true jaccard: diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml index 5b7396a1f..65a5299e5 100644 --- a/docker-wrappers/SPRAS/example_config.yaml +++ b/docker-wrappers/SPRAS/example_config.yaml @@ -147,7 +147,7 @@ analysis: pca: include: true aggregate_per_algorithm: true - evaluation: + pca_chosen: include: true aggregate_per_algorithm: true # specify how many principal components to calculate @@ -174,7 +174,7 @@ analysis: ensemble: include: true aggregate_per_algorithm: true - pca_chosen: + evaluation: include: true aggregate_per_algorithm: true evaluation: diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml index 
403c9151a..4e8ad8bea 100644 --- a/test/analysis/input/config.yaml +++ b/test/analysis/input/config.yaml @@ -123,7 +123,7 @@ analysis: pca: include: false aggregate_per_algorithm: false - evaluation: + pca_chosen: include: false aggregate_per_algorithm: false # specify how many principal components to calculate @@ -150,7 +150,7 @@ analysis: ensemble: include: false aggregate_per_algorithm: true - pca_chosen: + evaluation: include: false aggregate_per_algorithm: false evaluation: diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml index f06b08a93..d18fc3333 100644 --- a/test/analysis/input/egfr.yaml +++ b/test/analysis/input/egfr.yaml @@ -104,7 +104,7 @@ analysis: pca: include: false aggregate_per_algorithm: false - evaluation: + pca_chosen: include: false aggregate_per_algorithm: false components: 2 @@ -119,7 +119,7 @@ analysis: ensemble: include: false aggregate_per_algorithm: false - pca_chosen: + evaluation: include: false aggregate_per_algorithm: false evaluation: From 633d64adc5d798a0e2eff36d11d2795cb4f494db Mon Sep 17 00:00:00 2001 From: "Tristan F." 
Date: Fri, 5 Dec 2025 07:08:36 +0000 Subject: [PATCH 10/14] fix(snakefile): without pca_chosen --- Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 99937d5af..3607d34a1 100644 --- a/Snakefile +++ b/Snakefile @@ -28,7 +28,7 @@ out_dir = _config.config.out_dir algorithm_params = _config.config.algorithm_params algorithm_directed = _config.config.algorithm_directed container_settings = _config.config.container_settings -pca_params = without_keys(vars(_config.config.analysis.pca), ["evaluation", "include", "aggregate_per_algorithm"]) +pca_params = without_keys(vars(_config.config.analysis.pca), ["pca_chosen", "include", "aggregate_per_algorithm"]) hac_params = without_keys(vars(_config.config.analysis.hac), ["include", "aggregate_per_algorithm"]) # Return the dataset or gold_standard dictionary from the config file given the label From bfb90c666eb160fdb456a6f9106302aa42d626a1 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Fri, 5 Dec 2025 07:11:23 +0000 Subject: [PATCH 11/14] chore: ref pca_chosen --- Snakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Snakefile b/Snakefile index 3607d34a1..93959353d 100644 --- a/Snakefile +++ b/Snakefile @@ -131,11 +131,11 @@ def make_final_input(wildcards): final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms)) - if _config.config.analysis.pca.evaluation.include: + if _config.config.analysis.pca.pca_chosen.include: 
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) - if _config.config.analysis.pca.evaluation.aggregate_per_algorithm: + if _config.config.analysis.pca.pca_chosen.aggregate_per_algorithm: final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) From 08d25ee8fe5924b7fdc50a5a9d7714e454e20688 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." 
Date: Fri, 5 Dec 2025 18:31:05 +0000 Subject: [PATCH 12/14] fix(snakemake): use correct pca field --- Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index 93959353d..2a1261371 100644 --- a/Snakefile +++ b/Snakefile @@ -535,7 +535,7 @@ rule evaluation_per_algo_pca_chosen: node_table = Evaluation.from_file(input.node_gold_standard_file).node_table pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir) pr_df = Evaluation.node_precision_and_recall(pca_chosen_pathways, node_table) - Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png, _config.config.analysis.pca.evaluation.aggregate_per_algorithm) + Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png, _config.config.analysis.pca.pca_chosen.aggregate_per_algorithm) # Return the dataset pickle file for a specific dataset def get_dataset_pickle_file(wildcards): From e3ea0263413cad005b4db25772e01bdf26b2503d Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 5 Dec 2025 18:48:29 +0000 Subject: [PATCH 13/14] fix: correct implies message --- spras/config/schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/config/schema.py b/spras/config/schema.py index 5b7d02e4e..19649759c 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -51,7 +51,7 @@ class HacMetric(CaseInsensitiveEnum): def implies(source: bool, target: bool, source_str: str, target_str: str): if target and not source: - warnings.warn(f"{source_str} is set to True but {target_str} is set to False; setting {target_str} to False", stacklevel=2) + warnings.warn(f"{source_str} is False but {target_str} is True; setting {target_str} to False", stacklevel=2) return False return target From 32546232eb4a2240e818e7ae7210678f0411ec04 Mon Sep 17 00:00:00 2001 From: "Tristan F." 
Date: Fri, 9 Jan 2026 20:24:24 -0800 Subject: [PATCH 14/14] chore: drop pca/hac param access --- Snakefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/Snakefile b/Snakefile index a660d542e..6ff4d73a9 100644 --- a/Snakefile +++ b/Snakefile @@ -26,8 +26,6 @@ def without_keys(d: dict, keys: list): out_dir = _config.config.out_dir algorithm_params = _config.config.algorithm_params -pca_params = _config.config.pca_params -hac_params = _config.config.hac_params container_settings = _config.config.container_settings pca_params = without_keys(vars(_config.config.analysis.pca), ["pca_chosen", "include", "aggregate_per_algorithm"]) hac_params = without_keys(vars(_config.config.analysis.hac), ["include", "aggregate_per_algorithm"])