From 8392c151978feaff9cfc442542dffdaa6cb2487b Mon Sep 17 00:00:00 2001
From: "Tristan F." <pub.tristanf@gmail.com>
Date: Tue, 25 Nov 2025 16:31:26 -0800
Subject: [PATCH 01/14] refactor: separate analysis

---
 Snakefile              | 72 +++++++++++++++++++++++------------
 config/config.yaml     | 47 +++++++++++++++--------
 spras/config/config.py | 85 ++----------------------------------------
 spras/config/schema.py | 52 +++++++++++++++++++-------
 4 files changed, 122 insertions(+), 134 deletions(-)

diff --git a/Snakefile b/Snakefile
index 5b9340ccc..31f0d279e 100644
--- a/Snakefile
+++ b/Snakefile
@@ -20,13 +20,16 @@ wildcard_constraints:
 # without declaration!
 _config.init_global(config)
 
+def without_keys(d: dict, keys: list):
+    if set(keys) & set(d.keys()) != set(keys): raise RuntimeError(f"Keys {keys} not fully present in {list(d.keys())}!")
+    return {k: v for k, v in d.items() if k not in keys}
+
 out_dir = _config.config.out_dir
 algorithm_params = _config.config.algorithm_params
 algorithm_directed = _config.config.algorithm_directed
-pca_params = _config.config.pca_params
-hac_params = _config.config.hac_params
 container_settings = _config.config.container_settings
-include_aggregate_algo_eval = _config.config.analysis_include_evaluation_aggregate_algo
+pca_params = without_keys(vars(_config.config.analysis.pca), ["evaluation", "include", "aggregate_per_algorithm"])
+hac_params = without_keys(vars(_config.config.analysis.hac), ["evaluation", "include", "aggregate_per_algorithm"])
 
 # Return the dataset or gold_standard dictionary from the config file given the label
 def get_dataset(_datasets, label):
@@ -71,55 +74,76 @@ def write_dataset_log(dataset, logfile):
 def make_final_input(wildcards):
     final_input = []
 
-    if _config.config.analysis_include_summary:
+    if _config.config.analysis.summary.include:
         # add summary output file for each pathway
         # TODO: reuse in the future once we make summary work for mixed graphs. See https://github.com/Reed-CompBio/spras/issues/128
         # final_input.extend(expand('{out_dir}{sep}{dataset}-{algorithm_params}{sep}summary.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
         # add table summarizing all pathways for each dataset
         final_input.extend(expand('{out_dir}{sep}{dataset}-pathway-summary.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels))
 
-    if _config.config.analysis_include_cytoscape:
+    if _config.config.analysis.cytoscape.include:
         final_input.extend(expand('{out_dir}{sep}{dataset}-cytoscape.cys',out_dir=out_dir,sep=SEP,dataset=dataset_labels))
 
-    if _config.config.analysis_include_ml:
+    if _config.config.analysis.pca.include:
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}pca.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}pca-variance.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
-        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-vertical.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
-        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-clusters-vertical.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}pca-coordinates.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
-        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-horizontal.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
-        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-clusters-horizontal.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
-        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}ensemble-pathway.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
-        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}jaccard-matrix.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
-        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}jaccard-heatmap.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
 
-    if _config.config.analysis_include_ml_aggregate_algo:
+    if _config.config.analysis.pca.aggregate_per_algorithm:
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-pca.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos))
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-pca-variance.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos))
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-pca-coordinates.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos))
+
+    if _config.config.analysis.hac.include:
+        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-vertical.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
+        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-clusters-vertical.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
+        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-horizontal.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
+        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}hac-clusters-horizontal.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
+
+    if _config.config.analysis.hac.aggregate_per_algorithm:
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-hac-vertical.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos))
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-hac-clusters-vertical.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos))
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-hac-horizontal.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos))
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-hac-clusters-horizontal.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms_mult_param_combos))
+
+    if _config.config.analysis.ensemble.include:
+        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}ensemble-pathway.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
+    
+    if _config.config.analysis.ensemble.aggregate_per_algorithm:
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-ensemble-pathway.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms))
+    
+    if _config.config.analysis.jaccard.include:
+        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}jaccard-matrix.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
+        final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}jaccard-heatmap.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm_params=algorithms_with_params))
+
+    if _config.config.analysis.jaccard.aggregate_per_algorithm:
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-jaccard-matrix.txt',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms))
         final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-jaccard-heatmap.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms))
 
-    if _config.config.analysis_include_evaluation:
+    if _config.config.analysis.evaluation.include:
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm_params=algorithms_with_params))
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
-        final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
-        final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
-        final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
-        final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
+
         # dummy file
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}dummy-edge.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs))
-    
-    if _config.config.analysis_include_evaluation_aggregate_algo:
+
+    if _config.config.analysis.evaluation.aggregate_per_algorithm:
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms))
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms))
+    
+    if _config.config.analysis.pca.evaluation.include:
+        final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
+        final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
+
+    if _config.config.analysis.pca.evaluation.aggregate_per_algorithm:
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
+    
+    if _config.config.analysis.ensemble.evaluation.include:
+        final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
+        final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
+
+    if _config.config.analysis.ensemble.evaluation.aggregate_per_algorithm:
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes-per-algorithm-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes-per-algorithm-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
 
@@ -463,7 +487,7 @@ rule evaluation_per_algo_pr_per_pathways:
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         pr_df = Evaluation.node_precision_and_recall(input.pathways, node_table)
-        Evaluation.precision_and_recall_per_pathway(pr_df, output.node_pr_file, output.node_pr_png, include_aggregate_algo_eval)
+        Evaluation.precision_and_recall_per_pathway(pr_df, output.node_pr_file, output.node_pr_png, _config.config.analysis.evaluation.aggregate_per_algorithm)
 
 # Return pathway summary file per dataset
 def collect_summary_statistics_per_dataset(wildcards):
@@ -511,7 +535,7 @@ rule evaluation_per_algo_pca_chosen:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir)
         pr_df = Evaluation.node_precision_and_recall(pca_chosen_pathways, node_table)
-        Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png, include_aggregate_algo_eval)
+        Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png, _config.config.analysis.pca.evaluation.aggregate_per_algorithm)
 
 # Return the dataset pickle file for a specific dataset
 def get_dataset_pickle_file(wildcards):
@@ -554,7 +578,7 @@ rule evaluation_per_algo_ensemble_pr_curve:
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         node_ensembles_dict = Evaluation.edge_frequency_node_ensemble(node_table, input.ensemble_files, input.dataset_file)
-        Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, node_table, output.node_pr_curve_png, output.node_pr_curve_file, include_aggregate_algo_eval)
+        Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, node_table, output.node_pr_curve_png, output.node_pr_curve_file, _config.config.analysis.ensemble.evaluation.aggregate_per_algorithm)  # same flag that gates these outputs in make_final_input
 
 rule evaluation_edge_dummy:
     input: 
diff --git a/config/config.yaml b/config/config.yaml
index 11bac082a..b5927cdc2 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -205,33 +205,50 @@ analysis:
   # Create Cytoscape session file with all pathway graphs for each dataset
   cytoscape:
     include: true
-  # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
-  ml:
-    # ml analysis per dataset
+  # The following analysis options also have an `aggregate_per_algorithm` option,
+  # which adds the respective analysis to an algorithm as a whole.
+  # This will only run if the adjacent `include` is true.
+
+  # Principal component analysis of the pathway output files
+  pca:
     include: true
-    # adds ml analysis per algorithm output
-    # only runs for algorithms with multiple parameter combinations chosen
     aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
     # specify how many principal components to calculate
     components: 2
     # boolean to show the labels on the pca graph
     labels: true
-    # 'ward', 'complete', 'average', 'single'
-    # if linkage: ward, must use metric: euclidean
-    linkage: 'ward'
-    # 'euclidean', 'manhattan', 'cosine'
-    metric: 'euclidean'
     # controls whether kernel density estimation (KDE) is computed and visualized on top of PCA plots.
     # the coordinates of the KDE maximum (kde_peak) are also saved to the PCA coordinates output file.
     # KDE needs to be run in order to select a parameter combination with PCA because the maximum kernel density is used
     # to pick the 'best' parameter combination.
     kde: true
-    # removes empty pathways from consideration in ml analysis (pca only)
+    # removes empty pathways from consideration in ml analysis
     remove_empty_pathways: false
+  # Hierarchical agglomerative clustering analysis of the pathway output files
+  hac:
+    include: true
+    aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
+    # 'ward', 'complete', 'average', 'single'
+    # if linkage: ward, must use metric: euclidean
+    linkage: 'ward'
+    # 'euclidean', 'manhattan', 'cosine'
+    metric: 'euclidean'
+  # Ensembling pathway output
+  ensemble:
+    include: true
+    aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
   evaluation:
-    # evaluation per dataset-goldstandard pair
-    # evaluation will not run unless ml include is set to true
+    # evaluation per dataset-goldstandard pair.
+    # This evaluation specifically generates precision-recall curves:
+    # to run evaluation on top of the other options, see the respective `evaluation` blocks under the other analyses.
     include: true
-    # adds evaluation per algorithm per dataset-goldstandard pair
-    # evaluation per algorithm will not run unless ml include and ml aggregate_per_algorithm are set to true
     aggregate_per_algorithm: true
diff --git a/spras/config/config.py b/spras/config/config.py
index cb19b2b1d..9572caa63 100644
--- a/spras/config/config.py
+++ b/spras/config/config.py
@@ -82,29 +82,7 @@ def __init__(self, raw_config: dict[str, Any]):
         # Deprecated. Previously a dict mapping algorithm names to a Boolean tracking whether they used directed graphs.
         self.algorithm_directed = None
         # A dict with the analysis settings
-        self.analysis_params = parsed_raw_config.analysis
-        # A dict with the evaluation settings
-        self.evaluation_params = self.analysis_params.evaluation
-        # A dict with the ML settings
-        self.ml_params = self.analysis_params.ml
-        # A Boolean specifying whether to run ML analysis for individual algorithms
-        self.analysis_include_ml_aggregate_algo = None
-        # A dict with the PCA settings
-        self.pca_params = None
-        # A dict with the hierarchical clustering settings
-        self.hac_params = None
-        # A Boolean specifying whether to run the summary analysis
-        self.analysis_include_summary = None
-        # A Boolean specifying whether to run the Cytoscape analysis
-        self.analysis_include_cytoscape = None
-        # A Boolean specifying whether to run the ML analysis
-        self.analysis_include_ml = None
-        # A Boolean specifying whether to run the Evaluation analysis
-        self.analysis_include_evaluation = None
-        # A Boolean specifying whether to run the ML per algorithm analysis
-        self.analysis_include_ml_aggregate_algo = None
-        # A Boolean specifying whether to run the evaluation per algorithm analysis
-        self.analysis_include_evaluation_aggregate_algo = None
+        self.analysis = parsed_raw_config.analysis
 
         self.process_config(parsed_raw_config)
 
@@ -225,67 +203,12 @@ def process_algorithms(self, raw_config: RawConfig):
                                         f'(current length {self.hash_length}).')
                     self.algorithm_params[alg.name][params_hash] = run_dict
 
-    def process_analysis(self, raw_config: RawConfig):
-        if not raw_config.analysis:
-            return
-
-        # self.ml_params is a class, pca_params needs to be a dict.
-        self.pca_params = {
-            "components": self.ml_params.components,
-            "labels": self.ml_params.labels,
-            "kde": self.ml_params.kde,
-            "remove_empty_pathways": self.ml_params.remove_empty_pathways
-        }
-
-        self.hac_params = {
-            "linkage": self.ml_params.linkage,
-            "metric": self.ml_params.metric
-        }
-
-        self.analysis_include_summary = raw_config.analysis.summary.include
-        self.analysis_include_cytoscape = raw_config.analysis.cytoscape.include
-        self.analysis_include_ml = raw_config.analysis.ml.include
-        self.analysis_include_evaluation = raw_config.analysis.evaluation.include
-
-        # Only run ML aggregate per algorithm if analysis include ML is set to True
-        if self.ml_params.aggregate_per_algorithm and self.analysis_include_ml:
-            self.analysis_include_ml_aggregate_algo = raw_config.analysis.ml.aggregate_per_algorithm
-        else:
-            self.analysis_include_ml_aggregate_algo = False
-
+    def process_analysis(self):
         # Raises an error if Evaluation is enabled but no gold standard data is provided
-        if self.gold_standards == {} and self.analysis_include_evaluation:
+        if self.gold_standards == {} and self.analysis.evaluation.include:
             raise ValueError("Evaluation analysis cannot run as gold standard data not provided. "
                              "Please set evaluation include to false or provide gold standard data.")
 
-        # Only run Evaluation if ML is set to True
-        if not self.analysis_include_ml:
-            self.analysis_include_evaluation = False
-
-        # Only run Evaluation aggregate per algorithm if analysis include ML is set to True
-        if self.evaluation_params.aggregate_per_algorithm and self.analysis_include_evaluation:
-            self.analysis_include_evaluation_aggregate_algo = raw_config.analysis.evaluation.aggregate_per_algorithm
-        else:
-            self.analysis_include_evaluation_aggregate_algo = False
-
-        # Only run Evaluation per algorithm if ML per algorithm is set to True
-        if not self.analysis_include_ml_aggregate_algo:
-            self.analysis_include_evaluation_aggregate_algo = False
-
-        # Set kde to True if Evaluation is set to True
-        # When Evaluation is True, PCA is used to pick a single parameter combination for all algorithms with multiple
-        # parameter combinations and KDE is used to choose the parameter combination in the PC space
-        if self.analysis_include_evaluation and not self.pca_params["kde"]:
-            self.pca_params["kde"] = True
-            print("Setting kde to true; Evaluation analysis needs to run KDE for PCA-Chosen parameter selection.")
-
-        # Set summary include to True if Evaluation is set to True
-        # When a PCA-chosen parameter set is chosen, summary statistics are used to resolve tiebreakers.
-        if self.analysis_include_evaluation and not self.analysis_include_summary:
-            self.analysis_include_summary = True
-            print("Setting summary include to true; Evaluation analysis needs to use summary statistics for PCA-Chosen parameter selection.")
-
-
     def process_config(self, raw_config: RawConfig):
         # Set up a few top-level config variables
         self.out_dir = raw_config.reconstruction_settings.locations.reconstruction_dir
@@ -295,4 +218,4 @@ def process_config(self, raw_config: RawConfig):
 
         self.process_datasets(raw_config)
         self.process_algorithms(raw_config)
-        self.process_analysis(raw_config)
+        self.process_analysis()
diff --git a/spras/config/schema.py b/spras/config/schema.py
index 8aa067a53..f3459a277 100644
--- a/spras/config/schema.py
+++ b/spras/config/schema.py
@@ -11,9 +11,10 @@
 """
 
 import re
+import warnings
 from typing import Annotated, Optional
 
-from pydantic import AfterValidator, BaseModel, ConfigDict
+from pydantic import AfterValidator, BaseModel, ConfigDict, model_validator
 
 from spras.config.container_schema import ContainerSettings
 from spras.config.util import CaseInsensitiveEnum
@@ -37,42 +38,65 @@ class CytoscapeAnalysis(BaseModel):
 # Note that CaseInsensitiveEnum is not pydantic: pydantic
 # has special support for enums, but we avoid the
 # pydantic-specific "model_config" key here for this reason.
-class MlLinkage(CaseInsensitiveEnum):
+class HacLinkage(CaseInsensitiveEnum):
     ward = 'ward'
     complete = 'complete'
     average = 'average'
     single = 'single'
 
-class MlMetric(CaseInsensitiveEnum):
+class HacMetric(CaseInsensitiveEnum):
     euclidean = 'euclidean'
     manhattan = 'manhattan'
     cosine = 'cosine'
 
-class MlAnalysis(BaseModel):
+class AggregateAnalysis(BaseModel):
     include: bool
     aggregate_per_algorithm: bool = False
+
+    model_config = ConfigDict(extra='forbid')
+
+    @model_validator(mode='after')
+    def check_aggregate_when_include(self):
+        if self.aggregate_per_algorithm and not self.include:
+            warnings.warn("aggregate_per_algorithm is set to True but include is set to False; setting aggregate_per_algorithm to False", stacklevel=2)
+            self.aggregate_per_algorithm = False
+        return self
+
+class EvaluationAnalysis(AggregateAnalysis): pass
+class AggregateEvaluationAnalysis(AggregateAnalysis):  # an analysis with a nested `evaluation` toggle block
+    evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False)
+
+    @model_validator(mode='after')
+    def check_include_when_evaluation_include(self):
+        if self.evaluation.include and not self.include:
+            warnings.warn("evaluation.include is set to True but include is set to False; setting evaluation.include to False", stacklevel=2)
+            self.evaluation = EvaluationAnalysis(include=False)  # replace wholesale so evaluation.aggregate_per_algorithm is cleared too
+        return self
+
+class PcaAnalysis(AggregateEvaluationAnalysis):
     components: int = 2
     labels: bool = True
     kde: bool = False
     remove_empty_pathways: bool = False
-    linkage: MlLinkage = MlLinkage.ward
-    metric: MlMetric = MlMetric.euclidean
 
-    model_config = ConfigDict(extra='forbid')
+class HacAnalysis(AggregateEvaluationAnalysis):
+    linkage: HacLinkage = HacLinkage.ward
+    metric: HacMetric = HacMetric.euclidean
 
-class EvaluationAnalysis(BaseModel):
-    include: bool
-    aggregate_per_algorithm: bool = False
-
-    model_config = ConfigDict(extra='forbid')
+class EnsembleAnalysis(AggregateEvaluationAnalysis): pass
+class JaccardAnalysis(AggregateAnalysis): pass
 
 class Analysis(BaseModel):
     summary: SummaryAnalysis = SummaryAnalysis(include=False)
     cytoscape: CytoscapeAnalysis = CytoscapeAnalysis(include=False)
-    ml: MlAnalysis = MlAnalysis(include=False)
+    pca: PcaAnalysis = PcaAnalysis(include=False)
+    hac: HacAnalysis = HacAnalysis(include=False)
+    jaccard: JaccardAnalysis = JaccardAnalysis(include=False)
+    ensemble: EnsembleAnalysis = EnsembleAnalysis(include=False)
     evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False)
+    """Enables PR curve evaluation."""
 
-    model_config = ConfigDict(extra='forbid')
+    model_config = ConfigDict(extra='forbid', use_attribute_docstrings=True)
 
 
 # The default length of the truncated hash used to identify parameter combinations

From 9dd917e4e4c83cf756328b9a3b4dbcd5a49848b0 Mon Sep 17 00:00:00 2001
From: "Tristan F." <pub.tristanf@gmail.com>
Date: Wed, 26 Nov 2025 00:52:45 +0000
Subject: [PATCH 02/14] docs: update

---
 config/egfr.yaml                          | 26 +++++++++++---
 docker-wrappers/SPRAS/example_config.yaml | 41 ++++++++++++++++++++---
 docs/tutorial/advanced.rst                |  7 ++++
 docs/tutorial/beginner.rst                |  2 +-
 docs/tutorial/intermediate.rst            | 18 ++++++----
 test/analysis/input/config.yaml           | 41 +++++++++++++++++++----
 test/analysis/input/egfr.yaml             | 31 ++++++++++++++---
 7 files changed, 141 insertions(+), 25 deletions(-)

diff --git a/config/egfr.yaml b/config/egfr.yaml
index 25e56ab25..60a145249 100644
--- a/config/egfr.yaml
+++ b/config/egfr.yaml
@@ -150,16 +150,34 @@ reconstruction_settings:
   locations:
     reconstruction_dir: output/egfr
 analysis:
-  cytoscape:
-    include: true
   summary:
     include: true
-  ml:
+  cytoscape:
+    include: true
+  pca:
     include: true
     aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
+    components: 2
     labels: true
     kde: true
     remove_empty_pathways: true
-  evaluation:
+  hac:
     include: true
     aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
+    linkage: 'ward'
+    metric: 'euclidean'
+  ensemble:
+    include: true
+    aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
+  evaluation:
+    include: false
+    aggregate_per_algorithm: false
diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml
index db1c2dbbf..d4a0a6a6c 100644
--- a/docker-wrappers/SPRAS/example_config.yaml
+++ b/docker-wrappers/SPRAS/example_config.yaml
@@ -138,18 +138,51 @@ analysis:
     include: true
   # Create Cytoscape session file with all pathway graphs for each dataset
   cytoscape:
-    include: false
-  # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
-  ml:
     include: true
+  # The following analysis options also have an `aggregate_per_algorithm` option,
+  # which adds the respective analysis to an algorithm as a whole.
+  # This will only run if the adjacent `include` is true.
+
+  # Principal component analysis of the pathway output files
+  pca:
+    include: true
+    aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
     # specify how many principal components to calculate
     components: 2
     # boolean to show the labels on the pca graph
     labels: true
+    # controls whether kernel density estimation (KDE) is computed and visualized on top of PCA plots.
+    # the coordinates of the KDE maximum (kde_peak) are also saved to the PCA coordinates output file.
+    # KDE needs to be run in order to select a parameter combination with PCA because the maximum kernel density is used
+    # to pick the 'best' parameter combination.
+    kde: true
+    # removes empty pathways from consideration in ml analysis
+    remove_empty_pathways: false
+  # Hierarchical agglomerative clustering analysis of the pathway output files
+  hac:
+    include: true
+    aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
     # 'ward', 'complete', 'average', 'single'
     # if linkage: ward, must use metric: euclidean
     linkage: 'ward'
     # 'euclidean', 'manhattan', 'cosine'
     metric: 'euclidean'
+  # Ensembling pathway output
+  ensemble:
+    include: true
+    aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
   evaluation:
-    include: false
+    # evaluation per dataset-goldstandard pair.
+    # This evaluation specifically generates precision-recall curves:
+    # to run evaluation on top of the other options, see the respective `evaluation` blocks under the other analyses.
+    include: true
+    aggregate_per_algorithm: true
diff --git a/docs/tutorial/advanced.rst b/docs/tutorial/advanced.rst
index 8f7e8b645..569733631 100644
--- a/docs/tutorial/advanced.rst
+++ b/docs/tutorial/advanced.rst
@@ -106,6 +106,13 @@ When gold standards are provided and evaluation is enabled (``include: true``),
     analysis:
         evaluation:
             include: true
+        # Evaluation can also be enabled for
+        # the PCA, HAC, and ensemble analyses.
+        # For example,
+        ensemble:
+            include: true
+            evaluation:
+                include: true
 
 A gold standard dataset must include the following types of keys and files:
 
diff --git a/docs/tutorial/beginner.rst b/docs/tutorial/beginner.rst
index 9c8f7f236..43e265a49 100644
--- a/docs/tutorial/beginner.rst
+++ b/docs/tutorial/beginner.rst
@@ -199,7 +199,7 @@ Analysis
         include: true
     cytoscape:
         include: true
-    ml:
+    pca:
         include: true
    
 
diff --git a/docs/tutorial/intermediate.rst b/docs/tutorial/intermediate.rst
index 2e569e092..e39ab70cd 100644
--- a/docs/tutorial/intermediate.rst
+++ b/docs/tutorial/intermediate.rst
@@ -689,25 +689,31 @@ And the file ``egfr-omicsintegrator1-params-GUMLBDZ/pathway.txt`` contains the f
     MRE11_HUMAN	RAD50_HUMAN	1	U
 
 
-Step 3: Use ML post-analysis
+Step 3: Use ML-related post-analysis
 =============================
 
-3.1 Adding ML post-analysis to the intermediate configuration
+3.1 Adding ML-related post-analysis to the intermediate configuration
 -------------------------------------------------------------
 
-To enable the ML analysis, update the analysis section in your configuration file by setting ml to true. 
+To enable ML-related analysis, update the analysis section in your configuration file by setting your desired ML analyses to true. 
 Your analysis section in the configuration file should look like this:
 
 .. code-block:: yaml
 
     analysis:
-        ml:
+        pca:
+            include: true
+        hac:
+            include: true
+        ensemble:
+            include: true
+        jaccard:
             include: true
             ... (other parameters preset)
 
-``ml`` will perform unsupervised analyses such as principal component analysis (PCA), hierarchical agglomerative clustering (HAC), ensembling, and jaccard similarity comparisons of the pathways.
+These settings will perform principal component analysis (PCA), hierarchical agglomerative clustering (HAC), ensembling, and Jaccard similarity comparisons of the pathways, respectively.
 
-- The  ``ml`` section includes configurable parameters that let you adjust the behavior of the analyses performed.
+- These sections include configurable parameters that let you adjust the behavior of the analyses performed.
 
 With these updates, SPRAS will run the full set of unsupervised machine learning analyses across all outputs for a given dataset.
 
diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml
index 15a5572fa..871bd6c84 100644
--- a/test/analysis/input/config.yaml
+++ b/test/analysis/input/config.yaml
@@ -115,21 +115,50 @@ analysis:
   # Create Cytoscape session file with all pathway graphs for each dataset
   cytoscape:
     include: true
-  # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
-  ml:
-    # ml analysis per dataset
+  # The following analysis options also have an `aggregate_per_algorithm` option,
+  # which adds the respective analysis to an algorithm as a whole.
+  # This will only run if the adjacent `include` is true.
+
+  # Principal component analysis of the pathway output files
+  pca:
     include: false
-    # adds ml analysis per algorithm output
-    # only runs for algorithms with multiple parameter combinations chosen
-    aggregate_per_algorithm: true
+    aggregate_per_algorithm: false
+    evaluation:
+      include: false
+      aggregate_per_algorithm: false
     # specify how many principal components to calculate
     components: 2
     # boolean to show the labels on the pca graph
     labels: true
+    # controls whether kernel density estimation (KDE) is computed and visualized on top of PCA plots.
+    # the coordinates of the KDE maximum (kde_peak) are also saved to the PCA coordinates output file.
+    # KDE needs to be run in order to select a parameter combination with PCA because the maximum kernel density is used
+    # to pick the 'best' parameter combination.
+    kde: true
+    # removes empty pathways from consideration in ml analysis
+    remove_empty_pathways: false
+  # Hierarchical agglomerative clustering analysis of the pathway output files
+  hac:
+    include: false
+    aggregate_per_algorithm: false
+    evaluation:
+      include: false
+      aggregate_per_algorithm: false
     # 'ward', 'complete', 'average', 'single'
     # if linkage: ward, must use metric: euclidean
     linkage: 'ward'
     # 'euclidean', 'manhattan', 'cosine'
     metric: 'euclidean'
+  # Ensembling pathway output
+  ensemble:
+    include: false
+    aggregate_per_algorithm: true
+    evaluation:
+      include: false
+      aggregate_per_algorithm: false
   evaluation:
+    # evaluation per dataset-goldstandard pair.
+    # This evaluation specifically generates precision-recall curves:
+    # to run evaluation on top of the other options, see the respective `evaluation` blocks under the other analyses.
     include: false
+    aggregate_per_algorithm: false
diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml
index d26bded2d..cf4295e46 100644
--- a/test/analysis/input/egfr.yaml
+++ b/test/analysis/input/egfr.yaml
@@ -97,11 +97,34 @@ reconstruction_settings:
   locations:
     reconstruction_dir: output/egfr
 analysis:
+  summary:
+    include: true
   cytoscape:
     include: true
-  summary:
+  pca:
+    include: true
+    aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
+    components: 2
+    labels: true
+    kde: true
+    remove_empty_pathways: true
+  hac:
     include: true
-  ml:
-    include: false
+    aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
+    linkage: 'ward'
+    metric: 'euclidean'
+  ensemble:
+    include: true
+    aggregate_per_algorithm: true
+    evaluation:
+      include: true
+      aggregate_per_algorithm: true
   evaluation:
-    include: false
+    include: true
+    aggregate_per_algorithm: true

From 49d1f48382e34e74b906e176a4499d05f786ca02 Mon Sep 17 00:00:00 2001
From: "Tristan F." <pub.tristanf@gmail.com>
Date: Wed, 26 Nov 2025 03:25:32 +0000
Subject: [PATCH 03/14] test(config): fix

---
 config/config.yaml     |   3 +
 config/egfr.yaml       |   2 +
 spras/config/config.py |   2 +-
 spras/config/schema.py |   3 +
 test/test_config.py    | 130 ++++++++++++-----------------------------
 5 files changed, 47 insertions(+), 93 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index b5927cdc2..47aa35c00 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -246,6 +246,9 @@ analysis:
     evaluation:
       include: true
       aggregate_per_algorithm: true
+  # Jaccard pathway output
+  jaccard:
+    enable: true
   evaluation:
     # evaluation per dataset-goldstandard pair.
     # This evaluation specifically generates precision-recall curves:
diff --git a/config/egfr.yaml b/config/egfr.yaml
index 60a145249..b305db9b0 100644
--- a/config/egfr.yaml
+++ b/config/egfr.yaml
@@ -178,6 +178,8 @@ analysis:
     evaluation:
       include: true
       aggregate_per_algorithm: true
+  jaccard:
+    enable: false
   evaluation:
     include: false
     aggregate_per_algorithm: false
diff --git a/spras/config/config.py b/spras/config/config.py
index 9572caa63..3cd94dafc 100644
--- a/spras/config/config.py
+++ b/spras/config/config.py
@@ -129,7 +129,7 @@ def process_algorithms(self, raw_config: RawConfig):
         Keys in the parameter dictionary are strings
         """
         prior_params_hashes = set()
-        self.algorithm_params = dict()
+        self.algorithm_params: dict[str, Any] = dict()
         self.algorithm_directed = dict()
         self.algorithms = raw_config.algorithms
         for alg in self.algorithms:
diff --git a/spras/config/schema.py b/spras/config/schema.py
index f3459a277..3e97c1721 100644
--- a/spras/config/schema.py
+++ b/spras/config/schema.py
@@ -71,6 +71,9 @@ def check_include_when_evaluation_include(self):
         if self.evaluation.include and not self.include:
             warnings.warn("evaluation.include is set to True but include is set to False; setting evaluation.include to False", stacklevel=2)
             self.evaluation.include = False
+        if self.evaluation.aggregate_per_algorithm and not self.aggregate_per_algorithm:
+            warnings.warn("evaluation.aggregate_per_algorithm is set to True but aggregate_per_algorithm is set to False; setting evaluation.aggregate_per_algorithm to False", stacklevel=2)
+            self.evaluation.aggregate_per_algorithm = False
         return self
 
 class PcaAnalysis(AggregateEvaluationAnalysis):
diff --git a/test/test_config.py b/test/test_config.py
index c8b05f3c5..70d7175fd 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -105,13 +105,32 @@ def get_test_config():
             "summary": {
                 "include": False
             },
-            "ml": {
+            "pca": {
                 "include": False,
                 "aggregate_per_algorithm": False,
+                "evaluation": {
+                    "include": False
+                }
+            },
+            "hac": {
+                "include": False,
+                "aggregate_per_algorithm": False,
+                "evaluation": {
+                    "include": False
+                }
+            },
+            "ensemble": {
+                "include": False,
+                "evaluation": {
+                    "include": False
+                }
             },
             "cytoscape": {
                 "include": False
             },
+            "jaccard": {
+                "include": False
+            },
             "evaluation": {
                 "include": False,
                 "aggregate_per_algorithm": False
@@ -254,54 +273,21 @@ def test_config_values(self):
         value_test_util('boolArrTest', [{'flags': True, 'range': 1}, {'flags': False, 'range': 2},
                                      {'flags': False, 'range': 1}, {'flags': True, 'range': 2}])
 
-    @pytest.mark.parametrize("ml_include, eval_include, expected_ml, expected_eval", [
+    @pytest.mark.parametrize("include, eval_include, expected_include, expected_eval", [
         (True, True, True, True),
         (True, False, True, False),
         (False, True, False, False),
         (False, False, False, False)
     ])
-    def test_eval_ml_coupling(self, ml_include, eval_include, expected_ml, expected_eval):
-        test_config = get_test_config()
-        test_config["analysis"]["ml"]["include"] = ml_include
-        test_config["analysis"]["evaluation"]["include"] = eval_include
-        config.init_global(test_config)
-
-        assert config.config.analysis_include_ml == expected_ml
-        assert config.config.analysis_include_evaluation == expected_eval
-
-    @pytest.mark.parametrize("ml_include, ml_agg_include, expected_ml, expected_ml_agg", [
-        (True, True, True, True),
-        (True, False, True, False),
-        (False, True, False, False),
-        (False, False, False, False)
-    ])
-    def test_ml_agg_algo_coupling(self, ml_include, ml_agg_include, expected_ml, expected_ml_agg):
-        test_config = get_test_config()
-        test_config["analysis"]["ml"]["include"] = ml_include
-        test_config["analysis"]["ml"]["aggregate_per_algorithm"] = ml_agg_include
-        config.init_global(test_config)
-
-        assert config.config.analysis_include_ml == expected_ml
-        assert config.config.analysis_include_ml_aggregate_algo == expected_ml_agg
-
-    @pytest.mark.parametrize("eval_include, agg_algo, expected_eval, expected_agg_algo", [
-        (True, True, True, True),
-        (True, False, True, False),
-        (False, True, False, False),
-        (False, False, False, False),
-    ])
-    def test_eval_agg_algo_coupling(self, eval_include, agg_algo, expected_eval, expected_agg_algo):
+    @pytest.mark.parametrize("analysis_type", ["pca", "hac", "ensemble"])
+    def test_eval_pca_coupling(self, include, eval_include, expected_include, expected_eval, analysis_type):
         test_config = get_test_config()
-        test_config["analysis"]["ml"]["include"] = True
-        test_config["analysis"]["ml"]["aggregate_per_algorithm"] = True
-
-        test_config["analysis"]["evaluation"]["include"] = eval_include
-        test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = agg_algo
-
+        test_config["analysis"][analysis_type]["include"] = include
+        test_config["analysis"][analysis_type]["evaluation"]["include"] = eval_include
         config.init_global(test_config)
 
-        assert config.config.analysis_include_evaluation == expected_eval
-        assert config.config.analysis_include_evaluation_aggregate_algo == expected_agg_algo
+        assert vars(config.config.analysis)[analysis_type].include == expected_include
+        assert vars(config.config.analysis)[analysis_type].evaluation.include == expected_eval
 
     @pytest.mark.parametrize("ml_include, ml_agg, eval_include, eval_agg, expected_ml, expected_ml_agg, expected_eval, expected_eval_agg", [
         (False, True,  True,  True,  False, False, False, False),
@@ -310,61 +296,21 @@ def test_eval_agg_algo_coupling(self, eval_include, agg_algo, expected_eval, exp
         (True,  True,  True,  True,  True,  True,  True,  True),
         (True,  False, False, False, True,  False, False, False),
     ])
+    @pytest.mark.parametrize("analysis_type", ["pca", "hac", "ensemble"])
     def test_eval_ml_agg_algo_coupling(self, ml_include, ml_agg, eval_include, eval_agg, expected_ml, expected_ml_agg,
-                                       expected_eval, expected_eval_agg):
-        # the value of ml include and ml aggregate_per_algorithm can affect the value of evaluation include and
+                                       expected_eval, expected_eval_agg, analysis_type):
+        # the value of pca include and pca aggregate_per_algorithm can affect the value of evaluation include and
         # evaluation aggregate_per_algorithm
         test_config = get_test_config()
 
-        test_config["analysis"]["ml"]["include"] = ml_include
-        test_config["analysis"]["ml"]["aggregate_per_algorithm"] = ml_agg
-        test_config["analysis"]["evaluation"]["include"] = eval_include
-        test_config["analysis"]["evaluation"]["aggregate_per_algorithm"] = eval_agg
+        test_config["analysis"][analysis_type]["include"] = ml_include
+        test_config["analysis"][analysis_type]["aggregate_per_algorithm"] = ml_agg
+        test_config["analysis"][analysis_type]["evaluation"]["include"] = eval_include
+        test_config["analysis"][analysis_type]["evaluation"]["aggregate_per_algorithm"] = eval_agg
 
         config.init_global(test_config)
 
-        assert config.config.analysis_include_ml == expected_ml
-        assert config.config.analysis_include_ml_aggregate_algo == expected_ml_agg
-        assert config.config.analysis_include_evaluation == expected_eval
-        assert config.config.analysis_include_evaluation_aggregate_algo == expected_eval_agg
-
-    @pytest.mark.parametrize("eval_include, kde, expected_eval, expected_kde", [
-        (True, True, True, True),
-        (True, False, True, True),
-        (False, True, False, True),
-        (False, False, False, False),
-    ])
-    def test_eval_kde_coupling(self, eval_include, kde, expected_eval, expected_kde):
-        test_config = get_test_config()
-        test_config["analysis"]["ml"]["include"] = True
-        # dealing with other coupling issue
-        test_config["analysis"]["summary"]["include"] = True
-
-        test_config["analysis"]["ml"]["kde"] = kde
-        test_config["analysis"]["evaluation"]["include"] = eval_include
-
-        config.init_global(test_config)
-
-        assert config.config.analysis_include_evaluation == expected_eval
-        assert config.config.pca_params["kde"] == expected_kde
-
-    @pytest.mark.parametrize("eval_include, summary_include, expected_eval, expected_summary", [
-        (True, True, True, True),
-        (True, False, True, True),
-        (False, True, False, True),
-        (False, False, False, False),
-    ])
-    def test_eval_summary_coupling(self, eval_include, summary_include, expected_eval, expected_summary):
-        test_config = get_test_config()
-        # dealing with other coupling issue
-        test_config["analysis"]["ml"]["include"] = True
-        test_config["analysis"]["ml"]["kde"] = True
-
-        test_config["analysis"]["summary"]["include"] = summary_include
-        test_config["analysis"]["evaluation"]["include"] = eval_include
-
-        config.init_global(test_config)
-
-        assert config.config.analysis_include_evaluation == expected_eval
-        assert config.config.analysis_include_summary == expected_summary
-
+        assert vars(config.config.analysis)[analysis_type].include == expected_ml, f"Include was not {expected_ml}!"
+        assert vars(config.config.analysis)[analysis_type].aggregate_per_algorithm == expected_ml_agg, f"Aggregate per algorithm was not {expected_ml_agg}!"
+        assert vars(config.config.analysis)[analysis_type].evaluation.include == expected_eval, f"evaluation include was not {expected_eval}!"
+        assert vars(config.config.analysis)[analysis_type].evaluation.aggregate_per_algorithm == expected_eval_agg, f"evaluation aggregate per algorithm was not {expected_eval_agg}!"

From a5736bab7368239f959179f4fa559ee2e8850acd Mon Sep 17 00:00:00 2001
From: "Tristan F." <pub.tristanf@gmail.com>
Date: Wed, 26 Nov 2025 03:29:33 +0000
Subject: [PATCH 04/14] fix(config): enable -> include

---
 config/config.yaml | 2 +-
 config/egfr.yaml   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 47aa35c00..2671b0641 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -248,7 +248,7 @@ analysis:
       aggregate_per_algorithm: true
   # Jaccard pathway output
   jaccard:
-    enable: true
+    include: true
   evaluation:
     # evaluation per dataset-goldstandard pair.
     # This evaluation specifically generates precision-recall curves:
diff --git a/config/egfr.yaml b/config/egfr.yaml
index b305db9b0..cb318b5da 100644
--- a/config/egfr.yaml
+++ b/config/egfr.yaml
@@ -179,7 +179,7 @@ analysis:
       include: true
       aggregate_per_algorithm: true
   jaccard:
-    enable: false
+    include: false
   evaluation:
     include: false
     aggregate_per_algorithm: false

From 28d11b14089f15206ca39e18e4cbf27d5ea51a75 Mon Sep 17 00:00:00 2001
From: "Tristan F." <pub.tristanf@gmail.com>
Date: Wed, 26 Nov 2025 06:43:56 +0000
Subject: [PATCH 05/14] chore(test/analysis/input/egfr): disable other analyses

---
 test/analysis/input/egfr.yaml | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml
index cf4295e46..d8a0c5c09 100644
--- a/test/analysis/input/egfr.yaml
+++ b/test/analysis/input/egfr.yaml
@@ -102,29 +102,29 @@ analysis:
   cytoscape:
     include: true
   pca:
-    include: true
-    aggregate_per_algorithm: true
+    include: false
+    aggregate_per_algorithm: false
     evaluation:
-      include: true
-      aggregate_per_algorithm: true
+      include: false
+      aggregate_per_algorithm: false
     components: 2
-    labels: true
-    kde: true
-    remove_empty_pathways: true
+    labels: false
+    kde: false
+    remove_empty_pathways: false
   hac:
-    include: true
-    aggregate_per_algorithm: true
+    include: false
+    aggregate_per_algorithm: false
     evaluation:
-      include: true
-      aggregate_per_algorithm: true
+      include: false
+      aggregate_per_algorithm: false
     linkage: 'ward'
     metric: 'euclidean'
   ensemble:
-    include: true
-    aggregate_per_algorithm: true
+    include: false
+    aggregate_per_algorithm: false
     evaluation:
-      include: true
-      aggregate_per_algorithm: true
+      include: false
+      aggregate_per_algorithm: false
   evaluation:
-    include: true
-    aggregate_per_algorithm: true
+    include: false
+    aggregate_per_algorithm: false

From 22ee171800e7513096c0b2bfe4f2a4ac1d3cdc82 Mon Sep 17 00:00:00 2001
From: "Tristan F.-R." <pub.tristanf@gmail.com>
Date: Mon, 1 Dec 2025 18:00:27 +0000
Subject: [PATCH 06/14] chore: drop hac evaluation section

---
 config/config.yaml                        | 3 ---
 config/egfr.yaml                          | 3 ---
 docker-wrappers/SPRAS/example_config.yaml | 3 ---
 spras/config/schema.py                    | 2 +-
 test/analysis/input/config.yaml           | 3 ---
 test/analysis/input/egfr.yaml             | 3 ---
 test/test_config.py                       | 9 +++------
 7 files changed, 4 insertions(+), 22 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 2671b0641..5d525e74f 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -231,9 +231,6 @@ analysis:
   hac:
     include: true
     aggregate_per_algorithm: true
-    evaluation:
-      include: true
-      aggregate_per_algorithm: true
     # 'ward', 'complete', 'average', 'single'
     # if linkage: ward, must use metric: euclidean
     linkage: 'ward'
diff --git a/config/egfr.yaml b/config/egfr.yaml
index cb318b5da..4c9186cd4 100644
--- a/config/egfr.yaml
+++ b/config/egfr.yaml
@@ -167,9 +167,6 @@ analysis:
   hac:
     include: true
     aggregate_per_algorithm: true
-    evaluation:
-      include: true
-      aggregate_per_algorithm: true
     linkage: 'ward'
     metric: 'euclidean'
   ensemble:
diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml
index d4a0a6a6c..4c354e657 100644
--- a/docker-wrappers/SPRAS/example_config.yaml
+++ b/docker-wrappers/SPRAS/example_config.yaml
@@ -165,9 +165,6 @@ analysis:
   hac:
     include: true
     aggregate_per_algorithm: true
-    evaluation:
-      include: true
-      aggregate_per_algorithm: true
     # 'ward', 'complete', 'average', 'single'
     # if linkage: ward, must use metric: euclidean
     linkage: 'ward'
diff --git a/spras/config/schema.py b/spras/config/schema.py
index 3e97c1721..43da8a100 100644
--- a/spras/config/schema.py
+++ b/spras/config/schema.py
@@ -82,7 +82,7 @@ class PcaAnalysis(AggregateEvaluationAnalysis):
     kde: bool = False
     remove_empty_pathways: bool = False
 
-class HacAnalysis(AggregateEvaluationAnalysis):
+class HacAnalysis(AggregateAnalysis):
     linkage: HacLinkage = HacLinkage.ward
     metric: HacMetric = HacMetric.euclidean
 
diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml
index 871bd6c84..35aa9766b 100644
--- a/test/analysis/input/config.yaml
+++ b/test/analysis/input/config.yaml
@@ -141,9 +141,6 @@ analysis:
   hac:
     include: false
     aggregate_per_algorithm: false
-    evaluation:
-      include: false
-      aggregate_per_algorithm: false
     # 'ward', 'complete', 'average', 'single'
     # if linkage: ward, must use metric: euclidean
     linkage: 'ward'
diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml
index d8a0c5c09..dcfd7ae84 100644
--- a/test/analysis/input/egfr.yaml
+++ b/test/analysis/input/egfr.yaml
@@ -114,9 +114,6 @@ analysis:
   hac:
     include: false
     aggregate_per_algorithm: false
-    evaluation:
-      include: false
-      aggregate_per_algorithm: false
     linkage: 'ward'
     metric: 'euclidean'
   ensemble:
diff --git a/test/test_config.py b/test/test_config.py
index 70d7175fd..573f702e1 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -114,10 +114,7 @@ def get_test_config():
             },
             "hac": {
                 "include": False,
-                "aggregate_per_algorithm": False,
-                "evaluation": {
-                    "include": False
-                }
+                "aggregate_per_algorithm": False
             },
             "ensemble": {
                 "include": False,
@@ -279,7 +276,7 @@ def test_config_values(self):
         (False, True, False, False),
         (False, False, False, False)
     ])
-    @pytest.mark.parametrize("analysis_type", ["pca", "hac", "ensemble"])
+    @pytest.mark.parametrize("analysis_type", ["pca", "ensemble"])
     def test_eval_pca_coupling(self, include, eval_include, expected_include, expected_eval, analysis_type):
         test_config = get_test_config()
         test_config["analysis"][analysis_type]["include"] = include
@@ -296,7 +293,7 @@ def test_eval_pca_coupling(self, include, eval_include, expected_include, expect
         (True,  True,  True,  True,  True,  True,  True,  True),
         (True,  False, False, False, True,  False, False, False),
     ])
-    @pytest.mark.parametrize("analysis_type", ["pca", "hac", "ensemble"])
+    @pytest.mark.parametrize("analysis_type", ["pca", "ensemble"])
     def test_eval_ml_agg_algo_coupling(self, ml_include, ml_agg, eval_include, eval_agg, expected_ml, expected_ml_agg,
                                        expected_eval, expected_eval_agg, analysis_type):
         # the value of pca include and pca aggregate_per_algorithm can affect the value of evaluation include and

From 32614806d0aead25c5a2b9fda0f23203e20073ac Mon Sep 17 00:00:00 2001
From: "Tristan F.-R." <pub.tristanf@gmail.com>
Date: Mon, 1 Dec 2025 19:17:01 +0000
Subject: [PATCH 07/14] fix(snakemake): remove hac evaluation exclusion

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 31f0d279e..99937d5af 100644
--- a/Snakefile
+++ b/Snakefile
@@ -29,7 +29,7 @@ algorithm_params = _config.config.algorithm_params
 algorithm_directed = _config.config.algorithm_directed
 container_settings = _config.config.container_settings
 pca_params = without_keys(vars(_config.config.analysis.pca), ["evaluation", "include", "aggregate_per_algorithm"])
-hac_params = without_keys(vars(_config.config.analysis.hac), ["evaluation", "include", "aggregate_per_algorithm"])
+hac_params = without_keys(vars(_config.config.analysis.hac), ["include", "aggregate_per_algorithm"])
 
 # Return the dataset or gold_standard dictionary from the config file given the label
 def get_dataset(_datasets, label):

From eb68738b8b5b57c67777796d9c983cfcbf4f4445 Mon Sep 17 00:00:00 2001
From: "Tristan F." <pub.tristanf@gmail.com>
Date: Fri, 5 Dec 2025 06:56:27 +0000
Subject: [PATCH 08/14] fix: rename evaluation -> pca_chosen under pca

---
 config/config.yaml                        |  2 +-
 config/egfr.yaml                          |  2 +-
 docker-wrappers/SPRAS/example_config.yaml |  2 +-
 spras/config/schema.py                    | 40 +++++++++++++----------
 test/analysis/input/config.yaml           |  2 +-
 test/analysis/input/egfr.yaml             |  2 +-
 test/test_config.py                       | 28 +++++++++-------
 7 files changed, 45 insertions(+), 33 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 5d525e74f..4261776f4 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -240,7 +240,7 @@ analysis:
   ensemble:
     include: true
     aggregate_per_algorithm: true
-    evaluation:
+    pca_chosen:
       include: true
       aggregate_per_algorithm: true
   # Jaccard pathway output
diff --git a/config/egfr.yaml b/config/egfr.yaml
index 4c9186cd4..3db52d5bb 100644
--- a/config/egfr.yaml
+++ b/config/egfr.yaml
@@ -172,7 +172,7 @@ analysis:
   ensemble:
     include: true
     aggregate_per_algorithm: true
-    evaluation:
+    pca_chosen:
       include: true
       aggregate_per_algorithm: true
   jaccard:
diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml
index 4c354e657..5b7396a1f 100644
--- a/docker-wrappers/SPRAS/example_config.yaml
+++ b/docker-wrappers/SPRAS/example_config.yaml
@@ -174,7 +174,7 @@ analysis:
   ensemble:
     include: true
     aggregate_per_algorithm: true
-    evaluation:
+    pca_chosen:
       include: true
       aggregate_per_algorithm: true
   evaluation:
diff --git a/spras/config/schema.py b/spras/config/schema.py
index 43da8a100..5b7d02e4e 100644
--- a/spras/config/schema.py
+++ b/spras/config/schema.py
@@ -49,6 +49,12 @@ class HacMetric(CaseInsensitiveEnum):
     manhattan = 'manhattan'
     cosine = 'cosine'
 
+def implies(source: bool, target: bool, source_str: str, target_str: str):
+    if target and not source:  # target may only stay True while source is True; otherwise warn and force it off
+        warnings.warn(f"{target_str} is set to True but {source_str} is set to False; setting {target_str} to False", stacklevel=2)
+        return False
+    return target
+
 class AggregateAnalysis(BaseModel):
     include: bool
     aggregate_per_algorithm: bool = False
@@ -57,36 +63,36 @@ class AggregateAnalysis(BaseModel):
 
     @model_validator(mode='after')
     def check_aggregate_when_include(self):
-        if self.aggregate_per_algorithm and not self.include:
-            warnings.warn("aggregate_per_algorithm is set to True but include is set to False; setting aggregate_per_algorithm to False", stacklevel=2)
-            self.aggregate_per_algorithm = False
+        self.aggregate_per_algorithm = implies(self.include, self.aggregate_per_algorithm, "include", "aggregate_per_algorithm")
         return self
 
 class EvaluationAnalysis(AggregateAnalysis): pass
-class AggregateEvaluationAnalysis(AggregateAnalysis):
-    evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False)
-
-    @model_validator(mode='after')
-    def check_include_when_evaluation_include(self):
-        if self.evaluation.include and not self.include:
-            warnings.warn("evaluation.include is set to True but include is set to False; setting evaluation.include to False", stacklevel=2)
-            self.evaluation.include = False
-        if self.evaluation.aggregate_per_algorithm and not self.aggregate_per_algorithm:
-            warnings.warn("evaluation.aggregate_per_algorithm is set to True but aggregate_per_algorithm is set to False; setting evaluation.aggregate_per_algorithm to False", stacklevel=2)
-            self.evaluation.aggregate_per_algorithm = False
-        return self
 
-class PcaAnalysis(AggregateEvaluationAnalysis):
+class PcaAnalysis(AggregateAnalysis):
     components: int = 2
     labels: bool = True
     kde: bool = False
     remove_empty_pathways: bool = False
+    pca_chosen: EvaluationAnalysis = EvaluationAnalysis(include=False)
+
+    @model_validator(mode='after')
+    def check_include_when_evaluation_include(self):
+        self.pca_chosen.include = implies(self.include, self.pca_chosen.include, "include", "pca_chosen.include")
+        self.pca_chosen.aggregate_per_algorithm = implies(self.aggregate_per_algorithm, self.pca_chosen.aggregate_per_algorithm, "aggregate_per_algorithm", "pca_chosen.aggregate_per_algorithm")
+        return self
 
 class HacAnalysis(AggregateAnalysis):
     linkage: HacLinkage = HacLinkage.ward
     metric: HacMetric = HacMetric.euclidean
 
-class EnsembleAnalysis(AggregateEvaluationAnalysis): pass
+class EnsembleAnalysis(AggregateAnalysis):
+    evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False)
+
+    @model_validator(mode='after')
+    def check_include_when_evaluation_include(self):
+        self.evaluation.include = implies(self.include, self.evaluation.include, "include", "evaluation.include")
+        self.evaluation.aggregate_per_algorithm = implies(self.aggregate_per_algorithm, self.evaluation.aggregate_per_algorithm, "aggregate_per_algorithm", "evaluation.aggregate_per_algorithm")
+        return self
 class JaccardAnalysis(AggregateAnalysis): pass
 
 class Analysis(BaseModel):
diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml
index 35aa9766b..403c9151a 100644
--- a/test/analysis/input/config.yaml
+++ b/test/analysis/input/config.yaml
@@ -150,7 +150,7 @@ analysis:
   ensemble:
     include: false
     aggregate_per_algorithm: true
-    evaluation:
+    pca_chosen:
       include: false
       aggregate_per_algorithm: false
   evaluation:
diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml
index dcfd7ae84..f06b08a93 100644
--- a/test/analysis/input/egfr.yaml
+++ b/test/analysis/input/egfr.yaml
@@ -119,7 +119,7 @@ analysis:
   ensemble:
     include: false
     aggregate_per_algorithm: false
-    evaluation:
+    pca_chosen:
       include: false
       aggregate_per_algorithm: false
   evaluation:
diff --git a/test/test_config.py b/test/test_config.py
index 573f702e1..7e82c866a 100644
--- a/test/test_config.py
+++ b/test/test_config.py
@@ -108,7 +108,7 @@ def get_test_config():
             "pca": {
                 "include": False,
                 "aggregate_per_algorithm": False,
-                "evaluation": {
+                "pca_chosen": {
                     "include": False
                 }
             },
@@ -276,15 +276,18 @@ def test_config_values(self):
         (False, True, False, False),
         (False, False, False, False)
     ])
-    @pytest.mark.parametrize("analysis_type", ["pca", "ensemble"])
-    def test_eval_pca_coupling(self, include, eval_include, expected_include, expected_eval, analysis_type):
+    @pytest.mark.parametrize("analysis_type, evaluation_type", [
+        ("pca", "pca_chosen"),
+        ("ensemble", "evaluation")
+    ])
+    def test_eval_pca_coupling(self, include, eval_include, expected_include, expected_eval, analysis_type, evaluation_type):
         test_config = get_test_config()
         test_config["analysis"][analysis_type]["include"] = include
-        test_config["analysis"][analysis_type]["evaluation"]["include"] = eval_include
+        test_config["analysis"][analysis_type][evaluation_type]["include"] = eval_include
         config.init_global(test_config)
 
         assert vars(config.config.analysis)[analysis_type].include == expected_include
-        assert vars(config.config.analysis)[analysis_type].evaluation.include == expected_eval
+        assert vars(vars(config.config.analysis)[analysis_type])[evaluation_type].include == expected_eval
 
     @pytest.mark.parametrize("ml_include, ml_agg, eval_include, eval_agg, expected_ml, expected_ml_agg, expected_eval, expected_eval_agg", [
         (False, True,  True,  True,  False, False, False, False),
@@ -293,21 +296,24 @@ def test_eval_pca_coupling(self, include, eval_include, expected_include, expect
         (True,  True,  True,  True,  True,  True,  True,  True),
         (True,  False, False, False, True,  False, False, False),
     ])
-    @pytest.mark.parametrize("analysis_type", ["pca", "ensemble"])
+    @pytest.mark.parametrize("analysis_type, evaluation_type", [
+        ("pca", "pca_chosen"),
+        ("ensemble", "evaluation")
+    ])
     def test_eval_ml_agg_algo_coupling(self, ml_include, ml_agg, eval_include, eval_agg, expected_ml, expected_ml_agg,
-                                       expected_eval, expected_eval_agg, analysis_type):
+                                       expected_eval, expected_eval_agg, analysis_type, evaluation_type):
         # the value of pca include and pca aggregate_per_algorithm can affect the value of evaluation include and
         # evaluation aggregate_per_algorithm
         test_config = get_test_config()
 
         test_config["analysis"][analysis_type]["include"] = ml_include
         test_config["analysis"][analysis_type]["aggregate_per_algorithm"] = ml_agg
-        test_config["analysis"][analysis_type]["evaluation"]["include"] = eval_include
-        test_config["analysis"][analysis_type]["evaluation"]["aggregate_per_algorithm"] = eval_agg
+        test_config["analysis"][analysis_type][evaluation_type]["include"] = eval_include
+        test_config["analysis"][analysis_type][evaluation_type]["aggregate_per_algorithm"] = eval_agg
 
         config.init_global(test_config)
 
         assert vars(config.config.analysis)[analysis_type].include == expected_ml, f"Include was not {expected_ml}!"
         assert vars(config.config.analysis)[analysis_type].aggregate_per_algorithm == expected_ml_agg, f"Aggregate per algorithm was not {expected_ml_agg}!"
-        assert vars(config.config.analysis)[analysis_type].evaluation.include == expected_eval, f"evaluation include was not {expected_eval}!"
-        assert vars(config.config.analysis)[analysis_type].evaluation.aggregate_per_algorithm == expected_eval_agg, f"evaluation aggregate per algorithm was not {expected_eval_agg}!"
+        assert vars(vars(config.config.analysis)[analysis_type])[evaluation_type].include == expected_eval, f"evaluation include was not {expected_eval}!"
+        assert vars(vars(config.config.analysis)[analysis_type])[evaluation_type].aggregate_per_algorithm == expected_eval_agg, f"evaluation aggregate per algorithm was not {expected_eval_agg}!"

From 2b87accfd9555c9dc7dfb879424b951af8a56fb4 Mon Sep 17 00:00:00 2001
From: "Tristan F." <pub.tristanf@gmail.com>
Date: Fri, 5 Dec 2025 07:05:49 +0000
Subject: [PATCH 09/14] chore: correct configs

---
 config/config.yaml                        | 4 ++--
 config/egfr.yaml                          | 4 ++--
 docker-wrappers/SPRAS/example_config.yaml | 4 ++--
 test/analysis/input/config.yaml           | 4 ++--
 test/analysis/input/egfr.yaml             | 4 ++--
 5 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/config/config.yaml b/config/config.yaml
index 4261776f4..ebadac917 100644
--- a/config/config.yaml
+++ b/config/config.yaml
@@ -213,7 +213,7 @@ analysis:
   pca:
     include: true
     aggregate_per_algorithm: true
-    evaluation:
+    pca_chosen:
       include: true
       aggregate_per_algorithm: true
     # specify how many principal components to calculate
@@ -240,7 +240,7 @@ analysis:
   ensemble:
     include: true
     aggregate_per_algorithm: true
-    pca_chosen:
+    evaluation:
       include: true
       aggregate_per_algorithm: true
   # Jaccard pathway output
diff --git a/config/egfr.yaml b/config/egfr.yaml
index 3db52d5bb..ad30a1bf2 100644
--- a/config/egfr.yaml
+++ b/config/egfr.yaml
@@ -157,7 +157,7 @@ analysis:
   pca:
     include: true
     aggregate_per_algorithm: true
-    evaluation:
+    pca_chosen:
       include: true
       aggregate_per_algorithm: true
     components: 2
@@ -172,7 +172,7 @@ analysis:
   ensemble:
     include: true
     aggregate_per_algorithm: true
-    pca_chosen:
+    evaluation:
       include: true
       aggregate_per_algorithm: true
   jaccard:
diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml
index 5b7396a1f..65a5299e5 100644
--- a/docker-wrappers/SPRAS/example_config.yaml
+++ b/docker-wrappers/SPRAS/example_config.yaml
@@ -147,7 +147,7 @@ analysis:
   pca:
     include: true
     aggregate_per_algorithm: true
-    evaluation:
+    pca_chosen:
       include: true
       aggregate_per_algorithm: true
     # specify how many principal components to calculate
@@ -174,7 +174,7 @@ analysis:
   ensemble:
     include: true
     aggregate_per_algorithm: true
-    pca_chosen:
+    evaluation:
       include: true
       aggregate_per_algorithm: true
   evaluation:
diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml
index 403c9151a..4e8ad8bea 100644
--- a/test/analysis/input/config.yaml
+++ b/test/analysis/input/config.yaml
@@ -123,7 +123,7 @@ analysis:
   pca:
     include: false
     aggregate_per_algorithm: false
-    evaluation:
+    pca_chosen:
       include: false
       aggregate_per_algorithm: false
     # specify how many principal components to calculate
@@ -150,7 +150,7 @@ analysis:
   ensemble:
     include: false
     aggregate_per_algorithm: true
-    pca_chosen:
+    evaluation:
       include: false
       aggregate_per_algorithm: false
   evaluation:
diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml
index f06b08a93..d18fc3333 100644
--- a/test/analysis/input/egfr.yaml
+++ b/test/analysis/input/egfr.yaml
@@ -104,7 +104,7 @@ analysis:
   pca:
     include: false
     aggregate_per_algorithm: false
-    evaluation:
+    pca_chosen:
       include: false
       aggregate_per_algorithm: false
     components: 2
@@ -119,7 +119,7 @@ analysis:
   ensemble:
     include: false
     aggregate_per_algorithm: false
-    pca_chosen:
+    evaluation:
       include: false
       aggregate_per_algorithm: false
   evaluation:

From 633d64adc5d798a0e2eff36d11d2795cb4f494db Mon Sep 17 00:00:00 2001
From: "Tristan F." <pub.tristanf@gmail.com>
Date: Fri, 5 Dec 2025 07:08:36 +0000
Subject: [PATCH 10/14] fix(snakefile): without pca_chosen

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 99937d5af..3607d34a1 100644
--- a/Snakefile
+++ b/Snakefile
@@ -28,7 +28,7 @@ out_dir = _config.config.out_dir
 algorithm_params = _config.config.algorithm_params
 algorithm_directed = _config.config.algorithm_directed
 container_settings = _config.config.container_settings
-pca_params = without_keys(vars(_config.config.analysis.pca), ["evaluation", "include", "aggregate_per_algorithm"])
+pca_params = without_keys(vars(_config.config.analysis.pca), ["pca_chosen", "include", "aggregate_per_algorithm"])
 hac_params = without_keys(vars(_config.config.analysis.hac), ["include", "aggregate_per_algorithm"])
 
 # Return the dataset or gold_standard dictionary from the config file given the label

From bfb90c666eb160fdb456a6f9106302aa42d626a1 Mon Sep 17 00:00:00 2001
From: "Tristan F." <pub.tristanf@gmail.com>
Date: Fri, 5 Dec 2025 07:11:23 +0000
Subject: [PATCH 11/14] chore: ref pca_chosen

---
 Snakefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index 3607d34a1..93959353d 100644
--- a/Snakefile
+++ b/Snakefile
@@ -131,11 +131,11 @@ def make_final_input(wildcards):
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms))
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms))
     
-    if _config.config.analysis.pca.evaluation.include:
+    if _config.config.analysis.pca.pca_chosen.include:
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
 
-    if _config.config.analysis.pca.evaluation.aggregate_per_algorithm:
+    if _config.config.analysis.pca.pca_chosen.aggregate_per_algorithm:
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
         final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
     

From 08d25ee8fe5924b7fdc50a5a9d7714e454e20688 Mon Sep 17 00:00:00 2001
From: "Tristan F.-R." <pub.tristanf@gmail.com>
Date: Fri, 5 Dec 2025 18:31:05 +0000
Subject: [PATCH 12/14] fix(snakemake): use correct pca field

---
 Snakefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Snakefile b/Snakefile
index 93959353d..2a1261371 100644
--- a/Snakefile
+++ b/Snakefile
@@ -535,7 +535,7 @@ rule evaluation_per_algo_pca_chosen:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir)
         pr_df = Evaluation.node_precision_and_recall(pca_chosen_pathways, node_table)
-        Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png, _config.config.analysis.pca.evaluation.aggregate_per_algorithm)
+        Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png, _config.config.analysis.pca.pca_chosen.aggregate_per_algorithm)
 
 # Return the dataset pickle file for a specific dataset
 def get_dataset_pickle_file(wildcards):

From e3ea0263413cad005b4db25772e01bdf26b2503d Mon Sep 17 00:00:00 2001
From: "Tristan F.-R." <pub.tristanf@gmail.com>
Date: Fri, 5 Dec 2025 18:48:29 +0000
Subject: [PATCH 13/14] fix: correct implies message

---
 spras/config/schema.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spras/config/schema.py b/spras/config/schema.py
index 5b7d02e4e..19649759c 100644
--- a/spras/config/schema.py
+++ b/spras/config/schema.py
@@ -51,7 +51,7 @@ class HacMetric(CaseInsensitiveEnum):
 
 def implies(source: bool, target: bool, source_str: str, target_str: str):
     if target and not source:
-        warnings.warn(f"{source_str} is set to True but {target_str} is set to False; setting {target_str} to False", stacklevel=2)
+        warnings.warn(f"{source_str} is False but {target_str} is True; setting {target_str} to False", stacklevel=2)
         return False
     return target
 

From 32546232eb4a2240e818e7ae7210678f0411ec04 Mon Sep 17 00:00:00 2001
From: "Tristan F." <pub.tristanf@gmail.com>
Date: Fri, 9 Jan 2026 20:24:24 -0800
Subject: [PATCH 14/14] chore: drop pca/hac param access

---
 Snakefile | 2 --
 1 file changed, 2 deletions(-)

diff --git a/Snakefile b/Snakefile
index a660d542e..6ff4d73a9 100644
--- a/Snakefile
+++ b/Snakefile
@@ -26,8 +26,6 @@ def without_keys(d: dict, keys: list):
 
 out_dir = _config.config.out_dir
 algorithm_params = _config.config.algorithm_params
-pca_params = _config.config.pca_params
-hac_params = _config.config.hac_params
 container_settings = _config.config.container_settings
 pca_params = without_keys(vars(_config.config.analysis.pca), ["pca_chosen", "include", "aggregate_per_algorithm"])
 hac_params = without_keys(vars(_config.config.analysis.hac), ["include", "aggregate_per_algorithm"])