3 changes: 3 additions & 0 deletions .github/workflows/test-spras.yml
@@ -94,6 +94,9 @@ jobs:
         # race conditions from #268 and #279
         # We also enforce strict DAG evaluation to catch DAG problems before they appear as user errors. (#359)
         run: snakemake --cores 4 --configfile config/config.yaml --show-failed-logs --strict-dag-evaluation cyclic-graph --strict-dag-evaluation functions --strict-dag-evaluation periodic-wildcards
+      - name: Collect Snakemake workflow report
+        shell: bash --login {0}
+        run: snakemake --configfile config/config.yaml --report report.zip

   # Run pre-commit checks on source files
   pre-commit:
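The new step archives a Snakemake workflow report after the test run. `snakemake --report` assembles the report from the metadata of a completed run, which is why this step comes after the main `snakemake --cores 4 ...` invocation, and only outputs flagged with `report()` in the Snakefile (the changes below) are included in it. The same report can be generated locally after any run, e.g. with `snakemake --configfile config/config.yaml --report report.html` for a single self-contained HTML file; the `.gitignore` entries that follow keep both local report variants out of version control.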
2 changes: 2 additions & 0 deletions .gitignore
@@ -134,6 +134,8 @@ dmypy.json

 # Snakemake
 .snakemake/
+report.html
+report.zip

 # Output files
 output/
82 changes: 46 additions & 36 deletions Snakefile
@@ -290,7 +290,12 @@ rule parse_output:
     input:
         raw_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt']),
         dataset_file = SEP.join([out_dir, 'dataset-{dataset}-merged.pickle'])
-    output: standardized_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'pathway.txt'])
+    output:
+        standardized_file = report(
+            SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'pathway.txt']),
+            category="dataset-{dataset}",
+            subcategory="Reconstructed Output"
+        )
     run:
         params = reconstruction_params(wildcards.algorithm, wildcards.params).copy()
         params['dataset'] = input.dataset_file
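A note on the pattern used throughout the Snakefile changes: wrapping an output path in Snakemake's `report()` flag leaves the path itself, and therefore the DAG, unchanged; it only registers the file for inclusion in the report generated by `snakemake --report`. The `category` and `subcategory` arguments control the grouping in the report's navigation and may contain wildcards, which are expanded per job, so every dataset gets its own "dataset-{dataset}" section here. A minimal, self-contained sketch of the flag (the rule, paths, and optional `caption` file are hypothetical, not part of this PR):

    # Illustrative only; rule, script, and paths are hypothetical.
    rule render_plot:
        output:
            plot = report(
                "results/{sample}/plot.png",   # the path the DAG sees is unchanged
                caption="report/plot.rst",     # optional .rst caption shown in the report (hypothetical file)
                category="dataset-{sample}",   # wildcards are expanded per job
                subcategory="Visualization"
            )
        shell:
            "python scripts/plot.py {wildcards.sample} {output.plot}"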
@@ -311,7 +316,11 @@ rule parse_output:
 rule viz_cytoscape:
     input: pathways = expand('{out_dir}{sep}{{dataset}}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params)
     output:
-        session = SEP.join([out_dir, '{dataset}-cytoscape.cys'])
+        session = report(
+            SEP.join([out_dir, '{dataset}-cytoscape.cys']),
+            category="dataset-{dataset}",
+            subcategory="Visualization"
+        )
     run:
         cytoscape.run_cytoscape(input.pathways, output.session, FRAMEWORK)

@@ -322,7 +331,8 @@ rule summary_table:
         # Collect all pathways generated for the dataset
         pathways = expand('{out_dir}{sep}{{dataset}}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params),
         dataset_file = SEP.join([out_dir, 'dataset-{dataset}-merged.pickle'])
-    output: summary_table = SEP.join([out_dir, '{dataset}-pathway-summary.txt'])
+    output:
+        summary_table = report(SEP.join([out_dir, '{dataset}-pathway-summary.txt']), category="dataset-{dataset}", subcategory="Summary")
     run:
         # Load the node table from the pickled dataset file
         node_table = Dataset.from_file(input.dataset_file).node_table
@@ -334,13 +344,13 @@ rule ml_analysis:
     input:
         pathways = expand('{out_dir}{sep}{{dataset}}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params)
     output:
-        pca_image = SEP.join([out_dir, '{dataset}-ml', 'pca.png']),
-        pca_variance= SEP.join([out_dir, '{dataset}-ml', 'pca-variance.txt']),
-        pca_coordinates = SEP.join([out_dir, '{dataset}-ml', 'pca-coordinates.txt']),
-        hac_image_vertical = SEP.join([out_dir, '{dataset}-ml', 'hac-vertical.png']),
-        hac_clusters_vertical = SEP.join([out_dir, '{dataset}-ml', 'hac-clusters-vertical.txt']),
-        hac_image_horizontal = SEP.join([out_dir, '{dataset}-ml', 'hac-horizontal.png']),
-        hac_clusters_horizontal = SEP.join([out_dir, '{dataset}-ml', 'hac-clusters-horizontal.txt']),
+        pca_image = report(SEP.join([out_dir, '{dataset}-ml', 'pca.png']), category="dataset-{dataset}", subcategory="ML"),
+        pca_variance = report(SEP.join([out_dir, '{dataset}-ml', 'pca-variance.txt']), category="dataset-{dataset}", subcategory="ML"),
+        pca_coordinates = report(SEP.join([out_dir, '{dataset}-ml', 'pca-coordinates.txt']), category="dataset-{dataset}", subcategory="ML"),
+        hac_image_vertical = report(SEP.join([out_dir, '{dataset}-ml', 'hac-vertical.png']), category="dataset-{dataset}", subcategory="ML"),
+        hac_clusters_vertical = report(SEP.join([out_dir, '{dataset}-ml', 'hac-clusters-vertical.txt']), category="dataset-{dataset}", subcategory="ML"),
+        hac_image_horizontal = report(SEP.join([out_dir, '{dataset}-ml', 'hac-horizontal.png']), category="dataset-{dataset}", subcategory="ML"),
+        hac_clusters_horizontal = report(SEP.join([out_dir, '{dataset}-ml', 'hac-clusters-horizontal.txt']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.hac_vertical(summary_df, output.hac_image_vertical, output.hac_clusters_vertical, **hac_params)
@@ -353,8 +363,8 @@ rule jaccard_similarity:
         pathways = expand('{out_dir}{sep}{{dataset}}-{algorithm_params}{sep}pathway.txt',
                           out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params)
     output:
-        jaccard_similarity_matrix = SEP.join([out_dir, '{dataset}-ml', 'jaccard-matrix.txt']),
-        jaccard_similarity_heatmap = SEP.join([out_dir, '{dataset}-ml', 'jaccard-heatmap.png'])
+        jaccard_similarity_matrix = report(SEP.join([out_dir, '{dataset}-ml', 'jaccard-matrix.txt']), category="dataset-{dataset}", subcategory="ML"),
+        jaccard_similarity_heatmap = report(SEP.join([out_dir, '{dataset}-ml', 'jaccard-heatmap.png']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.jaccard_similarity_eval(summary_df, output.jaccard_similarity_matrix, output.jaccard_similarity_heatmap)
@@ -365,7 +375,7 @@ rule ensemble:
     input:
         pathways = expand('{out_dir}{sep}{{dataset}}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params)
     output:
-        ensemble_network_file = SEP.join([out_dir,'{dataset}-ml', 'ensemble-pathway.txt'])
+        ensemble_network_file = report(SEP.join([out_dir,'{dataset}-ml', 'ensemble-pathway.txt']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.ensemble_network(summary_df, output.ensemble_network_file)
@@ -381,13 +391,13 @@ rule ml_analysis_aggregate_algo:
     input:
         pathways = collect_pathways_per_algo
     output:
-        pca_image = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca.png']),
-        pca_variance= SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca-variance.txt']),
-        pca_coordinates = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca-coordinates.txt']),
-        hac_image_vertical = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-vertical.png']),
-        hac_clusters_vertical = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-clusters-vertical.txt']),
-        hac_image_horizontal = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-horizontal.png']),
-        hac_clusters_horizontal = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-clusters-horizontal.txt']),
+        pca_image = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca.png']), category="dataset-{dataset}", subcategory="ML"),
+        pca_variance = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca-variance.txt']), category="dataset-{dataset}", subcategory="ML"),
+        pca_coordinates = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-pca-coordinates.txt']), category="dataset-{dataset}", subcategory="ML"),
+        hac_image_vertical = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-vertical.png']), category="dataset-{dataset}", subcategory="ML"),
+        hac_clusters_vertical = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-clusters-vertical.txt']), category="dataset-{dataset}", subcategory="ML"),
+        hac_image_horizontal = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-horizontal.png']), category="dataset-{dataset}", subcategory="ML"),
+        hac_clusters_horizontal = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-clusters-horizontal.txt']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.hac_vertical(summary_df, output.hac_image_vertical, output.hac_clusters_vertical, **hac_params)
@@ -399,7 +409,7 @@ rule ensemble_per_algo:
     input:
         pathways = collect_pathways_per_algo
     output:
-        ensemble_network_file = SEP.join([out_dir,'{dataset}-ml', '{algorithm}-ensemble-pathway.txt'])
+        ensemble_network_file = report(SEP.join([out_dir,'{dataset}-ml', '{algorithm}-ensemble-pathway.txt']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.ensemble_network(summary_df, output.ensemble_network_file)
@@ -409,8 +419,8 @@ rule jaccard_similarity_per_algo:
     input:
         pathways = collect_pathways_per_algo
     output:
-        jaccard_similarity_matrix = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-matrix.txt']),
-        jaccard_similarity_heatmap = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-heatmap.png'])
+        jaccard_similarity_matrix = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-matrix.txt']), category="dataset-{dataset}", subcategory="ML"),
+        jaccard_similarity_heatmap = report(SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-heatmap.png']), category="dataset-{dataset}", subcategory="ML"),
     run:
         summary_df = ml.summarize_networks(input.pathways)
         ml.jaccard_similarity_eval(summary_df, output.jaccard_similarity_matrix, output.jaccard_similarity_heatmap)
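The per-algorithm rules (`ml_analysis_aggregate_algo`, `ensemble_per_algo`, and `jaccard_similarity_per_algo` above) mirror the aggregate rules, adding an `{algorithm}` wildcard to the file names only. Because their `category` is still "dataset-{dataset}", the per-algorithm files appear in the same dataset section of the report, under the shared "ML" subcategory, rather than in sections of their own.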
@@ -439,8 +449,8 @@ rule evaluation_pr_per_pathways:
         node_gold_standard_file = get_gold_standard_pickle_file,
         pathways = collect_pathways_per_dataset
     output:
-        node_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', "pr-per-pathway-nodes.txt"]),
-        node_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-per-pathway-nodes.png']),
+        node_pr_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', "pr-per-pathway-nodes.txt"]), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pr_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-per-pathway-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         pr_df = Evaluation.node_precision_and_recall(input.pathways, node_table)
@@ -458,8 +468,8 @@ rule evaluation_per_algo_pr_per_pathways:
         node_gold_standard_file = get_gold_standard_pickle_file,
         pathways = collect_pathways_per_algo_per_dataset,
     output:
-        node_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', "pr-per-pathway-for-{algorithm}-nodes.txt"]),
-        node_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-per-pathway-for-{algorithm}-nodes.png']),
+        node_pr_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', "pr-per-pathway-for-{algorithm}-nodes.txt"]), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pr_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-per-pathway-for-{algorithm}-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         pr_df = Evaluation.node_precision_and_recall(input.pathways, node_table)
@@ -484,8 +494,8 @@ rule evaluation_pca_chosen:
         pca_coordinates_file = collect_pca_coordinates_per_dataset,
         pathway_summary_file = collect_summary_statistics_per_dataset
     output:
-        node_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-nodes.txt']),
-        node_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-nodes.png']),
+        node_pca_chosen_pr_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-nodes.txt']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pca_chosen_pr_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         pca_chosen_pathway = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir)
@@ -505,8 +515,8 @@ rule evaluation_per_algo_pca_chosen:
         pca_coordinates_file = collect_pca_coordinates_per_algo_per_dataset,
         pathway_summary_file = collect_summary_statistics_per_dataset
     output:
-        node_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.txt']),
-        node_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.png']),
+        node_pca_chosen_pr_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.txt']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pca_chosen_pr_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir)
@@ -530,8 +540,8 @@ rule evaluation_ensemble_pr_curve:
         dataset_file = get_dataset_pickle_file,
         ensemble_file = collect_ensemble_per_dataset
     output:
-        node_pr_curve_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes.png']),
-        node_pr_curve_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes.txt']),
+        node_pr_curve_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pr_curve_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes.txt']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         node_ensemble_dict = Evaluation.edge_frequency_node_ensemble(node_table, input.ensemble_file, input.dataset_file)
@@ -549,8 +559,8 @@ rule evaluation_per_algo_ensemble_pr_curve:
         dataset_file = get_dataset_pickle_file,
         ensemble_files = collect_ensemble_per_algo_per_dataset
     output:
-        node_pr_curve_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes-per-algorithm-nodes.png']),
-        node_pr_curve_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes-per-algorithm-nodes.txt']),
+        node_pr_curve_png = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes-per-algorithm-nodes.png']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
+        node_pr_curve_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-curve-ensemble-nodes-per-algorithm-nodes.txt']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
         node_ensembles_dict = Evaluation.edge_frequency_node_ensemble(node_table, input.ensemble_files, input.dataset_file)
@@ -560,7 +570,7 @@ rule evaluation_edge_dummy:
     input:
         edge_gold_standard_file = get_gold_standard_pickle_file,
     output:
-        dummy_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'dummy-edge.txt']),
+        dummy_file = report(SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'dummy-edge.txt']), category="dgs-{dataset_gold_standard_pair}", subcategory="Evaluation"),
     run:
         mixed_edge_table = Evaluation.from_file(input.edge_gold_standard_file).mixed_edge_table
         undirected_edge_table = Evaluation.from_file(input.edge_gold_standard_file).undirected_edge_table
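The evaluation rules use a different category prefix, "dgs-{dataset_gold_standard_pair}", with subcategory "Evaluation". The report therefore groups reconstruction, visualization, summary, and ML outputs by dataset, and precision-recall evaluation outputs by dataset-gold standard pair, mirroring the `-eval` output directories.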