From 5d495da0d05c7230177b1c0781e15e783d14b11b Mon Sep 17 00:00:00 2001 From: ntalluri Date: Thu, 30 Oct 2025 13:42:47 -0500 Subject: [PATCH 01/10] added edge evaluation per dataset-edge-goldstandard pair --- Snakefile | 34 ++++++---- spras/evaluation.py | 162 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 173 insertions(+), 23 deletions(-) diff --git a/Snakefile b/Snakefile index 02f019e8d..a54ea6015 100644 --- a/Snakefile +++ b/Snakefile @@ -112,9 +112,12 @@ def make_final_input(wildcards): final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) + # dummy file final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}dummy-edge.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs)) - + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-edges.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs)) + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-edges.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs)) + if _config.config.analysis_include_evaluation_aggregate_algo: final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms)) @@ -478,7 +481,7 @@ def collect_pca_coordinates_per_dataset(wildcards): # Run PCA chosen to select the representative from all pathway outputs for a given dataset, # then evaluate with precision and recall against the corresponding gold standard -rule evaluation_pca_chosen: +rule evaluation_pca_chosen_nodes: input: node_gold_standard_file = get_gold_standard_pickle_file, pca_coordinates_file = collect_pca_coordinates_per_dataset, @@ -492,6 +495,22 @@ rule evaluation_pca_chosen: pr_df = Evaluation.node_precision_and_recall(pca_chosen_pathway, node_table) Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png) +rule evaluation_pca_chosen_edges: + input: + edge_gold_standard_file = get_gold_standard_pickle_file, + pca_coordinates_file = collect_pca_coordinates_per_dataset, + pathway_summary_file = collect_summary_statistics_per_dataset + output: + edge_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-edges.txt']), + edge_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-edges.png']), + run: + mixed_edge_table = Evaluation.from_file(input.edge_gold_standard_file).mixed_edge_table + undirected_edge_table = 
Evaluation.from_file(input.edge_gold_standard_file).undirected_edge_table + directed_edge_table = Evaluation.from_file(input.edge_gold_standard_file).directed_edge_table + pca_chosen_pathway = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir) + pr_df = Evaluation.edge_precision_and_recall(pca_chosen_pathway, mixed_edge_table, directed_edge_table, undirected_edge_table) + Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.edge_pca_chosen_pr_file, output.edge_pca_chosen_pr_png, edge_evaluation=True) + # Returns pca coordinates for a specific algorithm and dataset def collect_pca_coordinates_per_algo_per_dataset(wildcards): dataset_label = get_dataset_label(wildcards) @@ -556,17 +575,6 @@ rule evaluation_per_algo_ensemble_pr_curve: node_ensembles_dict = Evaluation.edge_frequency_node_ensemble(node_table, input.ensemble_files, input.dataset_file) Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, node_table, output.node_pr_curve_png, output.node_pr_curve_file, include_aggregate_algo_eval) -rule evaluation_edge_dummy: - input: - edge_gold_standard_file = get_gold_standard_pickle_file, - output: - dummy_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'dummy-edge.txt']), - run: - mixed_edge_table = Evaluation.from_file(input.edge_gold_standard_file).mixed_edge_table - undirected_edge_table = Evaluation.from_file(input.edge_gold_standard_file).undirected_edge_table - directed_edge_table = Evaluation.from_file(input.edge_gold_standard_file).directed_edge_table - Evaluation.edge_dummy_function(mixed_edge_table, undirected_edge_table, directed_edge_table, output.dummy_file) - # Remove the output directory rule clean: shell: f'rm -rf {out_dir}' diff --git a/spras/evaluation.py b/spras/evaluation.py index 8c09e3f34..c82e5d2ed 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -179,8 +179,9 @@ def node_precision_and_recall(file_paths: Iterable[Union[str, PathLike]], node_t pr_df = pd.DataFrame(results) return pr_df + @staticmethod - def visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: str | PathLike, output_png: str | PathLike, title: str): + def nodes_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: str | PathLike, output_png: str | PathLike, title: str): """ Generates a scatter plot of precision and recall values for each pathway and saves both the plot and the data. @@ -230,11 +231,127 @@ def visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: str | pr_df.drop(columns=['Algorithm'], inplace=True) pr_df.to_csv(output_file, sep='\t', index=False) + def edge_precision_and_recall(file_paths: Iterable[Union[str, PathLike]], mixed_edge_table: pd.DataFrame, directed_edge_table: pd.DataFrame, undirected_edge_table: pd.DataFrame) -> pd.DataFrame: + """ + Computes edge-level precision and recall for each pathway reconstruction output file against three edge gold standard tables. + + This function takes a list of file paths corresponding to pathway reconstruction algorithm outputs, + each formatted as a tab-separated file with columns 'Node1', 'Node2', 'Rank', and 'Direction'. + It compares the set of predicted edges to the three provided gold standard edge tables and computes precision and recall per file. 
+ + @param file_paths: list of file paths of pathway reconstruction algorithm outputs + @param mixed_edge_table: the gold standard edges that includes directed and undirected edges + @param directed_edge_table: the gold standard edges that only includes directed edges + @param undirected_edge_table: the gold standard edges that only includes undirected edges + @return: A DataFrame with the following columns: + - 'Pathway': Path object corresponding to each pathway file + - 'Precision': Precision of predicted nodes vs. gold standard nodes + - 'Recall': Recall of predicted nodes vs. gold standard nodes + - 'Gold_Standard_Type': Which gold standard was used to calculate the precision and recall + """ + + y_true_mixed = set(map(tuple, mixed_edge_table[['Interactor1', 'Interactor2', 'Direction']].values)) + y_true_directed = set(map(tuple, directed_edge_table[['Interactor1', 'Interactor2', 'Direction']].values)) + y_true_undirected = set(map(tuple, undirected_edge_table[['Interactor1', 'Interactor2', 'Direction']].values)) + + results = [] + for f in file_paths: + df = pd.read_table(f, sep='\t', header=0) + y_pred = set(map(tuple, df[['Node1', 'Node2', 'Direction']].values)) + + all_edges_mixed = y_true_mixed.union(y_pred) + y_true_mixed_binary = [1 if edge in y_true_mixed else 0 for edge in all_edges_mixed] + y_pred_mixed_binary = [1 if edge in y_pred else 0 for edge in all_edges_mixed] + # default to 0.0 if there is a divide by 0 error + # not using precision_recall_curve because thresholds are binary (0 or 1); rather we are directly + # calculating precision and recall per pathway + precision_mixed = precision_score(y_true_mixed_binary, y_pred_mixed_binary, zero_division=0.0) + recall_mixed = recall_score(y_true_mixed_binary, y_pred_mixed_binary, zero_division=0.0) + results.append({'Pathway': f, 'Precision': precision_mixed, 'Recall': recall_mixed, 'Gold_Standard_Type': "mixed"}) + + all_edges_directed = y_true_directed.union(y_pred) + y_true_directed_binary = [1 if edge in y_true_directed else 0 for edge in all_edges_directed] + y_pred_directed_binary = [1 if edge in y_pred else 0 for edge in all_edges_directed] + precision_directed = precision_score(y_true_directed_binary, y_pred_directed_binary, zero_division=0.0) + recall_directed = recall_score(y_true_directed_binary, y_pred_directed_binary, zero_division=0.0) + results.append({'Pathway': f, 'Precision': precision_directed, 'Recall': recall_directed, 'Gold_Standard_Type': "directed"}) + + all_edges_undirected = y_true_undirected.union(y_pred) + y_true_undirected_binary = [1 if edge in y_true_undirected else 0 for edge in all_edges_undirected] + y_pred_undirected_binary = [1 if edge in y_pred else 0 for edge in all_edges_undirected] + precision_undirected = precision_score(y_true_undirected_binary, y_pred_undirected_binary, zero_division=0.0) + recall_undirected = recall_score(y_true_undirected_binary, y_pred_undirected_binary, zero_division=0.0) + results.append({'Pathway': f, 'Precision': precision_undirected, 'Recall': recall_undirected, 'Gold_Standard_Type': "undirected"}) + + pr_df = pd.DataFrame(results) + return pr_df + + @staticmethod + def edges_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: str | PathLike, output_png: str | PathLike, title: str): + """ + Generates three scatter plot subplots showing edge precision and recall values for each pathway across three edge gold standard types, + and saves both the resulting plots and the corresponding data. 
+ + This function is intended for visualizing how different pathway reconstructions perform + (not a precision-recall curve) showing the precision and recall of each parameter combination + for each algorithm per edge gold standard dataset. + + @param pr_df: Dataframe of calculated precision and recall for each pathway file per edge gold standard. + Must include a preprocessed 'Algorithm' column and 'Gold_Standard_Type" column + @param output_file: the filename to save the precision and recall of each pathway per gold standard type + @param output_png: the filename to plot the precision and recall of each pathway (not a PRC) per gold standard type + @param title: The title to use for the plot + """ + if 'Algorithm' not in pr_df.columns: + raise ValueError( + "Column 'Algorithm' not found in DataFrame. " + "The input DataFrame must include a preprocessed 'Algorithm' column to visulize a precision and recall per pathway file per gold standard type." + ) + if 'Gold_Standard_Type' not in pr_df.columns: + raise ValueError( + "Column 'Gold_Standard_Type' not found in DataFrame. " + "The input DataFrame must include a preprocessed 'Gold_Standard_Type' column indicating the edge directionality used for the gold standard, which is required to visualize precision and recall for each pathway file per gold standard type." + ) + + + gs_types = pr_df["Gold_Standard_Type"].unique().tolist() + fig, axes = plt.subplots(1, len(gs_types), figsize=(6 * len(gs_types), 5), sharex=True, sharey=True, constrained_layout=True) + color_palette = create_palette(pr_df['Algorithm'].tolist()) + + for ax, gs_type in zip(axes, gs_types, strict=True): + df_gs_type = pr_df[pr_df["Gold_Standard_Type"] == gs_type] + for algorithm, subset in df_gs_type.groupby('Algorithm'): + if not subset.empty: + ax.plot( + subset['Recall'], + subset['Precision'], + color=color_palette[algorithm], + marker='o', + linestyle='', + label=algorithm.capitalize() + ) + ax.set_title(gs_type.capitalize()) + ax.set_xlim(-0.05, 1.05) + ax.set_ylim(-0.05, 1.05) + ax.grid(True) + + fig.supxlabel("Recall") + fig.supylabel("Precision") + fig.suptitle(title) + handles, labels = axes[0].get_legend_handles_labels() + fig.legend(handles, labels, loc="upper right") # TODO: when doing aggregate per algorithm, check if this needs to be fixed to be in a different place (issue might be constrained_layout) + plt.savefig(output_png) + plt.close(fig) + + # save dataframe + pr_df.drop(columns=['Algorithm'], inplace=True) + pr_df.to_csv(output_file, sep='\t', index=False) + @staticmethod def precision_and_recall_per_pathway(pr_df: pd.DataFrame, output_file: str | PathLike, output_png: str | PathLike, aggregate_per_algorithm: bool = False): """ Function for visualizing per pathway precision and recall across all algorithms. Each point in the plot represents - a single pathway reconstruction. If `aggregate_per_algorithm` is set to True, the plot is restricted to a single + a single pathway reconstruction. If `aggregate_per_algorithm` is set to True, each plot is restricted to a single algorithm and titled accordingly. 
@param pr_df: Dataframe of calculated precision and recall for each pathway file @@ -252,7 +369,7 @@ def precision_and_recall_per_pathway(pr_df: pd.DataFrame, output_file: str | Pat else: title = "Precision and Recall Plot Per Pathway Per Algorithm" - Evaluation.visualize_precision_and_recall_plot(pr_df, output_file, output_png, title) + Evaluation.nodes_visualize_precision_and_recall_plot(pr_df, output_file, output_png, title) else: # this block should never be reached — having 0 pathways implies that no algorithms or parameter combinations were run, @@ -260,18 +377,20 @@ def precision_and_recall_per_pathway(pr_df: pd.DataFrame, output_file: str | Pat raise ValueError("No pathways were provided to evaluate and visulize on. This likely means no algorithms or parameter combinations were run.") @staticmethod - def precision_and_recall_pca_chosen_pathway(pr_df: pd.DataFrame, output_file: str | PathLike, output_png: str | PathLike, aggregate_per_algorithm: bool = False): + def precision_and_recall_pca_chosen_pathway(pr_df: pd.DataFrame, output_file: str | PathLike, output_png: str | PathLike, aggregate_per_algorithm: bool = False, edge_evaluation: bool = False): """ Function for visualizing the precision and recall of the single parameter combination selected via PCA, either for each algorithm individually or one combination shared across all algorithms. Each point represents a pathway reconstruction corresponding to the PCA-selected parameter combination. If `aggregate_per_algorithm` - is True, the plot includes a pca chosen pathway per algorithm and titled accordingly. + is True, the plot includes a pca chosen pathway per algorithm and titled accordingly. If `edge_evaluation` is True, + the plot will include the evaluation across the three gold standard edge files. 
@param pr_df: Dataframe of calculated precision and recall for each pathway file @param output_file: the filename to save the precision and recall of each pathway @param output_png: the filename to plot the precision and recall of each pathway (not a PRC) - @param aggregate_per_algorithm: Boolean indicating if function is used per algorithm (Default False) + @param aggregate_per_algorithm: Boolean indicating if this function is used per algorithm (Default False) + @param edge_evaluation: Boolean indicating if this function is used for creating edge_evaluation plots (Default False; used for node evaluation) """ # TODO update to add in the pathways for the algorithms that do not provide a pca chosen pathway https://github.com/Reed-CompBio/spras/issues/341 @@ -279,12 +398,22 @@ def precision_and_recall_pca_chosen_pathway(pr_df: pd.DataFrame, output_file: st pr_df['Algorithm'] = pr_df['Pathway'].apply(lambda p: Path(p).parent.name.split('-')[1]) pr_df.sort_values(by=['Recall', 'Pathway'], axis=0, ascending=True, inplace=True) - if aggregate_per_algorithm: - title = "PCA-Chosen Pathway Per Algorithm Precision and Recall Plot" + if not edge_evaluation: + if aggregate_per_algorithm: + title = "Node Evaluation PCA-Chosen Pathway Per Algorithm Precision and Recall Plot" + else: + title = "Node Evaluation PCA-Chosen Pathway Across all Algorithms Precision and Recall Plot" + + Evaluation.nodes_visualize_precision_and_recall_plot(pr_df, output_file, output_png, title) + else: - title = "PCA-Chosen Pathway Across All Algorithms Precision and Recall Plot" + if aggregate_per_algorithm : + title = "Edge Evaluation PCA-Chosen Pathway Per Algorithm Precision and Recall Plot" + else: + title = "Edge Evaluation PCA-Chosen Pathway Across all Algorithms Precision and Recall Plot" + + Evaluation.edges_visualize_precision_and_recall_plot(pr_df, output_file, output_png, title) - Evaluation.visualize_precision_and_recall_plot(pr_df, output_file, output_png, title) else: # Edge case: if all algorithms chosen use only 1 parameter combination @@ -300,6 +429,16 @@ def precision_and_recall_pca_chosen_pathway(pr_df: pd.DataFrame, output_file: st plt.savefig(output_png) plt.close() + # TODO + # need to make a edge_precision_recall function to make the pr_df + # I think then the precision_and_recall_pca_chosen_pathway function can be reused but needs to be updated to be able to differentiate between nodes or edges + # i think I can do that with a boolean + # then I need to make a edges_visualize_precision_and_recall_plot that is called + # these can then be reused for no parameter selection evaluation + # i think I will need to make a new snakemake rule for each of the evaluatuon because the gold standards only include nodes or edges, + # sharing the same one will cause errors that one type of evalaution doesn't exist + + @staticmethod def pca_chosen_pathway(coordinates_files: list[Union[str, PathLike]], pathway_summary_file: str, output_dir: str): """ @@ -349,6 +488,7 @@ def pca_chosen_pathway(coordinates_files: list[Union[str, PathLike]], pathway_su rep_pathway = os.path.join(output_dir, f"{closest_to_kde_peak['datapoint_labels']}", "pathway.txt") rep_pathways.append(rep_pathway) + print(rep_pathways) return rep_pathways @staticmethod @@ -536,3 +676,5 @@ def edge_dummy_function(mixed_edge_table: pd.DataFrame, undirected_edge_table: p undirected_edge_table.to_csv(f, index=False) f.write("\n\nDirected Edge Table\n") directed_edge_table.to_csv(f, index=False) + + From 206c45351dcfdd1109184ec6f602396bd7b3f3ce Mon Sep 17 
00:00:00 2001 From: Neha Talluri <78840540+ntalluri@users.noreply.github.com> Date: Thu, 30 Oct 2025 14:01:12 -0500 Subject: [PATCH 02/10] Apply suggestions from code review Co-authored-by: Tristan F.-R. --- Snakefile | 2 -- spras/evaluation.py | 1 - 2 files changed, 3 deletions(-) diff --git a/Snakefile b/Snakefile index a54ea6015..32dcc233b 100644 --- a/Snakefile +++ b/Snakefile @@ -113,8 +113,6 @@ def make_final_input(wildcards): final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) - # dummy file - final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}dummy-edge.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-edges.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-edges.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs)) diff --git a/spras/evaluation.py b/spras/evaluation.py index c82e5d2ed..6ce035acb 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -488,7 +488,6 @@ def pca_chosen_pathway(coordinates_files: list[Union[str, PathLike]], pathway_su rep_pathway = os.path.join(output_dir, f"{closest_to_kde_peak['datapoint_labels']}", "pathway.txt") rep_pathways.append(rep_pathway) - print(rep_pathways) return rep_pathways @staticmethod From 5a6c4a80b71306e2cf683741717a384b88089c56 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Thu, 30 Oct 2025 14:16:20 -0500 Subject: [PATCH 03/10] in process of aggregate --- Snakefile | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Snakefile b/Snakefile index a54ea6015..fb5261d77 100644 --- a/Snakefile +++ b/Snakefile @@ -126,6 +126,9 @@ def make_final_input(wildcards): final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes-per-algorithm-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes-per-algorithm-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) + edge_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-edges.txt']), + edge_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-edges.png']), + # Since (formatted) pathway files are interesting to the user, we preserve them. 
final_input.extend(expand('{out_dir}{sep}{dataset}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, dataset=dataset_labels, algorithm_params=algorithms_with_params)) @@ -518,7 +521,7 @@ def collect_pca_coordinates_per_algo_per_dataset(wildcards): # Run PCA chosen to select the representative pathway per algorithm pathway outputs for a given dataset, # then evaluate with precision and recall against the corresponding gold standard -rule evaluation_per_algo_pca_chosen: +rule evaluation_per_algo_pca_chosen_nodes: input: node_gold_standard_file = get_gold_standard_pickle_file, pca_coordinates_file = collect_pca_coordinates_per_algo_per_dataset, @@ -532,6 +535,30 @@ rule evaluation_per_algo_pca_chosen: pr_df = Evaluation.node_precision_and_recall(pca_chosen_pathways, node_table) Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png, include_aggregate_algo_eval) +rule evaluation_per_algo_pca_chosen_edges: + input: + edge_gold_standard_file = get_gold_standard_pickle_file, + pca_coordinates_file = collect_pca_coordinates_per_algo_per_dataset, + pathway_summary_file = collect_summary_statistics_per_dataset + output: + edge_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-edges.txt']), + edge_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-edges.png']), + run: + mixed_edge_table = Evaluation.from_file(input.edge_gold_standard_file).mixed_edge_table + undirected_edge_table = Evaluation.from_file(input.edge_gold_standard_file).undirected_edge_table + directed_edge_table = Evaluation.from_file(input.edge_gold_standard_file).directed_edge_table + + pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir) + pr_df = Evaluation.edge_precision_and_recall(pca_chosen_pathways, mixed_edge_table, directed_edge_table, undirected_edge_table) + + Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.edge_pca_chosen_pr_file, output.edge_pca_chosen_pr_png, include_aggregate_algo_eval, edge_evaluation=True) + +# Returns pca coordinates for a specific algorithm and dataset +def collect_pca_coordinates_per_algo_per_dataset(wildcards): + dataset_label = get_dataset_label(wildcards) + return expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-pca-coordinates.txt', out_dir=out_dir, sep=SEP, dataset=dataset_label, algorithm=algorithms_mult_param_combos) #TODO we are using algos with mult param combos, what to do when empty? 
+ + # Return the dataset pickle file for a specific dataset def get_dataset_pickle_file(wildcards): dataset_label = get_dataset_label(wildcards) From bead0e7f232d613b7e2ae8879e259a78adaad5ed Mon Sep 17 00:00:00 2001 From: ntalluri Date: Thu, 30 Oct 2025 14:50:19 -0500 Subject: [PATCH 04/10] added aggregate per algorithm --- Snakefile | 19 +++++++++++-------- spras/evaluation.py | 7 ++++--- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/Snakefile b/Snakefile index 50ea1a131..fcb45ed69 100644 --- a/Snakefile +++ b/Snakefile @@ -106,6 +106,7 @@ def make_final_input(wildcards): final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-jaccard-heatmap.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms)) if _config.config.analysis_include_evaluation: + # node evaluation final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm_params=algorithms_with_params)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) @@ -113,10 +114,12 @@ def make_final_input(wildcards): final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) + # edge evaluation final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-edges.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-edges.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs)) if _config.config.analysis_include_evaluation_aggregate_algo: + # node evaluation final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) @@ -124,9 +127,10 @@ def make_final_input(wildcards): final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes-per-algorithm-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes-per-algorithm-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs)) - 
edge_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-edges.txt']), - edge_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-edges.png']), - + # edge evaluation + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-edges.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs)) + final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-edges.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs)) + # Since (formatted) pathway files are interesting to the user, we preserve them. final_input.extend(expand('{out_dir}{sep}{dataset}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, dataset=dataset_labels, algorithm_params=algorithms_with_params)) @@ -431,7 +435,6 @@ def get_dataset_label(wildcards): dataset = parts[0] return dataset - # Returns all pathways for a specific dataset def collect_pathways_per_dataset(wildcards): dataset_label = get_dataset_label(wildcards) @@ -522,21 +525,21 @@ def collect_pca_coordinates_per_algo_per_dataset(wildcards): rule evaluation_per_algo_pca_chosen_nodes: input: node_gold_standard_file = get_gold_standard_pickle_file, - pca_coordinates_file = collect_pca_coordinates_per_algo_per_dataset, + pca_coordinates_files = collect_pca_coordinates_per_algo_per_dataset, pathway_summary_file = collect_summary_statistics_per_dataset output: node_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.txt']), node_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.png']), run: node_table = Evaluation.from_file(input.node_gold_standard_file).node_table - pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir) + pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_files, input.pathway_summary_file, out_dir) pr_df = Evaluation.node_precision_and_recall(pca_chosen_pathways, node_table) Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png, include_aggregate_algo_eval) rule evaluation_per_algo_pca_chosen_edges: input: edge_gold_standard_file = get_gold_standard_pickle_file, - pca_coordinates_file = collect_pca_coordinates_per_algo_per_dataset, + pca_coordinates_files = collect_pca_coordinates_per_algo_per_dataset, pathway_summary_file = collect_summary_statistics_per_dataset output: edge_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-edges.txt']), @@ -546,7 +549,7 @@ rule evaluation_per_algo_pca_chosen_edges: undirected_edge_table = Evaluation.from_file(input.edge_gold_standard_file).undirected_edge_table directed_edge_table = Evaluation.from_file(input.edge_gold_standard_file).directed_edge_table - pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir) + pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_files, input.pathway_summary_file, out_dir) pr_df = Evaluation.edge_precision_and_recall(pca_chosen_pathways, mixed_edge_table, directed_edge_table, undirected_edge_table) Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, 
output.edge_pca_chosen_pr_file, output.edge_pca_chosen_pr_png, include_aggregate_algo_eval, edge_evaluation=True) diff --git a/spras/evaluation.py b/spras/evaluation.py index 6ce035acb..9427dfd22 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -202,6 +202,8 @@ def nodes_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: "The input DataFrame must include a preprocessed 'Algorithm' column to calculate precision and recall per pathway file." ) + pr_df.sort_values(by=['Algorithm', 'Recall', 'Pathway'], axis=0, ascending=True, inplace=True) + # save figure plt.figure(figsize=(10, 7)) color_palette = create_palette(pr_df['Algorithm'].tolist()) @@ -313,9 +315,10 @@ def edges_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: "The input DataFrame must include a preprocessed 'Gold_Standard_Type' column indicating the edge directionality used for the gold standard, which is required to visualize precision and recall for each pathway file per gold standard type." ) + pr_df.sort_values(by=['Algorithm', 'Gold_Standard_Type', 'Recall', 'Pathway'], axis=0, ascending=True, inplace=True) gs_types = pr_df["Gold_Standard_Type"].unique().tolist() - fig, axes = plt.subplots(1, len(gs_types), figsize=(6 * len(gs_types), 5), sharex=True, sharey=True, constrained_layout=True) + fig, axes = plt.subplots(1, len(gs_types), figsize=(6 * len(gs_types), 5), sharex=True, sharey=True) color_palette = create_palette(pr_df['Algorithm'].tolist()) for ax, gs_type in zip(axes, gs_types, strict=True): @@ -361,7 +364,6 @@ def precision_and_recall_per_pathway(pr_df: pd.DataFrame, output_file: str | Pat """ if not pr_df.empty: pr_df['Algorithm'] = pr_df['Pathway'].apply(lambda p: Path(p).parent.name.split('-')[1]) - pr_df.sort_values(by=['Recall', 'Pathway'], axis=0, ascending=True, inplace=True) if aggregate_per_algorithm: # Guaranteed to only have one algorithm in Algorithm column @@ -396,7 +398,6 @@ def precision_and_recall_pca_chosen_pathway(pr_df: pd.DataFrame, output_file: st if not pr_df.empty: pr_df['Algorithm'] = pr_df['Pathway'].apply(lambda p: Path(p).parent.name.split('-')[1]) - pr_df.sort_values(by=['Recall', 'Pathway'], axis=0, ascending=True, inplace=True) if not edge_evaluation: if aggregate_per_algorithm: From 94149b9152f4b435a7abd0ff9fb593d06d953584 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Thu, 30 Oct 2025 14:52:59 -0500 Subject: [PATCH 05/10] add a todo and remove the plan I made --- spras/evaluation.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/spras/evaluation.py b/spras/evaluation.py index 9427dfd22..1dade4b0f 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -316,6 +316,7 @@ def edges_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: ) pr_df.sort_values(by=['Algorithm', 'Gold_Standard_Type', 'Recall', 'Pathway'], axis=0, ascending=True, inplace=True) + # TODO: fix the layout of the output png gs_types = pr_df["Gold_Standard_Type"].unique().tolist() fig, axes = plt.subplots(1, len(gs_types), figsize=(6 * len(gs_types), 5), sharex=True, sharey=True) @@ -342,7 +343,7 @@ def edges_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: fig.supylabel("Precision") fig.suptitle(title) handles, labels = axes[0].get_legend_handles_labels() - fig.legend(handles, labels, loc="upper right") # TODO: when doing aggregate per algorithm, check if this needs to be fixed to be in a different place (issue might be constrained_layout) + fig.legend(handles, labels, loc="upper right") 
plt.savefig(output_png) plt.close(fig) @@ -430,16 +431,6 @@ def precision_and_recall_pca_chosen_pathway(pr_df: pd.DataFrame, output_file: st plt.savefig(output_png) plt.close() - # TODO - # need to make a edge_precision_recall function to make the pr_df - # I think then the precision_and_recall_pca_chosen_pathway function can be reused but needs to be updated to be able to differentiate between nodes or edges - # i think I can do that with a boolean - # then I need to make a edges_visualize_precision_and_recall_plot that is called - # these can then be reused for no parameter selection evaluation - # i think I will need to make a new snakemake rule for each of the evaluatuon because the gold standards only include nodes or edges, - # sharing the same one will cause errors that one type of evalaution doesn't exist - - @staticmethod def pca_chosen_pathway(coordinates_files: list[Union[str, PathLike]], pathway_summary_file: str, output_dir: str): """ From 7277120e508008127166ab450877a80ef6b42688 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Fri, 7 Nov 2025 15:00:15 -0600 Subject: [PATCH 06/10] updated test cases and reworded comments --- spras/evaluation.py | 30 ++++---- ...pected-pr-per-pathway-pca-chosen-edges.txt | 4 + ...ected-pr-per-pathway-pca-chosen-nodes.txt} | 0 .../evaluate/input/gs_directed_edge_table.csv | 3 + test/evaluate/input/gs_mixed_edge_table.csv | 2 + .../input/gs_undirected_edge_table.csv | 2 + test/evaluate/test_evaluate.py | 76 ++++++++++++++----- 7 files changed, 87 insertions(+), 30 deletions(-) create mode 100644 test/evaluate/expected/expected-pr-per-pathway-pca-chosen-edges.txt rename test/evaluate/expected/{expected-pr-per-pathway-pca-chosen.txt => expected-pr-per-pathway-pca-chosen-nodes.txt} (100%) create mode 100644 test/evaluate/input/gs_directed_edge_table.csv create mode 100644 test/evaluate/input/gs_mixed_edge_table.csv create mode 100644 test/evaluate/input/gs_undirected_edge_table.csv diff --git a/spras/evaluation.py b/spras/evaluation.py index 1dade4b0f..fb878cc8f 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -152,7 +152,7 @@ def node_precision_and_recall(file_paths: Iterable[Union[str, PathLike]], node_t This function takes a list of file paths corresponding to pathway reconstruction algorithm outputs, each formatted as a tab-separated file with columns 'Node1', 'Node2', 'Rank', and 'Direction'. It compares the set of predicted nodes (from both columns Node1 and Node2) to a provided gold standard node table - and computes precision and recall per file. + and computes a precision and recall per file. @param file_paths: list of file paths of pathway reconstruction algorithm outputs @param node_table: the gold standard nodes @@ -239,7 +239,7 @@ def edge_precision_and_recall(file_paths: Iterable[Union[str, PathLike]], mixed_ This function takes a list of file paths corresponding to pathway reconstruction algorithm outputs, each formatted as a tab-separated file with columns 'Node1', 'Node2', 'Rank', and 'Direction'. - It compares the set of predicted edges to the three provided gold standard edge tables and computes precision and recall per file. + It compares the set of predicted edges to the three provided gold standard edge tables and computes a precision and recall per file. 
@param file_paths: list of file paths of pathway reconstruction algorithm outputs @param mixed_edge_table: the gold standard edges that includes directed and undirected edges @@ -249,7 +249,7 @@ def edge_precision_and_recall(file_paths: Iterable[Union[str, PathLike]], mixed_ - 'Pathway': Path object corresponding to each pathway file - 'Precision': Precision of predicted nodes vs. gold standard nodes - 'Recall': Recall of predicted nodes vs. gold standard nodes - - 'Gold_Standard_Type': Which gold standard was used to calculate the precision and recall + - 'Gold_Standard_Type': Which gold standard was used to calculate the precision and recall """ y_true_mixed = set(map(tuple, mixed_edge_table[['Interactor1', 'Interactor2', 'Direction']].values)) @@ -291,12 +291,12 @@ def edge_precision_and_recall(file_paths: Iterable[Union[str, PathLike]], mixed_ @staticmethod def edges_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: str | PathLike, output_png: str | PathLike, title: str): """ - Generates three scatter plot subplots showing edge precision and recall values for each pathway across three edge gold standard types, + Generates three scatter subplots showing edge precision and recall values for each pathway across the three edge gold standard types, and saves both the resulting plots and the corresponding data. - This function is intended for visualizing how different pathway reconstructions perform - (not a precision-recall curve) showing the precision and recall of each parameter combination - for each algorithm per edge gold standard dataset. + This function is intended for visualizing how different pathway reconstructions perform, + showing the precision and recall of each parameter combination for each algorithm across + each edge gold standard dataset (not a precision-recall curve). @param pr_df: Dataframe of calculated precision and recall for each pathway file per edge gold standard. Must include a preprocessed 'Algorithm' column and 'Gold_Standard_Type" column @@ -355,8 +355,9 @@ def edges_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: def precision_and_recall_per_pathway(pr_df: pd.DataFrame, output_file: str | PathLike, output_png: str | PathLike, aggregate_per_algorithm: bool = False): """ Function for visualizing per pathway precision and recall across all algorithms. Each point in the plot represents - a single pathway reconstruction. If `aggregate_per_algorithm` is set to True, each plot is restricted to a single - algorithm and titled accordingly. + a single pathway reconstruction. + + If `aggregate_per_algorithm` is set to True, each plot is restricted to a single algorithm and titled accordingly. @param pr_df: Dataframe of calculated precision and recall for each pathway file @param output_file: the filename to save the precision and recall of each pathway @@ -385,15 +386,18 @@ def precision_and_recall_pca_chosen_pathway(pr_df: pd.DataFrame, output_file: st Function for visualizing the precision and recall of the single parameter combination selected via PCA, either for each algorithm individually or one combination shared across all algorithms. Each point represents - a pathway reconstruction corresponding to the PCA-selected parameter combination. If `aggregate_per_algorithm` - is True, the plot includes a pca chosen pathway per algorithm and titled accordingly. If `edge_evaluation` is True, - the plot will include the evaluation across the three gold standard edge files. 
+ a pathway reconstruction corresponding to the PCA-selected parameter combination. + + If `aggregate_per_algorithm` is True, the output_png includes a pca chosen pathway per algorithm and titled accordingly. + + If `edge_evaluation` is True, the output PNG shows performance across all three edge gold standards; + if False, the output PNG shows evaluation for the single node gold standard. @param pr_df: Dataframe of calculated precision and recall for each pathway file @param output_file: the filename to save the precision and recall of each pathway @param output_png: the filename to plot the precision and recall of each pathway (not a PRC) @param aggregate_per_algorithm: Boolean indicating if this function is used per algorithm (Default False) - @param edge_evaluation: Boolean indicating if this function is used for creating edge_evaluation plots (Default False; used for node evaluation) + @param edge_evaluation: Boolean indicating if this function is used for creating edge_evaluation plots (Default False) """ # TODO update to add in the pathways for the algorithms that do not provide a pca chosen pathway https://github.com/Reed-CompBio/spras/issues/341 diff --git a/test/evaluate/expected/expected-pr-per-pathway-pca-chosen-edges.txt b/test/evaluate/expected/expected-pr-per-pathway-pca-chosen-edges.txt new file mode 100644 index 000000000..9134b4503 --- /dev/null +++ b/test/evaluate/expected/expected-pr-per-pathway-pca-chosen-edges.txt @@ -0,0 +1,4 @@ +Pathway Precision Recall Gold_Standard_Type +test/evaluate/input/data-test-params-123/pathway.txt 0.0 0.0 directed +test/evaluate/input/data-test-params-123/pathway.txt 0.5 0.5 mixed +test/evaluate/input/data-test-params-123/pathway.txt 1.0 1.0 undirected diff --git a/test/evaluate/expected/expected-pr-per-pathway-pca-chosen.txt b/test/evaluate/expected/expected-pr-per-pathway-pca-chosen-nodes.txt similarity index 100% rename from test/evaluate/expected/expected-pr-per-pathway-pca-chosen.txt rename to test/evaluate/expected/expected-pr-per-pathway-pca-chosen-nodes.txt diff --git a/test/evaluate/input/gs_directed_edge_table.csv b/test/evaluate/input/gs_directed_edge_table.csv new file mode 100644 index 000000000..c3755a19c --- /dev/null +++ b/test/evaluate/input/gs_directed_edge_table.csv @@ -0,0 +1,3 @@ +A B D +B A D +B C D \ No newline at end of file diff --git a/test/evaluate/input/gs_mixed_edge_table.csv b/test/evaluate/input/gs_mixed_edge_table.csv new file mode 100644 index 000000000..d819aa76e --- /dev/null +++ b/test/evaluate/input/gs_mixed_edge_table.csv @@ -0,0 +1,2 @@ +A B U +B C D \ No newline at end of file diff --git a/test/evaluate/input/gs_undirected_edge_table.csv b/test/evaluate/input/gs_undirected_edge_table.csv new file mode 100644 index 000000000..af85f211b --- /dev/null +++ b/test/evaluate/input/gs_undirected_edge_table.csv @@ -0,0 +1,2 @@ +A B U +B C U \ No newline at end of file diff --git a/test/evaluate/test_evaluate.py b/test/evaluate/test_evaluate.py index ce50350e5..e909cfc21 100644 --- a/test/evaluate/test_evaluate.py +++ b/test/evaluate/test_evaluate.py @@ -13,6 +13,9 @@ OUT_DIR = 'test/evaluate/output/' EXPECT_DIR = 'test/evaluate/expected/' GS_NODE_TABLE = pd.read_csv(INPUT_DIR + 'gs_node_table.csv', header=0) +GS_MIXED_EDGE_TABLE = pd.read_csv(INPUT_DIR + 'gs_mixed_edge_table.csv', names=["Interactor1", "Interactor2", "Direction"], sep="\t") +GS_DIRECTED_EDGE_TABLE = pd.read_csv(INPUT_DIR + 'gs_directed_edge_table.csv', names=["Interactor1", "Interactor2", "Direction"], sep="\t") +GS_UNDIRECTED_EDGE_TABLE = 
pd.read_csv(INPUT_DIR + 'gs_undirected_edge_table.csv', names=["Interactor1", "Interactor2", "Direction"], sep="\t") SUMMARY_FILE = INPUT_DIR + 'example_summary.txt' @@ -81,26 +84,43 @@ def test_node_precision_recall_per_pathway_not_provided(self): Evaluation.precision_and_recall_per_pathway(pr_df, output_file, output_png) def test_node_precision_recall_pca_chosen_pathway_not_provided(self): - output_file = Path( OUT_DIR + 'pr-per-pathway-pca-chosen-not-provided.txt') - output_file.unlink(missing_ok=True) - output_png = Path(OUT_DIR + 'pr-per-pathway-pca-chosen-not-provided.png') - output_png.unlink(missing_ok=True) + node_output_file = Path( OUT_DIR + 'pr-per-pathway-pca-chosen-not-provided_nodes.txt') + node_output_file.unlink(missing_ok=True) + node_output_png = Path(OUT_DIR + 'pr-per-pathway-pca-chosen-not-provided_nodes.png') + node_output_png.unlink(missing_ok=True) + file_paths = [] pr_df = Evaluation.node_precision_and_recall(file_paths, GS_NODE_TABLE) - Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output_file, output_png) + Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, node_output_file, node_output_png) - output = pd.read_csv(output_file, sep='\t', header=0).round(8) + node_output = pd.read_csv(node_output_file, sep='\t', header=0).round(8) expected = pd.read_csv(EXPECT_DIR + 'expected-pr-pca-chosen-not-provided.txt', sep='\t', header=0).round(8) - assert output.equals(expected) - assert output_png.exists() + assert node_output.equals(expected) + assert node_output_png.exists() + + def test_edge_precision_recall_pca_chosen_pathway_not_provided(self): + edge_output_file = Path( OUT_DIR + 'pr-per-pathway-pca-chosen-not-provided_edges.txt') + edge_output_file.unlink(missing_ok=True) + edge_output_png = Path(OUT_DIR + 'pr-per-pathway-pca-chosen-not-provided_edges.png') + edge_output_png.unlink(missing_ok=True) + + file_paths = [] + + pr_df = Evaluation.edge_precision_and_recall(file_paths, GS_MIXED_EDGE_TABLE, GS_DIRECTED_EDGE_TABLE, GS_UNDIRECTED_EDGE_TABLE) + Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, edge_output_file, edge_output_png) + edge_output = pd.read_csv(edge_output_file, sep='\t', header=0).round(8) + expected = pd.read_csv(EXPECT_DIR + 'expected-pr-pca-chosen-not-provided.txt', sep='\t', header=0).round(8) + + assert edge_output.equals(expected) + assert edge_output_png.exists() def test_node_precision_recall_pca_chosen_pathway(self): - output_file = Path(OUT_DIR + 'pr-per-pathway-pca-chosen.txt') - output_file.unlink(missing_ok=True) - output_png = Path(OUT_DIR + 'pr-per-pathway-pca-chosen.png') - output_png.unlink(missing_ok=True) + node_output_file = Path(OUT_DIR + 'pr-per-pathway-pca-chosen_nodes.txt') + node_output_file.unlink(missing_ok=True) + node_output_png = Path(OUT_DIR + 'pr-per-pathway-pca-chosen_nodes.png') + node_output_png.unlink(missing_ok=True) output_coordinates = Path(OUT_DIR + 'pca-coordinates.tsv') output_coordinates.unlink(missing_ok=True) @@ -111,16 +131,38 @@ def test_node_precision_recall_pca_chosen_pathway(self): ml.pca(dataframe, OUT_DIR + 'pca.png', OUT_DIR + 'pca-variance.txt', str(output_coordinates), kde=True, remove_empty_pathways=True) pathway = Evaluation.pca_chosen_pathway([output_coordinates], SUMMARY_FILE, INPUT_DIR) - pr_df = Evaluation.node_precision_and_recall(pathway, GS_NODE_TABLE) - Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output_file, output_png, True) + Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, node_output_file, node_output_png, True) + chosen = 
pd.read_csv(node_output_file, sep='\t', header=0).round(8) + expected = pd.read_csv(EXPECT_DIR + 'expected-pr-per-pathway-pca-chosen-nodes.txt', sep='\t', header=0).round(8) - chosen = pd.read_csv(output_file, sep='\t', header=0).round(8) - expected = pd.read_csv(EXPECT_DIR + 'expected-pr-per-pathway-pca-chosen.txt', sep='\t', header=0).round(8) + assert chosen.equals(expected) + assert node_output_png.exists() + + def test_edge_precision_recall_pca_chosen_pathway(self): + edge_output_file = Path(OUT_DIR + 'pr-per-pathway-pca-chosen_edges.txt') + edge_output_file.unlink(missing_ok=True) + edge_output_png = Path(OUT_DIR + 'pr-per-pathway-pca-chosen_edges.png') + edge_output_png.unlink(missing_ok=True) + output_coordinates = Path(OUT_DIR + 'pca-coordinates.tsv') + output_coordinates.unlink(missing_ok=True) + + file_paths = [INPUT_DIR + 'data-test-params-123/pathway.txt', INPUT_DIR + 'data-test-params-456/pathway.txt', + INPUT_DIR + 'data-test-params-789/pathway.txt', INPUT_DIR + 'data-test-params-empty/pathway.txt'] + + dataframe = ml.summarize_networks(file_paths) + ml.pca(dataframe, OUT_DIR + 'pca.png', OUT_DIR + 'pca-variance.txt', str(output_coordinates), kde=True, remove_empty_pathways=True) + + pathway = Evaluation.pca_chosen_pathway([output_coordinates], SUMMARY_FILE, INPUT_DIR) + pr_df = Evaluation.edge_precision_and_recall(pathway, GS_MIXED_EDGE_TABLE, GS_DIRECTED_EDGE_TABLE, GS_UNDIRECTED_EDGE_TABLE) + Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, edge_output_file, edge_output_png, True, True) + + chosen = pd.read_csv(edge_output_file, sep='\t', header=0).round(8) + expected = pd.read_csv(EXPECT_DIR + 'expected-pr-per-pathway-pca-chosen-edges.txt', sep='\t', header=0).round(8) assert chosen.equals(expected) - assert output_png.exists() + assert edge_output_png.exists() def test_node_ensemble(self): out_path_file = Path(OUT_DIR + 'node-ensemble.csv') From 26f3d54d0602ae6a107b08108f4554bb8d0f443c Mon Sep 17 00:00:00 2001 From: ntalluri Date: Fri, 7 Nov 2025 15:24:59 -0600 Subject: [PATCH 07/10] attempt to fix the spacing --- spras/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/evaluation.py b/spras/evaluation.py index fb878cc8f..a123661ec 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -319,7 +319,7 @@ def edges_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: # TODO: fix the layout of the output png gs_types = pr_df["Gold_Standard_Type"].unique().tolist() - fig, axes = plt.subplots(1, len(gs_types), figsize=(6 * len(gs_types), 5), sharex=True, sharey=True) + fig, axes = plt.subplots(1, len(gs_types), figsize=(6 * len(gs_types), 5)) color_palette = create_palette(pr_df['Algorithm'].tolist()) for ax, gs_type in zip(axes, gs_types, strict=True): From f31d598ed7309acd66b914884c33651cb9e4f020 Mon Sep 17 00:00:00 2001 From: ntalluri Date: Fri, 7 Nov 2025 16:16:37 -0600 Subject: [PATCH 08/10] made changes based on review --- Snakefile | 6 ------ spras/evaluation.py | 27 +++------------------------ 2 files changed, 3 insertions(+), 30 deletions(-) diff --git a/Snakefile b/Snakefile index fcb45ed69..ea75092bc 100644 --- a/Snakefile +++ b/Snakefile @@ -554,12 +554,6 @@ rule evaluation_per_algo_pca_chosen_edges: Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.edge_pca_chosen_pr_file, output.edge_pca_chosen_pr_png, include_aggregate_algo_eval, edge_evaluation=True) -# Returns pca coordinates for a specific algorithm and dataset -def collect_pca_coordinates_per_algo_per_dataset(wildcards): - 
dataset_label = get_dataset_label(wildcards) - return expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-pca-coordinates.txt', out_dir=out_dir, sep=SEP, dataset=dataset_label, algorithm=algorithms_mult_param_combos) #TODO we are using algos with mult param combos, what to do when empty? - - # Return the dataset pickle file for a specific dataset def get_dataset_pickle_file(wildcards): dataset_label = get_dataset_label(wildcards) diff --git a/spras/evaluation.py b/spras/evaluation.py index a123661ec..928be684c 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -152,7 +152,7 @@ def node_precision_and_recall(file_paths: Iterable[Union[str, PathLike]], node_t This function takes a list of file paths corresponding to pathway reconstruction algorithm outputs, each formatted as a tab-separated file with columns 'Node1', 'Node2', 'Rank', and 'Direction'. It compares the set of predicted nodes (from both columns Node1 and Node2) to a provided gold standard node table - and computes a precision and recall per file. + and computes precision and recall per file. @param file_paths: list of file paths of pathway reconstruction algorithm outputs @param node_table: the gold standard nodes @@ -233,13 +233,14 @@ def nodes_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: pr_df.drop(columns=['Algorithm'], inplace=True) pr_df.to_csv(output_file, sep='\t', index=False) + @staticmethod def edge_precision_and_recall(file_paths: Iterable[Union[str, PathLike]], mixed_edge_table: pd.DataFrame, directed_edge_table: pd.DataFrame, undirected_edge_table: pd.DataFrame) -> pd.DataFrame: """ Computes edge-level precision and recall for each pathway reconstruction output file against three edge gold standard tables. This function takes a list of file paths corresponding to pathway reconstruction algorithm outputs, each formatted as a tab-separated file with columns 'Node1', 'Node2', 'Rank', and 'Direction'. - It compares the set of predicted edges to the three provided gold standard edge tables and computes a precision and recall per file. + It compares the set of predicted edges to the three provided gold standard edge tables and computes precision and recall per file. @param file_paths: list of file paths of pathway reconstruction algorithm outputs @param mixed_edge_table: the gold standard edges that includes directed and undirected edges @@ -650,26 +651,4 @@ def precision_recall_curve_node_ensemble(node_ensembles: dict, node_table: pd.Da complete_df.loc[not_last_rows, ['Average_Precision', 'Baseline']] = None complete_df.to_csv(output_file, index=False, sep='\t') - @staticmethod - def edge_dummy_function(mixed_edge_table: pd.DataFrame, undirected_edge_table: pd.DataFrame, directed_edge_table: pd.DataFrame, dummy_file: str): - """ - Temporary function to test edge file implementation. - Will be removed from SPRAS's evaluation code in the future. - - Takes in the different edge table versions (mixed, fully directed, fully undirected) - for a specific edge gold standard dataset and writes them to a file. - - @param mixed_edge_table: Edge gold standard treated as mixed directionality. - @param undirected_edge_table: Edge gold standard treated as fully undirected. - @param directed_edge_table: Edge gold standard treated as fully directed. - @param dummy_file: Filename to save the edge tables. 
- """ - with open(dummy_file, "w") as f: - f.write("Mixed Edge Table\n") - mixed_edge_table.to_csv(f, index=False) - f.write("\n\nUndirected Edge Table\n") - undirected_edge_table.to_csv(f, index=False) - f.write("\n\nDirected Edge Table\n") - directed_edge_table.to_csv(f, index=False) - From f19272377c8ac3bc15b0976fb2aa1c3d4122a7e8 Mon Sep 17 00:00:00 2001 From: Neha Talluri <78840540+ntalluri@users.noreply.github.com> Date: Fri, 7 Nov 2025 16:19:27 -0600 Subject: [PATCH 09/10] Update spras/evaluation.py --- spras/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/evaluation.py b/spras/evaluation.py index 928be684c..04a3724e9 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -317,7 +317,7 @@ def edges_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: ) pr_df.sort_values(by=['Algorithm', 'Gold_Standard_Type', 'Recall', 'Pathway'], axis=0, ascending=True, inplace=True) - # TODO: fix the layout of the output png + gs_types = pr_df["Gold_Standard_Type"].unique().tolist() fig, axes = plt.subplots(1, len(gs_types), figsize=(6 * len(gs_types), 5)) From e3c69d7675840639d8dd9ec3431ecb8761f0a539 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 4 Dec 2025 23:09:16 -0800 Subject: [PATCH 10/10] style: fmt --- spras/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spras/evaluation.py b/spras/evaluation.py index 04a3724e9..27eb419a0 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -317,7 +317,7 @@ def edges_visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: ) pr_df.sort_values(by=['Algorithm', 'Gold_Standard_Type', 'Recall', 'Pathway'], axis=0, ascending=True, inplace=True) - + gs_types = pr_df["Gold_Standard_Type"].unique().tolist() fig, axes = plt.subplots(1, len(gs_types), figsize=(6 * len(gs_types), 5))
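
Note on the expected edge precision/recall values: the fixture test/evaluate/expected/expected-pr-per-pathway-pca-chosen-edges.txt added in PATCH 06/10 can be reproduced by hand with the same set-union/binarization logic that Evaluation.edge_precision_and_recall introduces in PATCH 01/10. The sketch below is illustrative only and is not part of the patches; the predicted edge set for data-test-params-123/pathway.txt does not appear anywhere in this diff, so the two undirected edges used here are an assumption chosen to be consistent with the expected values (directed 0.0/0.0, mixed 0.5/0.5, undirected 1.0/1.0).

    # Minimal sketch (assumed inputs, not part of the patch series): mirrors the
    # precision/recall arithmetic in Evaluation.edge_precision_and_recall.
    from sklearn.metrics import precision_score, recall_score

    # Gold-standard edge tables from the new test fixtures (Interactor1, Interactor2, Direction)
    gold_standards = {
        "mixed":      {("A", "B", "U"), ("B", "C", "D")},
        "directed":   {("A", "B", "D"), ("B", "A", "D"), ("B", "C", "D")},
        "undirected": {("A", "B", "U"), ("B", "C", "U")},
    }

    # Assumed predicted edges (Node1, Node2, Direction) for the PCA-chosen pathway;
    # the real values live in test/evaluate/input/data-test-params-123/pathway.txt.
    y_pred = {("A", "B", "U"), ("B", "C", "U")}

    for gs_type, y_true in gold_standards.items():
        # Binarize over the union of gold-standard and predicted edges, as the patch does,
        # then score directly (no precision-recall curve, since ranks are not thresholded here)
        universe = y_true | y_pred
        true_binary = [1 if edge in y_true else 0 for edge in universe]
        pred_binary = [1 if edge in y_pred else 0 for edge in universe]
        precision = precision_score(true_binary, pred_binary, zero_division=0.0)
        recall = recall_score(true_binary, pred_binary, zero_division=0.0)
        print(f"{gs_type}: precision={precision:.1f} recall={recall:.1f}")

Running this prints mixed 0.5/0.5, directed 0.0/0.0, and undirected 1.0/1.0, which matches the rows of the expected fixture once ordered by Gold_Standard_Type.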