Skip to content

Commit d60b779

Browse files
committed
Added jaccard index heatmap for edges
1 parent 4e42295 commit d60b779

File tree

4 files changed

+67
-5
lines changed

4 files changed

+67
-5
lines changed

Snakefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,8 @@ rule evaluation:
395395
pr_edge_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', 'precision-recall-per-pathway_edge.png']),
396396
pr_curve_png = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', 'precision-recall-curve-ensemble-nodes.png']),
397397
pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "precision-recall-pca-chosen-pathway.txt"]),
398+
heatmap_edge_file = SEP.join([out_dir, '{dataset_gold_standard_pairs}-eval', "jaccard_edge_heatmap.png"]),
399+
398400
run:
399401
node_table = Evaluation.from_file(input.gold_standard_file).node_table
400402
edge_table = Evaluation.from_file(input.gold_standard_file).edge_table
@@ -404,6 +406,7 @@ rule evaluation:
404406
Evaluation.precision_recall_curve_node_ensemble(node_ensemble, node_table, output.pr_curve_png)
405407
pca_chosen_pathway = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, out_dir)
406408
Evaluation.precision_and_recall_node(pca_chosen_pathway, node_table, algorithms, output.pca_chosen_pr_file)
409+
Evaluation.jaccard_edge_heatmap(input.pathways, edge_table, algorithms, output.pr_edge_file, output.heatmap_edge_file)
407410

408411

409412
# Returns all pathways for a specific algorithm and dataset

config/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ gold_standards:
128128
dataset_labels: ["data0"]
129129
-
130130
label: gs1
131-
node_files: ["gs_nodes1.txt"]
131+
node_files: ["gs_nodes1.txt"] # why list?
132132
data_dir: "input"
133133
dataset_labels: ["data1", "data0"]
134134

config/synthetic.yaml

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,15 @@ algorithms:
5151

5252
- name: "omicsintegrator1"
5353
params:
54-
include: true
54+
include: false
5555
run1:
5656
b: [5, 6]
5757
w: np.linspace(0,5,2)
5858
d: [10]
5959

6060
- name: "omicsintegrator2"
6161
params:
62-
include: true
62+
include: false
6363
run1:
6464
b: [4]
6565
g: [0]
@@ -77,7 +77,7 @@ algorithms:
7777

7878
- name: "mincostflow"
7979
params:
80-
include: true
80+
include: false
8181
run1:
8282
flow: [1] # The flow must be an int
8383
capacity: [1]
@@ -108,14 +108,24 @@ datasets:
108108
other_files: []
109109
# Relative path from the spras directory
110110
data_dir: "input"
111+
-
112+
# Labels can only contain letters, numbers, or underscores
113+
label: data3
114+
node_files: ["node-prizes-synthetic.txt", "sources.txt", "targets.txt"]
115+
# DataLoader.py can currently only load a single edge file, which is the primary network
116+
edge_files: ["network-synthetic.txt"]
117+
# Placeholder
118+
other_files: []
119+
# Relative path from the spras directory
120+
data_dir: "input"
111121

112122
gold_standards:
113123
-
114124
label: gs2
115125
node_files: ["gs_nodes2.txt"]
116126
edge_files: ["gs_edges.txt"]
117127
data_dir: "input"
118-
dataset_labels: ["data2"]
128+
dataset_labels: ["data2", "data3"]
119129

120130
# If we want to reconstruct then we should set run to true.
121131
# TODO: if include is true above but run is false here, algs are not run.

spras/evaluation.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
precision_score,
1313
recall_score,
1414
)
15+
import seaborn as sns
1516

1617

1718
class Evaluation:
@@ -211,6 +212,54 @@ def precision_and_recall_node(file_paths: Iterable[Path], node_table: pd.DataFra
211212
plt.savefig(output_png)
212213

213214

215+
@staticmethod
216+
def jaccard_edge_heatmap(file_paths: Iterable[Path], edge_table: pd.DataFrame, algorithms: list, output_file: str, output_png:str=None):
217+
"""
218+
Takes in file paths for a specific dataset and an associated gold standard edge table.
219+
Calculates precision and recall for each pathway file
220+
Returns output back to output_file
221+
@param file_paths: file paths of pathway reconstruction algorithm outputs
222+
@param edge_table: the gold standard edges
223+
@param algorithms: list of algorithms used in current run of SPRAS
224+
@param output_file: the filename to save the precision and recall of each pathway
225+
@param output_png (optional): the filename to plot the precision and recall of each pathway (not a PRC)
226+
"""
227+
print("jaccard_heatmap")
228+
229+
gs_edges = set()
230+
for row in edge_table.itertuples():
231+
gs_edges.add((row[1], row[2]))
232+
# calculate all the jaccard edge index for each method against the gs
233+
jaccard_edges_indices_list = []
234+
algs = []
235+
for file in file_paths:
236+
df = pd.read_table(file, sep="\t", header=0, usecols=["Node1", "Node2"])
237+
method_edges = set()
238+
for row in df.itertuples():
239+
method_edges.add((row[1], row[2]))
240+
edge_union = gs_edges | method_edges
241+
edge_intersection = gs_edges & method_edges
242+
jaccard_edge_index = len(edge_intersection) / len(edge_union)
243+
jaccard_edges_indices_list.append(float(jaccard_edge_index))
244+
algs.append(file.split("/")[1].split("-")[1])
245+
246+
jaccard_edges_indices = np.asanyarray([jaccard_edges_indices_list])
247+
248+
print(algs)
249+
250+
plt.figure(figsize=(10, 8))
251+
sns.heatmap(
252+
jaccard_edges_indices,
253+
annot=True,
254+
cmap="viridis",
255+
yticklabels=["Pathways"],
256+
xticklabels=algs,
257+
)
258+
plt.xlabel("Algorithms")
259+
plt.title("Jaccard Index Edge Heatmap")
260+
plt.savefig(output_png, format="png", dpi=300)
261+
262+
214263
def select_max_freq_and_node(row: pd.Series):
215264
"""
216265
Selects the node and frequency with the highest frequency value from two potential nodes in a row.

0 commit comments

Comments
 (0)