Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 49 additions & 19 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -106,23 +106,31 @@ def make_final_input(wildcards):
final_input.extend(expand('{out_dir}{sep}{dataset}-ml{sep}{algorithm}-jaccard-heatmap.png',out_dir=out_dir,sep=SEP,dataset=dataset_labels,algorithm=algorithms))

if _config.config.analysis_include_evaluation:
# node evaluation
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm_params=algorithms_with_params))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
# dummy file
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}dummy-edge.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs))


# edge evaluation
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-edges.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-edges.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs))

if _config.config.analysis_include_evaluation_aggregate_algo:
# node evaluation
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-per-pathway-for-{algorithm}-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs,algorithm=algorithms))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes-per-algorithm-nodes.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-curve-ensemble-nodes-per-algorithm-nodes.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_node_pairs))

# edge evaluation
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-edges.txt',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs))
final_input.extend(expand('{out_dir}{sep}{dataset_gold_standard_pair}-eval{sep}pr-pca-chosen-pathway-per-algorithm-edges.png',out_dir=out_dir,sep=SEP,dataset_gold_standard_pair=dataset_gold_standard_edge_pairs))

# Since (formatted) pathway files are interesting to the user, we preserve them.
final_input.extend(expand('{out_dir}{sep}{dataset}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, dataset=dataset_labels, algorithm_params=algorithms_with_params))

Expand Down Expand Up @@ -427,7 +435,6 @@ def get_dataset_label(wildcards):
dataset = parts[0]
return dataset


# Returns all pathways for a specific dataset
def collect_pathways_per_dataset(wildcards):
dataset_label = get_dataset_label(wildcards)
Expand Down Expand Up @@ -478,7 +485,7 @@ def collect_pca_coordinates_per_dataset(wildcards):

# Run the PCA-chosen step to select the representative pathway from all pathway outputs for a given dataset,
# then evaluate it with precision and recall against the corresponding gold standard
rule evaluation_pca_chosen:
rule evaluation_pca_chosen_nodes:
input:
node_gold_standard_file = get_gold_standard_pickle_file,
pca_coordinates_file = collect_pca_coordinates_per_dataset,
Expand All @@ -492,27 +499,61 @@ rule evaluation_pca_chosen:
pr_df = Evaluation.node_precision_and_recall(pca_chosen_pathway, node_table)
Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png)

# Edge-level counterpart of the PCA-chosen evaluation: pick the PCA-chosen pathway for a
# dataset and score its edges with precision and recall against the gold standard edge tables.
rule evaluation_pca_chosen_edges:
    input:
        edge_gold_standard_file = get_gold_standard_pickle_file,
        pca_coordinates_file = collect_pca_coordinates_per_dataset,
        pathway_summary_file = collect_summary_statistics_per_dataset
    output:
        edge_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-edges.txt']),
        edge_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-edges.png']),
    run:
        # Load the gold standard file once and read all three edge tables from the same
        # object instead of calling Evaluation.from_file separately for each table.
        gold_standard = Evaluation.from_file(input.edge_gold_standard_file)
        mixed_edge_table = gold_standard.mixed_edge_table
        undirected_edge_table = gold_standard.undirected_edge_table
        directed_edge_table = gold_standard.directed_edge_table

        pca_chosen_pathway = Evaluation.pca_chosen_pathway(input.pca_coordinates_file, input.pathway_summary_file, out_dir)
        # NOTE: edge_precision_and_recall takes the tables in (mixed, directed, undirected) order.
        pr_df = Evaluation.edge_precision_and_recall(pca_chosen_pathway, mixed_edge_table, directed_edge_table, undirected_edge_table)
        Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.edge_pca_chosen_pr_file, output.edge_pca_chosen_pr_png, edge_evaluation=True)

# Returns pca coordinates for a specific algorithm and dataset
def collect_pca_coordinates_per_algo_per_dataset(wildcards):
    """Build the per-algorithm PCA coordinate file paths for the dataset named by the wildcards.

    The dataset label is resolved with ``get_dataset_label`` and the path pattern is
    expanded once for every entry in ``algorithms_mult_param_combos``.
    """
    label = get_dataset_label(wildcards)
    # TODO we are using algos with mult param combos, what to do when empty?
    coordinates_pattern = '{out_dir}{sep}{dataset}-ml{sep}{algorithm}-pca-coordinates.txt'
    return expand(
        coordinates_pattern,
        out_dir=out_dir,
        sep=SEP,
        dataset=label,
        algorithm=algorithms_mult_param_combos,
    )

# Run the PCA-chosen step to select a representative pathway per algorithm from the pathway outputs for a given dataset,
# then evaluate them with precision and recall against the corresponding gold standard
# Node-level, per-algorithm PCA-chosen evaluation: select the PCA-chosen pathways from the
# per-algorithm PCA coordinate files, then score their nodes against the gold standard node table.
rule evaluation_per_algo_pca_chosen_nodes:
    input:
        node_gold_standard_file = get_gold_standard_pickle_file,
        # One coordinates file per algorithm, hence the plural name (matches the edges rule).
        pca_coordinates_files = collect_pca_coordinates_per_algo_per_dataset,
        pathway_summary_file = collect_summary_statistics_per_dataset
    output:
        node_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.txt']),
        node_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-nodes.png']),
    run:
        node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
        pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_files, input.pathway_summary_file, out_dir)
        pr_df = Evaluation.node_precision_and_recall(pca_chosen_pathways, node_table)
        Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.node_pca_chosen_pr_file, output.node_pca_chosen_pr_png, include_aggregate_algo_eval)

# Edge-level, per-algorithm PCA-chosen evaluation: select the PCA-chosen pathways from the
# per-algorithm PCA coordinate files, then score their edges against the gold standard edge tables.
rule evaluation_per_algo_pca_chosen_edges:
    input:
        edge_gold_standard_file = get_gold_standard_pickle_file,
        pca_coordinates_files = collect_pca_coordinates_per_algo_per_dataset,
        pathway_summary_file = collect_summary_statistics_per_dataset
    output:
        edge_pca_chosen_pr_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-edges.txt']),
        edge_pca_chosen_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-pca-chosen-pathway-per-algorithm-edges.png']),
    run:
        # Load the gold standard file once and read all three edge tables from the same
        # object instead of calling Evaluation.from_file separately for each table.
        gold_standard = Evaluation.from_file(input.edge_gold_standard_file)
        mixed_edge_table = gold_standard.mixed_edge_table
        undirected_edge_table = gold_standard.undirected_edge_table
        directed_edge_table = gold_standard.directed_edge_table

        pca_chosen_pathways = Evaluation.pca_chosen_pathway(input.pca_coordinates_files, input.pathway_summary_file, out_dir)
        # NOTE: edge_precision_and_recall takes the tables in (mixed, directed, undirected) order.
        pr_df = Evaluation.edge_precision_and_recall(pca_chosen_pathways, mixed_edge_table, directed_edge_table, undirected_edge_table)

        Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output.edge_pca_chosen_pr_file, output.edge_pca_chosen_pr_png, include_aggregate_algo_eval, edge_evaluation=True)

# Return the dataset pickle file for a specific dataset
def get_dataset_pickle_file(wildcards):
dataset_label = get_dataset_label(wildcards)
Expand Down Expand Up @@ -556,17 +597,6 @@ rule evaluation_per_algo_ensemble_pr_curve:
node_ensembles_dict = Evaluation.edge_frequency_node_ensemble(node_table, input.ensemble_files, input.dataset_file)
Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, node_table, output.node_pr_curve_png, output.node_pr_curve_file, include_aggregate_algo_eval)

# Placeholder edge evaluation: passes the three gold standard edge tables to
# Evaluation.edge_dummy_function, which is given the dummy output file path.
rule evaluation_edge_dummy:
    input:
        edge_gold_standard_file = get_gold_standard_pickle_file,
    output:
        dummy_file = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'dummy-edge.txt']),
    run:
        # Load the gold standard file once and read all three edge tables from the same
        # object instead of calling Evaluation.from_file separately for each table.
        gold_standard = Evaluation.from_file(input.edge_gold_standard_file)
        mixed_edge_table = gold_standard.mixed_edge_table
        undirected_edge_table = gold_standard.undirected_edge_table
        directed_edge_table = gold_standard.directed_edge_table
        # NOTE: edge_dummy_function takes the tables in (mixed, undirected, directed) order.
        Evaluation.edge_dummy_function(mixed_edge_table, undirected_edge_table, directed_edge_table, output.dummy_file)

# Remove the output directory
rule clean:
    # Recursively deletes out_dir. The path is interpolated by the Python f-string when the
    # Snakefile is parsed, so the shell command receives the literal directory path.
    shell: f'rm -rf {out_dir}'
Loading
Loading