diff --git a/Snakefile b/Snakefile index 1cc6ffd3c..2cb91b023 100644 --- a/Snakefile +++ b/Snakefile @@ -5,6 +5,7 @@ import yaml from spras.dataset import Dataset from spras.evaluation import Evaluation from spras.analysis import ml, summary, cytoscape +from spras.attribution import attribute_algorithms import spras.config.config as _config # Snakemake updated the behavior in the 6.5.0 release https://github.com/snakemake/snakemake/pull/1037 @@ -126,6 +127,10 @@ def make_final_input(wildcards): # Since (formatted) pathway files are interesting to the user, we preserve them. final_input.extend(expand('{out_dir}{sep}{dataset}-{algorithm_params}{sep}pathway.txt', out_dir=out_dir, sep=SEP, dataset=dataset_labels, algorithm_params=algorithms_with_params)) + if _config.config.analysis_include_attribution: + final_input.extend(expand('{out_dir}{sep}attribution/{algorithm}.bib', out_dir=out_dir, sep=SEP, algorithm=algorithms)) + final_input.extend(expand('{out_dir}{sep}attribution/all.bib', out_dir=out_dir, sep=SEP, )) + # Create log files for the parameters and datasets final_input.extend(expand('{out_dir}{sep}logs{sep}parameters-{algorithm_params}.yaml', out_dir=out_dir, sep=SEP, algorithm_params=algorithms_with_params)) final_input.extend(expand('{out_dir}{sep}logs{sep}datasets-{dataset}.yaml', out_dir=out_dir, sep=SEP, dataset=dataset_labels)) @@ -407,7 +412,7 @@ rule ensemble_per_algo: # Calculated Jaccard similarity between output pathways for each dataset per algorithm rule jaccard_similarity_per_algo: input: - pathways = collect_pathways_per_algo + pathways = collect_pathways_per_algo output: jaccard_similarity_matrix = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-matrix.txt']), jaccard_similarity_heatmap = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-heatmap.png']) @@ -567,6 +572,13 @@ rule evaluation_edge_dummy: directed_edge_table = Evaluation.from_file(input.edge_gold_standard_file).directed_edge_table 
Evaluation.edge_dummy_function(mixed_edge_table, undirected_edge_table, directed_edge_table, output.dummy_file) +rule attribution: + output: + attribution_all = SEP.join([out_dir, 'attribution', 'all.bib']), + attribution_algorithms = expand('{out_dir}{sep}attribution{sep}{algorithms}.bib', out_dir=out_dir, sep=SEP, algorithms=algorithms), + run: + attribute_algorithms(output.attribution_all, output.attribution_algorithms) + # Remove the output directory rule clean: shell: f'rm -rf {out_dir}' diff --git a/config/config.yaml b/config/config.yaml index 8bd29e70a..d452cfbf9 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -223,3 +223,7 @@ analysis: # adds evaluation per algorithm per dataset-goldstandard pair # evaluation per algorithm will not run unless ml include and ml aggregate_per_algorithm are set to true aggregate_per_algorithm: true + attribution: + # Include generated bibtex citations at OUT_DIR/attribution/*.bib, for every included algorithm, + # including an aggregated attribution/all.bib + include: true diff --git a/docs/contributing/index.rst b/docs/contributing/index.rst index 600d05a02..87dba214d 100644 --- a/docs/contributing/index.rst +++ b/docs/contributing/index.rst @@ -360,6 +360,10 @@ Modify parse outputs: ``test/parse-outputs/test_parse_outputs.py``, with any parameters it needs. +Finally, add an empty ``localneighborhood.bib`` file to +``test/attribution/expected``, indicating that +LocalNeighborhood cites nothing else. 
+
 Step 6: Update documentation
 ----------------------------
 
diff --git a/spras/attribution.py b/spras/attribution.py
new file mode 100644
index 000000000..ce13a0e05
--- /dev/null
+++ b/spras/attribution.py
@@ -0,0 +1,62 @@
+"""Generate BibTeX attribution files for the algorithms that SPRAS wraps."""
+import urllib.parse
+from functools import cache
+from pathlib import Path
+
+import requests
+
+from spras.runner import algorithms
+
+DOI_BASE = "https://citation.doi.org/format?style=bibtex&lang=en-US&doi="
+# requests never times out by default; bound how long one citation lookup may take
+REQUEST_TIMEOUT_SECONDS = 30
+
+
+def format_request(doi: str) -> str:
+    """Return the citation service URL that renders `doi` as BibTeX."""
+    return DOI_BASE + urllib.parse.quote(doi)
+
+
+@cache
+def get_bibtex(doi: str) -> str:
+    """
+    Fetch the BibTeX entry for `doi`, stripped of surrounding whitespace.
+
+    Results are cached for the life of the process so a DOI shared by several
+    algorithms is only requested once.
+    Raises requests.HTTPError if the service answers with an error status and
+    requests.Timeout if it does not answer within REQUEST_TIMEOUT_SECONDS.
+    """
+    response = requests.get(format_request(doi), timeout=REQUEST_TIMEOUT_SECONDS)
+    # Fail here rather than writing an HTTP error page into a .bib file
+    response.raise_for_status()
+    return response.text.strip()
+
+
+def _write_citations(path: str, citations: list[str]) -> None:
+    """Write `citations` to `path`, one per line, creating parent directories as needed."""
+    Path(path).parent.mkdir(parents=True, exist_ok=True)
+    # Entries contain non-ASCII author names, so do not depend on the locale's encoding
+    with open(path, 'w', encoding='utf-8') as handle:
+        handle.writelines(citation + '\n' for citation in citations)
+
+
+def attribute_algorithms(all_file: str, alg_files: list[str]):
+    """
+    Write the BibTeX citations for the algorithms named by alg_files.
+
+    Every path in alg_files must be named `<algorithm>.bib`, where `<algorithm>`
+    is a key of `spras.runner.algorithms`. Each file receives one entry per DOI
+    listed in that algorithm's `dois` (none leaves the file empty), and all_file
+    receives every per-algorithm entry in alg_files order, duplicates included.
+    """
+    # Fetch everything before writing so a failed lookup leaves no partial output
+    citations_by_file = [
+        (file, [get_bibtex(doi) for doi in algorithms[Path(file).stem].dois])
+        for file in alg_files
+    ]
+
+    for file, citations in citations_by_file:
+        _write_citations(file, citations)
+
+    _write_citations(all_file, [entry for _, entries in citations_by_file for entry in entries])
diff --git a/spras/config/config.py b/spras/config/config.py
index 346682f53..98049b9ec 100644
--- a/spras/config/config.py
+++ b/spras/config/config.py
@@ -104,6 +104,8 @@ def __init__(self, raw_config: dict[str, Any]):
         self.analysis_include_ml = None
         # A Boolean specifying whether to run the Evaluation analysis
         self.analysis_include_evaluation = None
+        # A Boolean specifying whether to run the attribution postprocessing
+        self.analysis_include_attribution = None
         # A Boolean specifying whether to run the ML per algorithm analysis
         self.analysis_include_ml_aggregate_algo = None
         # A
Boolean specifying whether to run the evaluation per algorithm analysis @@ -249,6 +251,7 @@ def process_analysis(self, raw_config: RawConfig): self.analysis_include_cytoscape = raw_config.analysis.cytoscape.include self.analysis_include_ml = raw_config.analysis.ml.include self.analysis_include_evaluation = raw_config.analysis.evaluation.include + self.analysis_include_attribution = raw_config.analysis.attribution.include # Only run ML aggregate per algorithm if analysis include ML is set to True if self.ml_params.aggregate_per_algorithm and self.analysis_include_ml: diff --git a/spras/config/schema.py b/spras/config/schema.py index f99bbe2d7..3d508ff1f 100644 --- a/spras/config/schema.py +++ b/spras/config/schema.py @@ -65,11 +65,17 @@ class EvaluationAnalysis(BaseModel): model_config = ConfigDict(extra='forbid') +class Attribution(BaseModel): + include: bool + + model_config = ConfigDict(extra='forbid') + class Analysis(BaseModel): summary: SummaryAnalysis = SummaryAnalysis(include=False) cytoscape: CytoscapeAnalysis = CytoscapeAnalysis(include=False) ml: MlAnalysis = MlAnalysis(include=False) evaluation: EvaluationAnalysis = EvaluationAnalysis(include=False) + attribution: Attribution = Attribution(include=False) model_config = ConfigDict(extra='forbid') diff --git a/test/analysis/input/config.yaml b/test/analysis/input/config.yaml index abde6f979..7986ace24 100644 --- a/test/analysis/input/config.yaml +++ b/test/analysis/input/config.yaml @@ -128,3 +128,5 @@ analysis: metric: 'euclidean' evaluation: include: false + attribution: + include: false diff --git a/test/analysis/input/egfr.yaml b/test/analysis/input/egfr.yaml index da4560df9..eac858c40 100644 --- a/test/analysis/input/egfr.yaml +++ b/test/analysis/input/egfr.yaml @@ -100,3 +100,5 @@ analysis: include: false evaluation: include: false + attribution: + include: false diff --git a/test/attribution/expected/all.bib b/test/attribution/expected/all.bib new file mode 100644 index 000000000..014645280 --- 
/dev/null +++ b/test/attribution/expected/all.bib @@ -0,0 +1,9 @@ +@article{Supper_Spangenberg_Planatscher_Dräger_Schröder_Zell_2009, title={BowTieBuilder: modeling signal transduction pathways}, volume={3}, url={http://dx.doi.org/10.1186/1752-0509-3-67}, DOI={10.1186/1752-0509-3-67}, number={1}, journal={BMC Systems Biology}, publisher={Springer Science and Business Media LLC}, author={Supper, Jochen and Spangenberg, Lucía and Planatscher, Hannes and Dräger, Andreas and Schröder, Adrian and Zell, Andreas}, year={2009}, month=june, language={en} } +@article{Levi_Elkon_Shamir_2021, title={DOMINO: a network‐based active module identification algorithm with reduced rate of false calls}, volume={17}, url={http://dx.doi.org/10.15252/msb.20209593}, DOI={10.15252/msb.20209593}, number={1}, journal={Molecular Systems Biology}, publisher={Springer Science and Business Media LLC}, author={Levi, Hagai and Elkon, Ran and Shamir, Ron}, year={2021}, month=jan, language={en} } +@article{Gitter_Klein-Seetharaman_Gupta_Bar-Joseph_2010, title={Discovering pathways by orienting edges in protein interaction networks}, volume={39}, url={http://dx.doi.org/10.1093/nar/gkq1207}, DOI={10.1093/nar/gkq1207}, number={4}, journal={Nucleic Acids Research}, publisher={Oxford University Press (OUP)}, author={Gitter, Anthony and Klein-Seetharaman, Judith and Gupta, Anupam and Bar-Joseph, Ziv}, year={2010}, month=nov, pages={e22–e22}, language={en} } +@article{Yeger-Lotem_Riva_Su_Gitler_Cashikar_King_Auluck_Geddie_Valastyan_Karger_et al._2009, title={Bridging high-throughput genetic and transcriptional data reveals cellular responses to alpha-synuclein toxicity}, volume={41}, url={http://dx.doi.org/10.1038/ng.337}, DOI={10.1038/ng.337}, number={3}, journal={Nature Genetics}, publisher={Springer Science and Business Media LLC}, author={Yeger-Lotem, Esti and Riva, Laura and Su, Linhui Julie and Gitler, Aaron D and Cashikar, Anil G and King, Oliver D and Auluck, Pavan K and Geddie, Melissa L and 
Valastyan, Julie S and Karger, David R and Lindquist, Susan and Fraenkel, Ernest}, year={2009}, month=feb, pages={316–323}, language={en} } +@article{Tuncbag_Gosline_Kedaigle_Soltis_Gitter_Fraenkel_2016, title={Network-Based Interpretation of Diverse High-Throughput Datasets through the Omics Integrator Software Package}, volume={12}, url={http://dx.doi.org/10.1371/journal.pcbi.1004879}, DOI={10.1371/journal.pcbi.1004879}, number={4}, journal={PLOS Computational Biology}, publisher={Public Library of Science (PLoS)}, author={Tuncbag, Nurcan and Gosline, Sara J. C. and Kedaigle, Amanda and Soltis, Anthony R. and Gitter, Anthony and Fraenkel, Ernest}, editor={Prlic, Andreas}, year={2016}, month=apr, pages={e1004879}, language={en} } +@article{Tuncbag_Gosline_Kedaigle_Soltis_Gitter_Fraenkel_2016, title={Network-Based Interpretation of Diverse High-Throughput Datasets through the Omics Integrator Software Package}, volume={12}, url={http://dx.doi.org/10.1371/journal.pcbi.1004879}, DOI={10.1371/journal.pcbi.1004879}, number={4}, journal={PLOS Computational Biology}, publisher={Public Library of Science (PLoS)}, author={Tuncbag, Nurcan and Gosline, Sara J. C. and Kedaigle, Amanda and Soltis, Anthony R. and Gitter, Anthony and Fraenkel, Ernest}, editor={Prlic, Andreas}, year={2016}, month=apr, pages={e1004879}, language={en} } +@article{Ritz_Poirel_Tegge_Sharp_Simmons_Powell_Kale_Murali_2016, title={Pathways on demand: automated reconstruction of human signaling networks}, volume={2}, url={http://dx.doi.org/10.1038/npjsba.2016.2}, DOI={10.1038/npjsba.2016.2}, abstractNote={AbstractSignaling pathways are a cornerstone of systems biology. Several databases store high-quality representations of these pathways that are amenable for automated analyses. Despite painstaking and manual curation, these databases remain incomplete. We present PATHLINKER, a new computational method to reconstruct the interactions in a signaling pathway of interest. 
PATHLINKER efficiently computes multiple short paths from the receptors to transcriptional regulators (TRs) in a pathway within a background protein interaction network. We use PATHLINKER to accurately reconstruct a comprehensive set of signaling pathways from the NetPath and KEGG databases. We show that PATHLINKER has higher precision and recall than several state-of-the-art algorithms, while also ensuring that the resulting network connects receptor proteins to TRs. PATHLINKER’s reconstruction of the Wnt pathway identified CFTR, an ABC class chloride ion channel transporter, as a novel intermediary that facilitates the signaling of Ryk to Dab2, which are known components of Wnt/β-catenin signaling. In HEK293 cells, we show that the Ryk–CFTR–Dab2 path is a novel amplifier of β-catenin signaling specifically in response to Wnt 1, 2, 3, and 3a of the 11 Wnts tested. PATHLINKER captures the structure of signaling pathways as represented in pathway databases better than existing methods. PATHLINKER’s success in reconstructing pathways from NetPath and KEGG databases point to its applicability for complementing manual curation of these databases. PATHLINKER may serve as a promising approach for prioritizing proteins and interactions for experimental study, as illustrated by its discovery of a novel pathway in Wnt/β-catenin signaling. 
Our supplementary website at http://bioinformatics.cs.vt.edu/~murali/supplements/2016-sys-bio-applications-pathlinker/ provides links to the PATHLINKER software, input datasets, PATHLINKER reconstructions of NetPath pathways, and links to interactive visualizations of these reconstructions on GraphSpace.}, number={1}, journal={npj Systems Biology and Applications}, publisher={Springer Science and Business Media LLC}, author={Ritz, Anna and Poirel, Christopher L and Tegge, Allison N and Sharp, Nicholas and Simmons, Kelsey and Powell, Allison and Kale, Shiv D and Murali, TM}, year={2016}, month=mar, language={en} } +@article{Poirel_Rodrigues_Chen_Tyson_Murali_2013, title={Top-Down Network Analysis to Drive Bottom-Up Modeling of Physiological Processes}, volume={20}, url={http://dx.doi.org/10.1089/cmb.2012.0274}, DOI={10.1089/cmb.2012.0274}, number={5}, journal={Journal of Computational Biology}, publisher={Mary Ann Liebert Inc}, author={Poirel, Christopher L. and Rodrigues, Richard R. and Chen, Katherine C. and Tyson, John J. 
and Murali, T.M.}, year={2013}, month=may, pages={409–418}, language={en} } +@article{Yeger-Lotem_Riva_Su_Gitler_Cashikar_King_Auluck_Geddie_Valastyan_Karger_et al._2009, title={Bridging high-throughput genetic and transcriptional data reveals cellular responses to alpha-synuclein toxicity}, volume={41}, url={http://dx.doi.org/10.1038/ng.337}, DOI={10.1038/ng.337}, number={3}, journal={Nature Genetics}, publisher={Springer Science and Business Media LLC}, author={Yeger-Lotem, Esti and Riva, Laura and Su, Linhui Julie and Gitler, Aaron D and Cashikar, Anil G and King, Oliver D and Auluck, Pavan K and Geddie, Melissa L and Valastyan, Julie S and Karger, David R and Lindquist, Susan and Fraenkel, Ernest}, year={2009}, month=feb, pages={316–323}, language={en} } diff --git a/test/attribution/expected/allpairs.bib b/test/attribution/expected/allpairs.bib new file mode 100644 index 000000000..e69de29bb diff --git a/test/attribution/expected/bowtiebuilder.bib b/test/attribution/expected/bowtiebuilder.bib new file mode 100644 index 000000000..1960e0eb3 --- /dev/null +++ b/test/attribution/expected/bowtiebuilder.bib @@ -0,0 +1 @@ +@article{Supper_Spangenberg_Planatscher_Dräger_Schröder_Zell_2009, title={BowTieBuilder: modeling signal transduction pathways}, volume={3}, url={http://dx.doi.org/10.1186/1752-0509-3-67}, DOI={10.1186/1752-0509-3-67}, number={1}, journal={BMC Systems Biology}, publisher={Springer Science and Business Media LLC}, author={Supper, Jochen and Spangenberg, Lucía and Planatscher, Hannes and Dräger, Andreas and Schröder, Adrian and Zell, Andreas}, year={2009}, month=june, language={en} } diff --git a/test/attribution/expected/domino.bib b/test/attribution/expected/domino.bib new file mode 100644 index 000000000..26b6824fa --- /dev/null +++ b/test/attribution/expected/domino.bib @@ -0,0 +1 @@ +@article{Levi_Elkon_Shamir_2021, title={DOMINO: a network‐based active module identification algorithm with reduced rate of false calls}, volume={17}, 
url={http://dx.doi.org/10.15252/msb.20209593}, DOI={10.15252/msb.20209593}, number={1}, journal={Molecular Systems Biology}, publisher={Springer Science and Business Media LLC}, author={Levi, Hagai and Elkon, Ran and Shamir, Ron}, year={2021}, month=jan, language={en} } diff --git a/test/attribution/expected/meo.bib b/test/attribution/expected/meo.bib new file mode 100644 index 000000000..716ec5f11 --- /dev/null +++ b/test/attribution/expected/meo.bib @@ -0,0 +1 @@ +@article{Gitter_Klein-Seetharaman_Gupta_Bar-Joseph_2010, title={Discovering pathways by orienting edges in protein interaction networks}, volume={39}, url={http://dx.doi.org/10.1093/nar/gkq1207}, DOI={10.1093/nar/gkq1207}, number={4}, journal={Nucleic Acids Research}, publisher={Oxford University Press (OUP)}, author={Gitter, Anthony and Klein-Seetharaman, Judith and Gupta, Anupam and Bar-Joseph, Ziv}, year={2010}, month=nov, pages={e22–e22}, language={en} } diff --git a/test/attribution/expected/mincostflow.bib b/test/attribution/expected/mincostflow.bib new file mode 100644 index 000000000..e6a9ec742 --- /dev/null +++ b/test/attribution/expected/mincostflow.bib @@ -0,0 +1 @@ +@article{Yeger-Lotem_Riva_Su_Gitler_Cashikar_King_Auluck_Geddie_Valastyan_Karger_et al._2009, title={Bridging high-throughput genetic and transcriptional data reveals cellular responses to alpha-synuclein toxicity}, volume={41}, url={http://dx.doi.org/10.1038/ng.337}, DOI={10.1038/ng.337}, number={3}, journal={Nature Genetics}, publisher={Springer Science and Business Media LLC}, author={Yeger-Lotem, Esti and Riva, Laura and Su, Linhui Julie and Gitler, Aaron D and Cashikar, Anil G and King, Oliver D and Auluck, Pavan K and Geddie, Melissa L and Valastyan, Julie S and Karger, David R and Lindquist, Susan and Fraenkel, Ernest}, year={2009}, month=feb, pages={316–323}, language={en} } diff --git a/test/attribution/expected/omicsintegrator1.bib b/test/attribution/expected/omicsintegrator1.bib new file mode 100644 index 
000000000..f9dcf6259 --- /dev/null +++ b/test/attribution/expected/omicsintegrator1.bib @@ -0,0 +1 @@ +@article{Tuncbag_Gosline_Kedaigle_Soltis_Gitter_Fraenkel_2016, title={Network-Based Interpretation of Diverse High-Throughput Datasets through the Omics Integrator Software Package}, volume={12}, url={http://dx.doi.org/10.1371/journal.pcbi.1004879}, DOI={10.1371/journal.pcbi.1004879}, number={4}, journal={PLOS Computational Biology}, publisher={Public Library of Science (PLoS)}, author={Tuncbag, Nurcan and Gosline, Sara J. C. and Kedaigle, Amanda and Soltis, Anthony R. and Gitter, Anthony and Fraenkel, Ernest}, editor={Prlic, Andreas}, year={2016}, month=apr, pages={e1004879}, language={en} } diff --git a/test/attribution/expected/omicsintegrator2.bib b/test/attribution/expected/omicsintegrator2.bib new file mode 100644 index 000000000..f9dcf6259 --- /dev/null +++ b/test/attribution/expected/omicsintegrator2.bib @@ -0,0 +1 @@ +@article{Tuncbag_Gosline_Kedaigle_Soltis_Gitter_Fraenkel_2016, title={Network-Based Interpretation of Diverse High-Throughput Datasets through the Omics Integrator Software Package}, volume={12}, url={http://dx.doi.org/10.1371/journal.pcbi.1004879}, DOI={10.1371/journal.pcbi.1004879}, number={4}, journal={PLOS Computational Biology}, publisher={Public Library of Science (PLoS)}, author={Tuncbag, Nurcan and Gosline, Sara J. C. and Kedaigle, Amanda and Soltis, Anthony R. 
and Gitter, Anthony and Fraenkel, Ernest}, editor={Prlic, Andreas}, year={2016}, month=apr, pages={e1004879}, language={en} } diff --git a/test/attribution/expected/pathlinker.bib b/test/attribution/expected/pathlinker.bib new file mode 100644 index 000000000..c56e4d445 --- /dev/null +++ b/test/attribution/expected/pathlinker.bib @@ -0,0 +1,2 @@ +@article{Ritz_Poirel_Tegge_Sharp_Simmons_Powell_Kale_Murali_2016, title={Pathways on demand: automated reconstruction of human signaling networks}, volume={2}, url={http://dx.doi.org/10.1038/npjsba.2016.2}, DOI={10.1038/npjsba.2016.2}, abstractNote={AbstractSignaling pathways are a cornerstone of systems biology. Several databases store high-quality representations of these pathways that are amenable for automated analyses. Despite painstaking and manual curation, these databases remain incomplete. We present PATHLINKER, a new computational method to reconstruct the interactions in a signaling pathway of interest. PATHLINKER efficiently computes multiple short paths from the receptors to transcriptional regulators (TRs) in a pathway within a background protein interaction network. We use PATHLINKER to accurately reconstruct a comprehensive set of signaling pathways from the NetPath and KEGG databases. We show that PATHLINKER has higher precision and recall than several state-of-the-art algorithms, while also ensuring that the resulting network connects receptor proteins to TRs. PATHLINKER’s reconstruction of the Wnt pathway identified CFTR, an ABC class chloride ion channel transporter, as a novel intermediary that facilitates the signaling of Ryk to Dab2, which are known components of Wnt/β-catenin signaling. In HEK293 cells, we show that the Ryk–CFTR–Dab2 path is a novel amplifier of β-catenin signaling specifically in response to Wnt 1, 2, 3, and 3a of the 11 Wnts tested. PATHLINKER captures the structure of signaling pathways as represented in pathway databases better than existing methods. 
PATHLINKER’s success in reconstructing pathways from NetPath and KEGG databases point to its applicability for complementing manual curation of these databases. PATHLINKER may serve as a promising approach for prioritizing proteins and interactions for experimental study, as illustrated by its discovery of a novel pathway in Wnt/β-catenin signaling. Our supplementary website at http://bioinformatics.cs.vt.edu/~murali/supplements/2016-sys-bio-applications-pathlinker/ provides links to the PATHLINKER software, input datasets, PATHLINKER reconstructions of NetPath pathways, and links to interactive visualizations of these reconstructions on GraphSpace.}, number={1}, journal={npj Systems Biology and Applications}, publisher={Springer Science and Business Media LLC}, author={Ritz, Anna and Poirel, Christopher L and Tegge, Allison N and Sharp, Nicholas and Simmons, Kelsey and Powell, Allison and Kale, Shiv D and Murali, TM}, year={2016}, month=mar, language={en} } +@article{Poirel_Rodrigues_Chen_Tyson_Murali_2013, title={Top-Down Network Analysis to Drive Bottom-Up Modeling of Physiological Processes}, volume={20}, url={http://dx.doi.org/10.1089/cmb.2012.0274}, DOI={10.1089/cmb.2012.0274}, number={5}, journal={Journal of Computational Biology}, publisher={Mary Ann Liebert Inc}, author={Poirel, Christopher L. and Rodrigues, Richard R. and Chen, Katherine C. and Tyson, John J. 
and Murali, T.M.}, year={2013}, month=may, pages={409–418}, language={en} } diff --git a/test/attribution/expected/responsenet.bib b/test/attribution/expected/responsenet.bib new file mode 100644 index 000000000..e6a9ec742 --- /dev/null +++ b/test/attribution/expected/responsenet.bib @@ -0,0 +1 @@ +@article{Yeger-Lotem_Riva_Su_Gitler_Cashikar_King_Auluck_Geddie_Valastyan_Karger_et al._2009, title={Bridging high-throughput genetic and transcriptional data reveals cellular responses to alpha-synuclein toxicity}, volume={41}, url={http://dx.doi.org/10.1038/ng.337}, DOI={10.1038/ng.337}, number={3}, journal={Nature Genetics}, publisher={Springer Science and Business Media LLC}, author={Yeger-Lotem, Esti and Riva, Laura and Su, Linhui Julie and Gitler, Aaron D and Cashikar, Anil G and King, Oliver D and Auluck, Pavan K and Geddie, Melissa L and Valastyan, Julie S and Karger, David R and Lindquist, Susan and Fraenkel, Ernest}, year={2009}, month=feb, pages={316–323}, language={en} } diff --git a/test/attribution/expected/rwr.bib b/test/attribution/expected/rwr.bib new file mode 100644 index 000000000..e69de29bb diff --git a/test/attribution/expected/strwr.bib b/test/attribution/expected/strwr.bib new file mode 100644 index 000000000..e69de29bb diff --git a/test/attribution/test_attribution.py b/test/attribution/test_attribution.py new file mode 100644 index 000000000..9441efb30 --- /dev/null +++ b/test/attribution/test_attribution.py @@ -0,0 +1,25 @@ +from filecmp import cmp +from pathlib import Path +from shutil import rmtree + +from spras.attribution import attribute_algorithms +from spras.runner import algorithms + +OUT_DIR = Path('test', 'attribution', 'output') +EXPECTED_DIR = Path('test', 'attribution', 'expected') + +class TestAttribution: + def test_attribute_algorithms(self): + if OUT_DIR.exists(): + rmtree(str(OUT_DIR)) + + # NOTE: This also serves as a dual test, confirming that the specified + # DOIs in every `PRA#dois` are all valid + attribution_files = 
[str(OUT_DIR / f"{name}.bib") for name in algorithms.keys()] + attribution_all = str(OUT_DIR / "all.bib") + + attribute_algorithms(attribution_all, attribution_files) + + for file in attribution_files + [attribution_all]: + assert cmp(file, EXPECTED_DIR / Path(file).name, shallow=False), \ + f"Algorithm attributions for {Path(file).stem} don't line up!" diff --git a/test/test_config.py b/test/test_config.py index f5ec454b7..f3d10b21f 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -114,6 +114,9 @@ def get_test_config(): "include": False, "aggregate_per_algorithm": False }, + "attribution": { + "include": False + }, }, }