diff --git a/.github/workflows/build-containers.yml b/.github/workflows/build-containers.yml index 8ebc59435..afdca2850 100644 --- a/.github/workflows/build-containers.yml +++ b/.github/workflows/build-containers.yml @@ -43,6 +43,11 @@ jobs: with: path: docker-wrappers/DOMINO container: reedcompbio/domino + build-and-remove-btb: + uses: "./.github/workflows/build-and-remove-template.yml" + with: + path: docker-wrappers/BowTieBuilder + container: reedcompbio/bowtiebuilder build-and-remove-cytoscape: uses: "./.github/workflows/build-and-remove-template.yml" with: diff --git a/config/config.yaml b/config/config.yaml index e572cc050..e5f616a90 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -110,6 +110,10 @@ algorithms: alpha: [0.85] threshold: [100, 200] + - name: "bowtiebuilder" + params: + include: true + # Here we specify which pathways to run and other file location information. # DataLoader.py can currently only load a single dataset # Assume that if a dataset label does not change, the lists of associated input files do not change diff --git a/docker-wrappers/BowTieBuilder/Dockerfile b/docker-wrappers/BowTieBuilder/Dockerfile new file mode 100644 index 000000000..750832c0d --- /dev/null +++ b/docker-wrappers/BowTieBuilder/Dockerfile @@ -0,0 +1,5 @@ +FROM python:3.8-bullseye + +WORKDIR /btb +RUN wget https://raw.githubusercontent.com/Reed-CompBio/BowTieBuilder-Algorithm/dd8519cd8a8397c0e0724106f498b6002d3f7be2/btb.py +RUN pip install networkx==2.8 \ No newline at end of file diff --git a/docker-wrappers/BowTieBuilder/README.md b/docker-wrappers/BowTieBuilder/README.md new file mode 100644 index 000000000..d0b80ee76 --- /dev/null +++ b/docker-wrappers/BowTieBuilder/README.md @@ -0,0 +1,15 @@ +# BowTieBuilder Docker image + +A Docker image for [BowTieBuilder](https://github.com/Reed-CompBio/BowTieBuilder-Algorithm) that is available on [DockerHub](https://hub.docker.com/repository/docker/reedcompbio/bowtiebuilder). 
+ +To create the Docker image run: +``` +docker build -t reedcompbio/bowtiebuilder:v2 -f Dockerfile . +``` +from this directory. + +## Original Paper + +The original paper for BowTieBuilder can be accessed here: + +Supper, J., Spangenberg, L., Planatscher, H. et al. BowTieBuilder: modeling signal transduction pathways. BMC Syst Biol 3, 67 (2009). https://doi.org/10.1186/1752-0509-3-67 \ No newline at end of file diff --git a/spras/btb.py b/spras/btb.py new file mode 100644 index 000000000..416395a55 --- /dev/null +++ b/spras/btb.py @@ -0,0 +1,162 @@ +from pathlib import Path + +from spras.containers import prepare_volume, run_container_and_log +from spras.interactome import ( + convert_undirected_to_directed, + reinsert_direction_col_directed, +) +from spras.prm import PRM +from spras.util import ( + add_rank_column, + duplicate_edges, + raw_pathway_df, +) + +__all__ = ['BowTieBuilder'] + +""" +BTB will construct a BowTie-shaped graph from the provided input file. +BTB works with directed and undirected graphs. +It generates a graph connecting multiple source nodes to multiple target nodes with as few intermediate nodes as possible. 
+ +Expected raw edge file format: +Interactor1 Interactor2 Weight +""" + +class BowTieBuilder(PRM): + required_inputs = ['sources', 'targets', 'edges'] + + # generate_inputs is taken from meo.py because both have the same input requirements + @staticmethod + def generate_inputs(data, filename_map): + """ + Access fields from the dataset and write the required input files + @param data: dataset + @param filename_map: a dict mapping file types in the required_inputs to the filename for that type + @return: + """ + for input_type in BowTieBuilder.required_inputs: + if input_type not in filename_map: + raise ValueError(f"{input_type} filename is missing") + + # Get sources and write to file, repeat for targets + # Does not check whether a node is a source and a target + for node_type in ['sources', 'targets']: + nodes = data.request_node_columns([node_type]) + if nodes is None: + raise ValueError(f'No {node_type} found in the node files') + + # TODO test whether this selection is needed, what values could the column contain that we would want to + # include or exclude? 
+ nodes = nodes.loc[nodes[node_type]] + if node_type == "sources": + nodes.to_csv(filename_map["sources"], sep= '\t', index=False, columns=['NODEID'], header=False) + elif node_type == "targets": + nodes.to_csv(filename_map["targets"], sep= '\t', index=False, columns=['NODEID'], header=False) + + + # Create network file + edges = data.get_interactome() + + # Format into directed graph (BTB uses the nx.DiGraph constructor internally) + edges = convert_undirected_to_directed(edges) + + edges.to_csv(filename_map["edges"], sep="\t", index=False, + columns=["Interactor1", "Interactor2", "Weight"], + header=False) + + + + # Skips parameter validation step + @staticmethod + def run(sources=None, targets=None, edges=None, output_file=None, container_framework="docker"): + """ + Run BTB with Docker + @param sources: input source file (required) + @param targets: input target file (required) + @param edges: input edge file (required) + @param output_file: path to the output pathway file (required) + @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional) + """ + + # Tests for pytest (docker container also runs this) + # Testing out here avoids the trouble that container errors provide + + if not sources or not targets or not edges or not output_file: + raise ValueError('Required BowTieBuilder arguments are missing') + + if not Path(sources).exists() or not Path(targets).exists() or not Path(edges).exists(): + raise ValueError('Missing input file') + + # Testing for btb index errors + # TODO: This error will never actually occur if the inputs are passed through + # `generate_inputs`. See the discussion about removing this or making this a habit at + # https://github.com/Reed-CompBio/spras/issues/306. + with open(edges, 'r') as edge_file: + try: + for line in edge_file: + line = line.strip().split('\t')[2] + + except Exception as err: + # catches a much harder to debug error in BTB. 
+ raise IndexError("BTB edges are not formatted correctly") from err + + work_dir = '/btb' + + # Each volume is a tuple (src, dest) + volumes = list() + + bind_path, source_file = prepare_volume(sources, work_dir) + volumes.append(bind_path) + + bind_path, target_file = prepare_volume(targets, work_dir) + volumes.append(bind_path) + + bind_path, edges_file = prepare_volume(edges, work_dir) + volumes.append(bind_path) + + # Use its --output argument to set the output file prefix to specify an absolute path and prefix + out_dir = Path(output_file).parent + out_dir.mkdir(parents=True, exist_ok=True) + bind_path, mapped_out_dir = prepare_volume(str(out_dir), work_dir) + volumes.append(bind_path) + mapped_out_prefix = mapped_out_dir + '/raw-pathway.txt' # Use posix path inside the container + + command = ['python', + 'btb.py', + '--edges', + edges_file, + '--sources', + source_file, + '--targets', + target_file, + '--output_file', + mapped_out_prefix] + + container_suffix = "bowtiebuilder:v2" + run_container_and_log('BowTieBuilder', + container_framework, + container_suffix, + command, + volumes, + work_dir) + # Output is already written to raw-pathway.txt file + + + @staticmethod + def parse_output(raw_pathway_file, standardized_pathway_file, params): + """ + Convert a predicted pathway into the universal format + @param raw_pathway_file: pathway file produced by an algorithm's run function + @param standardized_pathway_file: the same pathway written in the universal format + """ + # TODO: consider using multiple raw_pathway_files + df = raw_pathway_df(raw_pathway_file, sep='\t', header=0) + if not df.empty: + df = add_rank_column(df) + df = reinsert_direction_col_directed(df) + df.columns = ['Node1', 'Node2', 'Rank', 'Direction'] + df, has_duplicates = duplicate_edges(df) + if has_duplicates: + print(f"Duplicate edges were removed from {raw_pathway_file}") + df.to_csv(standardized_pathway_file, index=False, sep='\t', header=True) diff --git a/spras/runner.py 
b/spras/runner.py index 0c36dc9be..735925007 100644 --- a/spras/runner.py +++ b/spras/runner.py @@ -2,6 +2,7 @@ # supported algorithm imports from spras.allpairs import AllPairs as allpairs +from spras.btb import BowTieBuilder as bowtiebuilder from spras.dataset import Dataset from spras.domino import DOMINO as domino from spras.meo import MEO as meo diff --git a/test/BowTieBuilder/__init__.py b/test/BowTieBuilder/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/test/BowTieBuilder/expected/bidirectional-output.txt b/test/BowTieBuilder/expected/bidirectional-output.txt new file mode 100644 index 000000000..4d722b5d9 --- /dev/null +++ b/test/BowTieBuilder/expected/bidirectional-output.txt @@ -0,0 +1,3 @@ +Node1 Node2 +S1 A +A T1 diff --git a/test/BowTieBuilder/expected/btb-output.txt b/test/BowTieBuilder/expected/btb-output.txt new file mode 100644 index 000000000..b5afacd4c --- /dev/null +++ b/test/BowTieBuilder/expected/btb-output.txt @@ -0,0 +1,5 @@ +Node1 Node2 +S1 A +S2 A +A T1 +A T2 diff --git a/test/BowTieBuilder/expected/disjoint-output.txt b/test/BowTieBuilder/expected/disjoint-output.txt new file mode 100644 index 000000000..27a250c7d --- /dev/null +++ b/test/BowTieBuilder/expected/disjoint-output.txt @@ -0,0 +1,6 @@ +Node1 Node2 +S1 A +S2 C +C T2 +A B +B T1 diff --git a/test/BowTieBuilder/expected/empty-output.txt b/test/BowTieBuilder/expected/empty-output.txt new file mode 100644 index 000000000..9d15ae30c --- /dev/null +++ b/test/BowTieBuilder/expected/empty-output.txt @@ -0,0 +1 @@ +Node1 Node2 diff --git a/test/BowTieBuilder/expected/loop-output.txt b/test/BowTieBuilder/expected/loop-output.txt new file mode 100644 index 000000000..87f007a06 --- /dev/null +++ b/test/BowTieBuilder/expected/loop-output.txt @@ -0,0 +1,6 @@ +Node1 Node2 +S1 A +A B +B T1 +T1 C +C T2 diff --git a/test/BowTieBuilder/expected/source-to-source-disjoint-output.txt b/test/BowTieBuilder/expected/source-to-source-disjoint-output.txt new file mode 100644 
index 000000000..67331531d --- /dev/null +++ b/test/BowTieBuilder/expected/source-to-source-disjoint-output.txt @@ -0,0 +1,6 @@ +Node1 Node2 +S1 A +S1 S2 +S2 B +A T1 +B T2 diff --git a/test/BowTieBuilder/expected/source-to-source-output.txt b/test/BowTieBuilder/expected/source-to-source-output.txt new file mode 100644 index 000000000..8f1baa9c4 --- /dev/null +++ b/test/BowTieBuilder/expected/source-to-source-output.txt @@ -0,0 +1,4 @@ +Node1 Node2 +S1 A +A T1 +A T2 diff --git a/test/BowTieBuilder/expected/source-to-source2-output.txt b/test/BowTieBuilder/expected/source-to-source2-output.txt new file mode 100644 index 000000000..4d0c6f22b --- /dev/null +++ b/test/BowTieBuilder/expected/source-to-source2-output.txt @@ -0,0 +1,5 @@ +Node1 Node2 +S1 A +S2 S1 +A T1 +A T2 diff --git a/test/BowTieBuilder/expected/weighted-output.txt b/test/BowTieBuilder/expected/weighted-output.txt new file mode 100644 index 000000000..4d722b5d9 --- /dev/null +++ b/test/BowTieBuilder/expected/weighted-output.txt @@ -0,0 +1,3 @@ +Node1 Node2 +S1 A +A T1 diff --git a/test/BowTieBuilder/input/bad-edges.txt b/test/BowTieBuilder/input/bad-edges.txt new file mode 100644 index 000000000..c08a85035 --- /dev/null +++ b/test/BowTieBuilder/input/bad-edges.txt @@ -0,0 +1,6 @@ +A D 5 +B D 1.3 +C 0.4 +D E 4.5 +D F 2 +D G 3.2 \ No newline at end of file diff --git a/test/BowTieBuilder/input/bidirectional-edges.txt b/test/BowTieBuilder/input/bidirectional-edges.txt new file mode 100644 index 000000000..444ac9b34 --- /dev/null +++ b/test/BowTieBuilder/input/bidirectional-edges.txt @@ -0,0 +1,4 @@ +S1 A 1 +A T1 1 +A S1 1 +T1 A 1 \ No newline at end of file diff --git a/test/BowTieBuilder/input/btb-bad-edges.txt b/test/BowTieBuilder/input/btb-bad-edges.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/BowTieBuilder/input/btb-edges.txt b/test/BowTieBuilder/input/btb-edges.txt new file mode 100644 index 000000000..e5f85f130 --- /dev/null +++ b/test/BowTieBuilder/input/btb-edges.txt @@ 
-0,0 +1,5 @@ +S1 A 1 +S1 S2 1 +S2 A 1 +A T1 1 +A T2 1 \ No newline at end of file diff --git a/test/BowTieBuilder/input/btb-sources.txt b/test/BowTieBuilder/input/btb-sources.txt new file mode 100644 index 000000000..052a6f02c --- /dev/null +++ b/test/BowTieBuilder/input/btb-sources.txt @@ -0,0 +1,2 @@ +S1 +S2 \ No newline at end of file diff --git a/test/BowTieBuilder/input/btb-targets.txt b/test/BowTieBuilder/input/btb-targets.txt new file mode 100644 index 000000000..43b435f9b --- /dev/null +++ b/test/BowTieBuilder/input/btb-targets.txt @@ -0,0 +1,2 @@ +T1 +T2 \ No newline at end of file diff --git a/test/BowTieBuilder/input/disjoint-edges.txt b/test/BowTieBuilder/input/disjoint-edges.txt new file mode 100644 index 000000000..b8cb0f460 --- /dev/null +++ b/test/BowTieBuilder/input/disjoint-edges.txt @@ -0,0 +1,5 @@ +S1 A 1 +A B 1 +B T1 1 +S2 C 1 +C T2 1 \ No newline at end of file diff --git a/test/BowTieBuilder/input/disjoint-sources.txt b/test/BowTieBuilder/input/disjoint-sources.txt new file mode 100644 index 000000000..df71e4359 --- /dev/null +++ b/test/BowTieBuilder/input/disjoint-sources.txt @@ -0,0 +1,3 @@ +S1 +S2 +S3 \ No newline at end of file diff --git a/test/BowTieBuilder/input/disjoint-targets.txt b/test/BowTieBuilder/input/disjoint-targets.txt new file mode 100644 index 000000000..f640e8aa0 --- /dev/null +++ b/test/BowTieBuilder/input/disjoint-targets.txt @@ -0,0 +1,3 @@ +T1 +T2 +T3 \ No newline at end of file diff --git a/test/BowTieBuilder/input/disjoint2-edges.txt b/test/BowTieBuilder/input/disjoint2-edges.txt new file mode 100644 index 000000000..2df397828 --- /dev/null +++ b/test/BowTieBuilder/input/disjoint2-edges.txt @@ -0,0 +1,6 @@ +S1 A 1 +A B 1 +B T1 1 +S2 C 1 +C T2 1 +S3 D 1 \ No newline at end of file diff --git a/test/BowTieBuilder/input/edges1.txt b/test/BowTieBuilder/input/edges1.txt new file mode 100644 index 000000000..6f97ec4e4 --- /dev/null +++ b/test/BowTieBuilder/input/edges1.txt @@ -0,0 +1,6 @@ +A D 5 +B D 1.3 +C D 0.4 +D E 4.5 
+D F 2 +D G 3.2 \ No newline at end of file diff --git a/test/BowTieBuilder/input/loop-edges.txt b/test/BowTieBuilder/input/loop-edges.txt new file mode 100644 index 000000000..74c9aa802 --- /dev/null +++ b/test/BowTieBuilder/input/loop-edges.txt @@ -0,0 +1,6 @@ +S1 A 1 +A B 1 +B T1 1 +T1 C 1 +C T2 1 +T2 S1 1 \ No newline at end of file diff --git a/test/BowTieBuilder/input/source-to-source-disjoint-edges.txt b/test/BowTieBuilder/input/source-to-source-disjoint-edges.txt new file mode 100644 index 000000000..9c7cec5be --- /dev/null +++ b/test/BowTieBuilder/input/source-to-source-disjoint-edges.txt @@ -0,0 +1,5 @@ +S1 S2 1 +S1 A 1 +A T1 1 +S2 B 1 +B T2 1 \ No newline at end of file diff --git a/test/BowTieBuilder/input/source-to-source-edges.txt b/test/BowTieBuilder/input/source-to-source-edges.txt new file mode 100644 index 000000000..733a09c3d --- /dev/null +++ b/test/BowTieBuilder/input/source-to-source-edges.txt @@ -0,0 +1,4 @@ +S1 A 1 +S1 S2 1 +A T1 1 +A T2 1 \ No newline at end of file diff --git a/test/BowTieBuilder/input/source-to-source2-edges.txt b/test/BowTieBuilder/input/source-to-source2-edges.txt new file mode 100644 index 000000000..f11f86f11 --- /dev/null +++ b/test/BowTieBuilder/input/source-to-source2-edges.txt @@ -0,0 +1,4 @@ +S1 A 1 +S2 S1 1 +A T1 1 +A T2 1 \ No newline at end of file diff --git a/test/BowTieBuilder/input/source1.txt b/test/BowTieBuilder/input/source1.txt new file mode 100644 index 000000000..b1e67221a --- /dev/null +++ b/test/BowTieBuilder/input/source1.txt @@ -0,0 +1,3 @@ +A +B +C diff --git a/test/BowTieBuilder/input/target-to-source-edges.txt b/test/BowTieBuilder/input/target-to-source-edges.txt new file mode 100644 index 000000000..5f9fc0018 --- /dev/null +++ b/test/BowTieBuilder/input/target-to-source-edges.txt @@ -0,0 +1,2 @@ +A S1 1 +T1 A 1 \ No newline at end of file diff --git a/test/BowTieBuilder/input/target1.txt b/test/BowTieBuilder/input/target1.txt new file mode 100644 index 000000000..0cae3d39a --- /dev/null +++ 
b/test/BowTieBuilder/input/target1.txt @@ -0,0 +1,3 @@ +E +F +G diff --git a/test/BowTieBuilder/input/weight-one-edges.txt b/test/BowTieBuilder/input/weight-one-edges.txt new file mode 100644 index 000000000..9b3059a13 --- /dev/null +++ b/test/BowTieBuilder/input/weight-one-edges.txt @@ -0,0 +1,4 @@ +S1 A 1 +A T1 1 +S1 B 0.5 +B T1 0.5 \ No newline at end of file diff --git a/test/BowTieBuilder/input/weighted-edges.txt b/test/BowTieBuilder/input/weighted-edges.txt new file mode 100644 index 000000000..76fc0337f --- /dev/null +++ b/test/BowTieBuilder/input/weighted-edges.txt @@ -0,0 +1,4 @@ +S1 A 0.9 +A T1 0.9 +S1 B 0.5 +B T1 0.5 \ No newline at end of file diff --git a/test/BowTieBuilder/test_btb.py b/test/BowTieBuilder/test_btb.py new file mode 100644 index 000000000..88b12d0dd --- /dev/null +++ b/test/BowTieBuilder/test_btb.py @@ -0,0 +1,309 @@ +import sys +from filecmp import cmp +from pathlib import Path + +import pytest + +import spras.config as config + +config.init_from_file("config/config.yaml") + +# TODO consider refactoring to simplify the import +# Modify the path because of the - in the directory +SPRAS_ROOT = Path(__file__).parent.parent.parent.absolute() +sys.path.append(str(Path(SPRAS_ROOT, 'docker-wrappers', 'BowTieBuilder'))) +from spras.btb import BowTieBuilder as BTB + +TEST_DIR = Path('test', 'BowTieBuilder/') +OUT_FILE_DEFAULT = Path(TEST_DIR, 'output', 'raw-pathway.txt') + + +class TestBowTieBuilder: + """ + Run the BowTieBuilder algorithm with missing arguments + """ + def test_btb_missing(self): + with pytest.raises(ValueError): + # No edges + BTB.run( + targets=Path(TEST_DIR, 'input', 'target.txt'), + sources=Path(TEST_DIR, 'input', 'source.txt'), + output_file=OUT_FILE_DEFAULT) + with pytest.raises(ValueError): + # No source + BTB.run( + targets=Path(TEST_DIR, 'input', 'target.txt'), + edges=Path(TEST_DIR, 'input', 'edges.txt'), + output_file=OUT_FILE_DEFAULT) + with pytest.raises(ValueError): + # No target + BTB.run( + 
sources=Path(TEST_DIR, 'input', 'source.txt'), + edges=Path(TEST_DIR, 'input', 'edges.txt'), + output_file=OUT_FILE_DEFAULT) + + + """ + Run the BowTieBuilder algorithm with missing files + """ + def test_btb_file(self): + with pytest.raises(ValueError): + BTB.run(sources=Path(TEST_DIR, 'input', 'unknown.txt'), + targets=Path(TEST_DIR, 'input', 'target.txt'), + edges=Path(TEST_DIR, 'input', 'edges.txt'), + output_file=OUT_FILE_DEFAULT) + + """ + Run the BowTieBuilder algorithm with bad input data + """ + def test_format_error(self): + with pytest.raises(IndexError): + BTB.run(sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), + targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), + edges=Path(TEST_DIR, 'input', 'bad-edges.txt'), + output_file=OUT_FILE_DEFAULT) + + """ + Run the BowTieBuilder algorithm on the example input files and check the output matches the expected output + """ + def test_btb(self): + OUT_FILE_DEFAULT.unlink(missing_ok=True) + BTB.run(edges=Path(TEST_DIR, 'input', 'btb-edges.txt'), + sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), + targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'btb-output.txt') + + # Read the content of the output files and expected file into sets + with open(OUT_FILE_DEFAULT, 'r') as output_file: + output_content = set(output_file.read().splitlines()) + with open(expected_file, 'r') as expected_output_file: + expected_content = set(expected_output_file.read().splitlines()) + + # Check if the sets are equal, regardless of the order of lines + assert output_content == expected_content, 'Output file does not match expected output file' + + """ + Run the BowTieBuilder algorithm on the example disjoint input files and check the output matches the expected output + """ + def test_disjoint(self): + OUT_FILE_DEFAULT.unlink(missing_ok=True) + BTB.run(edges=Path(TEST_DIR, 'input', 
'disjoint-edges.txt'), + sources=Path(TEST_DIR, 'input', 'disjoint-sources.txt'), + targets=Path(TEST_DIR, 'input', 'disjoint-targets.txt'), + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'disjoint-output.txt') + + # Read the content of the output files and expected file into sets + with open(OUT_FILE_DEFAULT, 'r') as output_file: + output_content = set(output_file.read().splitlines()) + with open(expected_file, 'r') as expected_output_file: + expected_content = set(expected_output_file.read().splitlines()) + + # Check if the sets are equal, regardless of the order of lines + assert output_content == expected_content, 'Output file does not match expected output file' + + """ + Run the BowTieBuilder algorithm on the example disjoint2 input files and check the output matches the expected output + """ + def test_disjoint2(self): + OUT_FILE_DEFAULT.unlink(missing_ok=True) + BTB.run(edges=Path(TEST_DIR, 'input', 'disjoint2-edges.txt'), + sources=Path(TEST_DIR, 'input', 'disjoint-sources.txt'), + targets=Path(TEST_DIR, 'input', 'disjoint-targets.txt'), + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'disjoint-output.txt') + + # Read the content of the output files and expected file into sets + with open(OUT_FILE_DEFAULT, 'r') as output_file: + output_content = set(output_file.read().splitlines()) + with open(expected_file, 'r') as expected_output_file: + expected_content = set(expected_output_file.read().splitlines()) + + # Check if the sets are equal, regardless of the order of lines + assert output_content == expected_content, 'Output file does not match expected output file' + + """ + Run the BowTieBuilder algorithm with a missing input file + """ + def test_missing_file(self): + with pytest.raises(ValueError): + with pytest.raises(OSError): + BTB.run(edges=Path(TEST_DIR, 'input', 
'missing.txt'), + sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), + targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), + output_file=OUT_FILE_DEFAULT) + + + """ + Run the BowTieBuilder algorithm on the example source to source input files and check the output matches the expected output + """ + def test_source_to_source(self): + OUT_FILE_DEFAULT.unlink(missing_ok=True) + BTB.run(edges=Path(TEST_DIR, 'input', 'source-to-source-edges.txt'), + sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), + targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'source-to-source-output.txt') + + # Read the content of the output files and expected file into sets + with open(OUT_FILE_DEFAULT, 'r') as output_file: + output_content = set(output_file.read().splitlines()) + with open(expected_file, 'r') as expected_output_file: + expected_content = set(expected_output_file.read().splitlines()) + + # Check if the sets are equal, regardless of the order of lines + assert output_content == expected_content, 'Output file does not match expected output file' + + """ + Run the BowTieBuilder algorithm on the example source to source input files and check the output matches the expected output + """ + def test_source_to_source2(self): + OUT_FILE_DEFAULT.unlink(missing_ok=True) + BTB.run(edges=Path(TEST_DIR, 'input', 'source-to-source2-edges.txt'), + sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), + targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'source-to-source2-output.txt') + + # Read the content of the output files and expected file into sets + with open(OUT_FILE_DEFAULT, 'r') as output_file: + output_content = set(output_file.read().splitlines()) + with open(expected_file, 'r') as 
expected_output_file: + expected_content = set(expected_output_file.read().splitlines()) + + # Check if the sets are equal, regardless of the order of lines + assert output_content == expected_content, 'Output file does not match expected output file' + + """ + Run the BowTieBuilder algorithm on two separate source to target paths connected by sources and check the output matches the expected output + """ + + def test_source_to_source_disjoint(self): + OUT_FILE_DEFAULT.unlink(missing_ok=True) + BTB.run(edges=Path(TEST_DIR, 'input', 'source-to-source-disjoint-edges.txt'), + sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), + targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'source-to-source-disjoint-output.txt') + + # Read the content of the output files and expected file into sets + with open(OUT_FILE_DEFAULT, 'r') as output_file: + output_content = set(output_file.read().splitlines()) + with open(expected_file, 'r') as expected_output_file: + expected_content = set(expected_output_file.read().splitlines()) + + # Check if the sets are equal, regardless of the order of lines + assert output_content == expected_content, 'Output file does not match expected output file' + + """ + Run the BowTieBuilder algorithm on the example bidirectional input files and check the output matches the expected output + """ + + def test_bidirectional(self): + OUT_FILE_DEFAULT.unlink(missing_ok=True) + BTB.run(edges=Path(TEST_DIR, 'input', 'bidirectional-edges.txt'), + sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), + targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'bidirectional-output.txt') + + # Read the content of the output files and expected file into sets + with open(OUT_FILE_DEFAULT, 
'r') as output_file: + output_content = set(output_file.read().splitlines()) + with open(expected_file, 'r') as expected_output_file: + expected_content = set(expected_output_file.read().splitlines()) + + # Check if the sets are equal, regardless of the order of lines + assert output_content == expected_content, 'Output file does not match expected output file' + + """ + Run the BowTieBuilder algorithm on the example target to source input files and check the output matches the expected output + """ + + def test_target_to_source(self): + OUT_FILE_DEFAULT.unlink(missing_ok=True) + BTB.run(edges=Path(TEST_DIR, 'input', 'target-to-source-edges.txt'), + sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), + targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'empty-output.txt') + + # Read the content of the output files and expected file into sets + with open(OUT_FILE_DEFAULT, 'r') as output_file: + output_content = set(output_file.read().splitlines()) + with open(expected_file, 'r') as expected_output_file: + expected_content = set(expected_output_file.read().splitlines()) + + # Check if the sets are equal, regardless of the order of lines + assert output_content == expected_content, 'Output file does not match expected output file' + + """ + Run the BowTieBuilder algorithm on the example loop network files and check the output matches the expected output + """ + + def test_loop(self): + OUT_FILE_DEFAULT.unlink(missing_ok=True) + BTB.run(edges=Path(TEST_DIR, 'input', 'loop-edges.txt'), + sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), + targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'loop-output.txt') + + # Read the content of the output files and expected file into sets + with 
open(OUT_FILE_DEFAULT, 'r') as output_file: + output_content = set(output_file.read().splitlines()) + with open(expected_file, 'r') as expected_output_file: + expected_content = set(expected_output_file.read().splitlines()) + + # Check if the sets are equal, regardless of the order of lines + assert output_content == expected_content, 'Output file does not match expected output file' + + """ + Run the BowTieBuilder algorithm on the weighted input files and check the output matches the expected output + """ + + def test_weighted(self): + OUT_FILE_DEFAULT.unlink(missing_ok=True) + BTB.run(edges=Path(TEST_DIR, 'input', 'weighted-edges.txt'), + sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), + targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'weighted-output.txt') + + # Read the content of the output files and expected file into sets + with open(OUT_FILE_DEFAULT, 'r') as output_file: + output_content = set(output_file.read().splitlines()) + with open(expected_file, 'r') as expected_output_file: + expected_content = set(expected_output_file.read().splitlines()) + + # Check if the sets are equal, regardless of the order of lines + assert output_content == expected_content, 'Output file does not match expected output file' + + def test_weight_one(self): + OUT_FILE_DEFAULT.unlink(missing_ok=True) + BTB.run(edges=Path(TEST_DIR, 'input', 'weight-one-edges.txt'), + sources=Path(TEST_DIR, 'input', 'btb-sources.txt'), + targets=Path(TEST_DIR, 'input', 'btb-targets.txt'), + output_file=OUT_FILE_DEFAULT) + assert OUT_FILE_DEFAULT.exists(), 'Output file was not written' + expected_file = Path(TEST_DIR, 'expected', 'weighted-output.txt') + + # Read the content of the output files and expected file into sets + with open(OUT_FILE_DEFAULT, 'r') as output_file: + output_content = set(output_file.read().splitlines()) + with 
open(expected_file, 'r') as expected_output_file: + expected_content = set(expected_output_file.read().splitlines()) + + # Check if the sets are equal, regardless of the order of lines + assert output_content == expected_content, 'Output file does not match expected output file' diff --git a/test/generate-inputs/expected/bowtiebuilder-edges-expected.txt b/test/generate-inputs/expected/bowtiebuilder-edges-expected.txt new file mode 100644 index 000000000..a52b15933 --- /dev/null +++ b/test/generate-inputs/expected/bowtiebuilder-edges-expected.txt @@ -0,0 +1,4 @@ +test_A B 0.98 +B C 0.77 +B test_A 0.98 +C B 0.77 diff --git a/test/generate-inputs/test_generate_inputs.py b/test/generate-inputs/test_generate_inputs.py index 84869692c..6c17ecd14 100644 --- a/test/generate-inputs/test_generate_inputs.py +++ b/test/generate-inputs/test_generate_inputs.py @@ -17,6 +17,7 @@ 'domino': 'network', 'pathlinker': 'network', 'allpairs': 'network', + 'bowtiebuilder': 'edges', 'strwr': 'network', 'rwr': 'network' } diff --git a/test/parse-outputs/expected/bowtiebuilder-pathway-expected.txt b/test/parse-outputs/expected/bowtiebuilder-pathway-expected.txt new file mode 100644 index 000000000..5547a49c6 --- /dev/null +++ b/test/parse-outputs/expected/bowtiebuilder-pathway-expected.txt @@ -0,0 +1,3 @@ +Node1 Node2 Rank Direction +A B 1 D +B C 1 D diff --git a/test/parse-outputs/input/bowtiebuilder-raw-pathway.txt b/test/parse-outputs/input/bowtiebuilder-raw-pathway.txt new file mode 100644 index 000000000..d92837ade --- /dev/null +++ b/test/parse-outputs/input/bowtiebuilder-raw-pathway.txt @@ -0,0 +1,3 @@ +Node1 Node2 +A B +B C diff --git a/test/parse-outputs/input/duplicate-edges/bowtiebuilder-raw-pathway.txt b/test/parse-outputs/input/duplicate-edges/bowtiebuilder-raw-pathway.txt new file mode 100644 index 000000000..279603e69 --- /dev/null +++ b/test/parse-outputs/input/duplicate-edges/bowtiebuilder-raw-pathway.txt @@ -0,0 +1,5 @@ +Node1 Node2 +A B +B C +A B +B C diff --git 
a/test/parse-outputs/test_parse_outputs.py b/test/parse-outputs/test_parse_outputs.py index 9485596c4..216a951d2 100644 --- a/test/parse-outputs/test_parse_outputs.py +++ b/test/parse-outputs/test_parse_outputs.py @@ -21,6 +21,7 @@ 'pathlinker': {}, 'allpairs': {}, 'domino': {}, + 'bowtiebuilder': {}, 'strwr': { 'threshold': 3, 'dataset': Dataset({