diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml index 472b66ac4..2b6762122 100644 --- a/.github/workflows/test-spras.yml +++ b/.github/workflows/test-spras.yml @@ -25,6 +25,24 @@ jobs: shell: bash --login {0} run: conda list + test_windows: + name: Run weakened windows tests + runs-on: windows-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Install conda environment + uses: conda-incubator/setup-miniconda@v2 + with: + activate-environment: spras + environment-file: environment.yml + auto-activate-base: false + miniconda-version: 'latest' + - name: Run tests + shell: bash --login {0} + # Verbose output and disable stdout and stderr capturing + run: pytest -vs --ignore=test/AllPairs --ignore=test/BowTieBuilder --ignore=test/DOMINO --ignore=test/analysis/test_cytoscape.py --ignore=test/MEO --ignore=test/MinCostFlow --ignore=test/OmicsIntegrator1 --ignore=test/OmicsIntegrator2 --ignore=test/PathLinker --ignore=test/ResponseNet --ignore=test/RWR --ignore=test/ST_RWR + # Runs the test code and Snakemake workflow in the conda environment test: name: Run tests diff --git a/spras/evaluation.py b/spras/evaluation.py index 732e58576..0ac361c82 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -237,7 +237,7 @@ def precision_and_recall_pca_chosen_pathway(pr_df: pd.DataFrame, output_file: st plt.close() @staticmethod - def pca_chosen_pathway(coordinates_files: list[Union[str, PathLike]], pathway_summary_file: str, output_dir: str): + def pca_chosen_pathway(coordinates_files: list[Union[str, PathLike]], pathway_summary_file: Union[str, PathLike], output_dir: Union[str, PathLike]): """ Identifies the pathway closest to a specified highest kernel density estimated (KDE) peak based on PCA coordinates diff --git a/test/evaluate/expected/expected-pr-per-pathway-pca-chosen.txt b/test/evaluate/expected/expected-pr-per-pathway-pca-chosen.txt index 97c13ebc3..13c6fb8f7 100644 --- a/test/evaluate/expected/expected-pr-per-pathway-pca-chosen.txt +++ b/test/evaluate/expected/expected-pr-per-pathway-pca-chosen.txt @@ -1,2 +1,2 @@ -Pathway Precision Recall -test/evaluate/input/data-test-params-123/pathway.txt 0.6666666666666666 0.5 +Precision Recall +0.6666666666666666 0.5 diff --git a/test/evaluate/test_evaluate.py b/test/evaluate/test_evaluate.py index ce50350e5..8d62c272a 100644 --- a/test/evaluate/test_evaluate.py +++ b/test/evaluate/test_evaluate.py @@ -9,11 +9,11 @@ from spras.dataset import Dataset from spras.evaluation import Evaluation -INPUT_DIR = 'test/evaluate/input/' -OUT_DIR = 'test/evaluate/output/' -EXPECT_DIR = 'test/evaluate/expected/' -GS_NODE_TABLE = pd.read_csv(INPUT_DIR + 'gs_node_table.csv', header=0) -SUMMARY_FILE = INPUT_DIR + 'example_summary.txt' +INPUT_DIR = Path('test', 'evaluate', 'input') +OUT_DIR = Path('test', 'evaluate', 'output') +EXPECT_DIR = Path('test', 'evaluate', 'expected') +GS_NODE_TABLE = pd.read_csv(INPUT_DIR / 'gs_node_table.csv', header=0) +SUMMARY_FILE = INPUT_DIR / 'example_summary.txt' class TestEvaluate: @@ -39,9 +39,9 @@ def setup_class(cls): pickle.dump(dataset, f) def test_node_precision_recall_per_pathway(self): - file_paths = [INPUT_DIR + 'data-test-params-123/pathway.txt', INPUT_DIR + 'data-test-params-456/pathway.txt', INPUT_DIR + 'data-test-params-789/pathway.txt', INPUT_DIR + 'data-test-params-empty/pathway.txt'] - output_file = Path(OUT_DIR + 'pr-per-pathway.txt') - output_png = Path(OUT_DIR + 'pr-per-pathway.png') + file_paths = [INPUT_DIR / 'data-test-params-123' / 'pathway.txt', INPUT_DIR / 'data-test-params-456' / 'pathway.txt', INPUT_DIR / 'data-test-params-789' / 'pathway.txt', INPUT_DIR / 'data-test-params-empty' / 'pathway.txt'] + output_file = Path(OUT_DIR / 'pr-per-pathway.txt') + output_png = Path(OUT_DIR / 'pr-per-pathway.png') output_file.unlink(missing_ok=True) output_png.unlink(missing_ok=True) @@ -49,16 +49,16 @@ def test_node_precision_recall_per_pathway(self): Evaluation.precision_and_recall_per_pathway(pr_df, output_file, output_png, True) output = pd.read_csv(output_file, sep='\t', header=0).round(8) - expected = pd.read_csv(EXPECT_DIR + 'expected-pr-per-pathway.txt', sep='\t', header=0).round(8) + expected = pd.read_csv(EXPECT_DIR / 'expected-pr-per-pathway.txt', sep='\t', header=0).round(8) assert output.equals(expected) assert output_png.exists() def test_node_precision_recall_per_pathway_empty(self): - file_paths = [INPUT_DIR + 'data-test-params-empty/pathway.txt'] - output_file = Path(OUT_DIR + 'pr-per-pathway-empty.txt') - output_png = Path(OUT_DIR + 'pr-per-pathway-empty.png') + file_paths = [INPUT_DIR / 'data-test-params-empty/pathway.txt'] + output_file = OUT_DIR / 'pr-per-pathway-empty.txt' + output_png = OUT_DIR / 'pr-per-pathway-empty.png' output_file.unlink(missing_ok=True) output_png.unlink(missing_ok=True) @@ -66,14 +66,14 @@ def test_node_precision_recall_per_pathway_empty(self): Evaluation.precision_and_recall_per_pathway(pr_df, output_file, output_png, True) output = pd.read_csv(output_file, sep='\t', header=0).round(8) - expected = pd.read_csv(EXPECT_DIR + 'expected-pr-per-pathway-empty.txt', sep='\t', header=0).round(8) + expected = pd.read_csv(EXPECT_DIR / 'expected-pr-per-pathway-empty.txt', sep='\t', header=0).round(8) assert output.equals(expected) assert output_png.exists() def test_node_precision_recall_per_pathway_not_provided(self): - output_file = OUT_DIR + 'pr-per-pathway-not-provided.txt' - output_png = OUT_DIR + 'pr-per-pathway-not-provided.png' + output_file = OUT_DIR / 'pr-per-pathway-not-provided.txt' + output_png = OUT_DIR / 'pr-per-pathway-not-provided.png' file_paths = [] pr_df = Evaluation.node_precision_and_recall(file_paths, GS_NODE_TABLE) @@ -81,9 +81,9 @@ def test_node_precision_recall_per_pathway_not_provided(self): Evaluation.precision_and_recall_per_pathway(pr_df, output_file, output_png) def test_node_precision_recall_pca_chosen_pathway_not_provided(self): - output_file = Path( OUT_DIR + 'pr-per-pathway-pca-chosen-not-provided.txt') + output_file = OUT_DIR / 'pr-per-pathway-pca-chosen-not-provided.txt' output_file.unlink(missing_ok=True) - output_png = Path(OUT_DIR + 'pr-per-pathway-pca-chosen-not-provided.png') + output_png = OUT_DIR / 'pr-per-pathway-pca-chosen-not-provided.png' output_png.unlink(missing_ok=True) file_paths = [] @@ -91,24 +91,24 @@ def test_node_precision_recall_pca_chosen_pathway_not_provided(self): Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output_file, output_png) output = pd.read_csv(output_file, sep='\t', header=0).round(8) - expected = pd.read_csv(EXPECT_DIR + 'expected-pr-pca-chosen-not-provided.txt', sep='\t', header=0).round(8) + expected = pd.read_csv(EXPECT_DIR / 'expected-pr-pca-chosen-not-provided.txt', sep='\t', header=0).round(8) assert output.equals(expected) assert output_png.exists() def test_node_precision_recall_pca_chosen_pathway(self): - output_file = Path(OUT_DIR + 'pr-per-pathway-pca-chosen.txt') + output_file = OUT_DIR / 'pr-per-pathway-pca-chosen.txt' output_file.unlink(missing_ok=True) - output_png = Path(OUT_DIR + 'pr-per-pathway-pca-chosen.png') + output_png = OUT_DIR / 'pr-per-pathway-pca-chosen.png' output_png.unlink(missing_ok=True) - output_coordinates = Path(OUT_DIR + 'pca-coordinates.tsv') + output_coordinates = OUT_DIR / 'pca-coordinates.tsv' output_coordinates.unlink(missing_ok=True) - file_paths = [INPUT_DIR + 'data-test-params-123/pathway.txt', INPUT_DIR + 'data-test-params-456/pathway.txt', - INPUT_DIR + 'data-test-params-789/pathway.txt', INPUT_DIR + 'data-test-params-empty/pathway.txt'] + file_paths = [INPUT_DIR / 'data-test-params-123' / 'pathway.txt', INPUT_DIR / 'data-test-params-456' / 'pathway.txt', + INPUT_DIR / 'data-test-params-789' / 'pathway.txt', INPUT_DIR / 'data-test-params-empty' / 'pathway.txt'] dataframe = ml.summarize_networks(file_paths) - ml.pca(dataframe, OUT_DIR + 'pca.png', OUT_DIR + 'pca-variance.txt', str(output_coordinates), kde=True, remove_empty_pathways=True) + ml.pca(dataframe, str(OUT_DIR / 'pca.png'), str(OUT_DIR / 'pca-variance.txt'), str(output_coordinates), kde=True, remove_empty_pathways=True) pathway = Evaluation.pca_chosen_pathway([output_coordinates], SUMMARY_FILE, INPUT_DIR) @@ -117,76 +117,76 @@ def test_node_precision_recall_pca_chosen_pathway(self): chosen = pd.read_csv(output_file, sep='\t', header=0).round(8) - expected = pd.read_csv(EXPECT_DIR + 'expected-pr-per-pathway-pca-chosen.txt', sep='\t', header=0).round(8) + expected = pd.read_csv(EXPECT_DIR / 'expected-pr-per-pathway-pca-chosen.txt', sep='\t', header=0).round(8) assert chosen.equals(expected) assert output_png.exists() def test_node_ensemble(self): - out_path_file = Path(OUT_DIR + 'node-ensemble.csv') + out_path_file = OUT_DIR / 'node-ensemble.csv' out_path_file.unlink(missing_ok=True) - ensemble_network = [INPUT_DIR + 'ensemble-network.tsv'] - input_network = OUT_DIR + 'data.pickle' - node_ensemble_dict = Evaluation.edge_frequency_node_ensemble(GS_NODE_TABLE, ensemble_network, input_network) + ensemble_network = [INPUT_DIR / 'ensemble-network.tsv'] + input_network = OUT_DIR / 'data.pickle' + node_ensemble_dict = Evaluation.edge_frequency_node_ensemble(GS_NODE_TABLE, ensemble_network, str(input_network)) node_ensemble_dict['ensemble'].to_csv(out_path_file, sep='\t', index=False) - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-node-ensemble.csv', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-node-ensemble.csv', shallow=False) def test_empty_node_ensemble(self): - out_path_file = Path(OUT_DIR + 'empty-node-ensemble.csv') + out_path_file = OUT_DIR / 'empty-node-ensemble.csv' out_path_file.unlink(missing_ok=True) - empty_ensemble_network = [INPUT_DIR + 'empty-ensemble-network.tsv'] - input_network = OUT_DIR + 'data.pickle' + empty_ensemble_network = [INPUT_DIR / 'empty-ensemble-network.tsv'] + input_network = OUT_DIR / 'data.pickle' node_ensemble_dict = Evaluation.edge_frequency_node_ensemble(GS_NODE_TABLE, empty_ensemble_network, - input_network) + str(input_network)) node_ensemble_dict['empty'].to_csv(out_path_file, sep='\t', index=False) - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-empty-node-ensemble.csv', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-empty-node-ensemble.csv', shallow=False) def test_multiple_node_ensemble(self): - out_path_file = Path(OUT_DIR + 'node-ensemble.csv') + out_path_file = OUT_DIR / 'node-ensemble.csv' out_path_file.unlink(missing_ok=True) - out_path_empty_file = Path(OUT_DIR + 'empty-node-ensemble.csv') + out_path_empty_file = OUT_DIR / 'empty-node-ensemble.csv' out_path_empty_file.unlink(missing_ok=True) - ensemble_networks = [INPUT_DIR + 'ensemble-network.tsv', INPUT_DIR + 'empty-ensemble-network.tsv'] - input_network = OUT_DIR + 'data.pickle' - node_ensemble_dict = Evaluation.edge_frequency_node_ensemble(GS_NODE_TABLE, ensemble_networks, input_network) + ensemble_networks = [INPUT_DIR / 'ensemble-network.tsv', INPUT_DIR / 'empty-ensemble-network.tsv'] + input_network = OUT_DIR / 'data.pickle' + node_ensemble_dict = Evaluation.edge_frequency_node_ensemble(GS_NODE_TABLE, ensemble_networks, str(input_network)) node_ensemble_dict['ensemble'].to_csv(out_path_file, sep='\t', index=False) - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-node-ensemble.csv', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-node-ensemble.csv', shallow=False) node_ensemble_dict['empty'].to_csv(out_path_empty_file, sep='\t', index=False) - assert filecmp.cmp(out_path_empty_file, EXPECT_DIR + 'expected-empty-node-ensemble.csv', shallow=False) + assert filecmp.cmp(out_path_empty_file, EXPECT_DIR / 'expected-empty-node-ensemble.csv', shallow=False) def test_precision_recall_curve_ensemble_nodes(self): - out_path_png = Path(OUT_DIR + 'pr-curve-ensemble-nodes.png') + out_path_png = OUT_DIR / 'pr-curve-ensemble-nodes.png' out_path_png.unlink(missing_ok=True) - out_path_file = Path(OUT_DIR + 'pr-curve-ensemble-nodes.txt') + out_path_file = OUT_DIR / 'pr-curve-ensemble-nodes.txt' out_path_file.unlink(missing_ok=True) - ensemble_file = pd.read_csv(INPUT_DIR + 'node-ensemble.csv', sep='\t', header=0) + ensemble_file = pd.read_csv(INPUT_DIR / 'node-ensemble.csv', sep='\t', header=0) node_ensembles_dict = {'ensemble': ensemble_file} Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, GS_NODE_TABLE, out_path_png, out_path_file) assert out_path_png.exists() - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-pr-curve-ensemble-nodes.txt', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-pr-curve-ensemble-nodes.txt', shallow=False) def test_precision_recall_curve_ensemble_nodes_empty(self): - out_path_png = Path(OUT_DIR + 'pr-curve-ensemble-nodes-empty.png') + out_path_png = OUT_DIR / 'pr-curve-ensemble-nodes-empty.png' out_path_png.unlink(missing_ok=True) - out_path_file = Path(OUT_DIR + 'pr-curve-ensemble-nodes-empty.txt') + out_path_file = OUT_DIR / 'pr-curve-ensemble-nodes-empty.txt' out_path_file.unlink(missing_ok=True) - empty_ensemble_file = pd.read_csv(INPUT_DIR + 'node-ensemble-empty.csv', sep='\t', header=0) + empty_ensemble_file = pd.read_csv(INPUT_DIR / 'node-ensemble-empty.csv', sep='\t', header=0) node_ensembles_dict = {'ensemble': empty_ensemble_file} Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, GS_NODE_TABLE, out_path_png, out_path_file) assert out_path_png.exists() - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-pr-curve-ensemble-nodes-empty.txt', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-pr-curve-ensemble-nodes-empty.txt', shallow=False) def test_precision_recall_curve_multiple_ensemble_nodes(self): - out_path_png = Path(OUT_DIR + 'pr-curve-multiple-ensemble-nodes.png') + out_path_png = OUT_DIR / 'pr-curve-multiple-ensemble-nodes.png' out_path_png.unlink(missing_ok=True) - out_path_file = Path(OUT_DIR + 'pr-curve-multiple-ensemble-nodes.txt') + out_path_file = OUT_DIR / 'pr-curve-multiple-ensemble-nodes.txt' out_path_file.unlink(missing_ok=True) - ensemble_file = pd.read_csv(INPUT_DIR + 'node-ensemble.csv', sep='\t', header=0) - empty_ensemble_file = pd.read_csv(INPUT_DIR + 'node-ensemble-empty.csv', sep='\t', header=0) + ensemble_file = pd.read_csv(INPUT_DIR / 'node-ensemble.csv', sep='\t', header=0) + empty_ensemble_file = pd.read_csv(INPUT_DIR / 'node-ensemble-empty.csv', sep='\t', header=0) node_ensembles_dict = {'ensemble1': ensemble_file, 'ensemble2': ensemble_file, 'ensemble3': empty_ensemble_file} Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, GS_NODE_TABLE, out_path_png, out_path_file, True) assert out_path_png.exists() - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-pr-curve-multiple-ensemble-nodes.txt', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-pr-curve-multiple-ensemble-nodes.txt', shallow=False)