diff --git a/pyproject.toml b/pyproject.toml index bfc602c6d..82303d69d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,8 @@ select = [ "F823", # undefined-local "F841", # unused-variable "I", # isort - "W292", # missing-newline-at-end-of-file + "W292", # missing-newline-at-end-of-file + "PD002", # pandas-use-of-inplace-argument ] [tool.setuptools.packages.find] diff --git a/spras/evaluation.py b/spras/evaluation.py index 507ffb10a..0145341c8 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -231,7 +231,7 @@ def visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: str | plt.close() # save dataframe - pr_df.drop(columns=['Algorithm'], inplace=True) + pr_df = pr_df.drop(columns=['Algorithm']) pr_df.to_csv(output_file, sep='\t', index=False) @staticmethod @@ -248,7 +248,7 @@ def precision_and_recall_per_pathway(pr_df: pd.DataFrame, output_file: str | Pat """ if not pr_df.empty: pr_df['Algorithm'] = pr_df['Pathway'].apply(lambda p: Path(p).parent.name.split('-')[1]) - pr_df.sort_values(by=['Recall', 'Pathway'], axis=0, ascending=True, inplace=True) + pr_df = pr_df.sort_values(by=['Recall', 'Pathway'], axis=0, ascending=True) if aggregate_per_algorithm: # Guaranteed to only have one algorithm in Algorithm column @@ -281,7 +281,7 @@ def precision_and_recall_pca_chosen_pathway(pr_df: pd.DataFrame, output_file: st if not pr_df.empty: pr_df['Algorithm'] = pr_df['Pathway'].apply(lambda p: Path(p).parent.name.split('-')[1]) - pr_df.sort_values(by=['Recall', 'Pathway'], axis=0, ascending=True, inplace=True) + pr_df = pr_df.sort_values(by=['Recall', 'Pathway'], axis=0, ascending=True) if aggregate_per_algorithm: title = "PCA-Chosen Pathway Per Algorithm Precision and Recall Plot" @@ -305,7 +305,7 @@ def precision_and_recall_pca_chosen_pathway(pr_df: pd.DataFrame, output_file: st plt.close() @staticmethod - def pca_chosen_pathway(coordinates_files: Iterable[Union[str, PathLike]], pathway_summary_file: str, output_dir: str): + def 
pca_chosen_pathway(coordinates_files: Iterable[Union[str, PathLike]], pathway_summary_file: str | PathLike, output_dir: str | PathLike) -> list[str]: """ Identifies the pathway closest to a specified highest kernel density estimated (KDE) peak based on PCA coordinates @@ -323,7 +323,7 @@ def pca_chosen_pathway(coordinates_files: Iterable[Union[str, PathLike]], pathwa """ # TODO update to add in the pathways for the algorithms that do not provide a pca chosen pathway https://github.com/Reed-CompBio/spras/issues/341 - rep_pathways = [] + rep_pathways: list[str] = [] for coordinates_file in coordinates_files: coord_df = pd.read_csv(coordinates_file, delimiter='\t', header=0) @@ -356,7 +356,7 @@ def pca_chosen_pathway(coordinates_files: Iterable[Union[str, PathLike]], pathwa return rep_pathways @staticmethod - def edge_frequency_node_ensemble(node_table: pd.DataFrame, ensemble_files: Iterable[Union[str, PathLike]], dataset_file: str) -> dict: + def edge_frequency_node_ensemble(node_table: pd.DataFrame, ensemble_files: Iterable[Union[str, PathLike]], dataset_file: str | PathLike) -> dict: """ Generates a dictionary of node ensembles using edge frequency data from a list of ensemble files. A list of ensemble files can contain an aggregated ensemble or algorithm-specific ensembles per dataset @@ -387,11 +387,11 @@ def edge_frequency_node_ensemble(node_table: pd.DataFrame, ensemble_files: Itera if interactome.empty: raise ValueError( - f"Cannot compute PR curve or generate node ensemble. Input network for dataset \"{dataset_file.split('-')[0]}\" is empty." + f"Cannot compute PR curve or generate node ensemble. Input network for dataset \"{Path(dataset_file).name.split('-')[0]}\" is empty." ) if node_table.empty: raise ValueError( - f"Cannot compute PR curve or generate node ensemble. Gold standard associated with dataset \"{dataset_file.split('-')[0]}\" is empty." + f"Cannot compute PR curve or generate node ensemble. 
Gold standard associated with dataset \"{Path(dataset_file).name.split('-')[0]}\" is empty." ) # set the initial default frequencies to 0 for all interactome and gold standard nodes diff --git a/spras/meo.py b/spras/meo.py index 6fe06e058..e6c40a326 100644 --- a/spras/meo.py +++ b/spras/meo.py @@ -215,7 +215,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file, params): # Would need to load the paths output file to rank edges correctly df = add_rank_column(df) df = reinsert_direction_col_directed(df) - df.drop(columns=['Type', 'Oriented', 'Weight'], inplace=True) + df = df.drop(columns=['Type', 'Oriented', 'Weight']) df.columns = ['Node1', 'Node2', 'Rank', "Direction"] df, has_duplicates = duplicate_edges(df) if has_duplicates: diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index 1bfa277e9..4cd7e9cf8 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -235,7 +235,7 @@ def parse_output(raw_pathway_file, standardized_pathway_file, params): df.columns = ["Edge1", "InteractionType", "Edge2"] df = add_rank_column(df) df = reinsert_direction_col_mixed(df, "InteractionType", "pd", "pp") - df.drop(columns=['InteractionType'], inplace=True) + df = df.drop(columns=['InteractionType']) df.columns = ['Node1', 'Node2', 'Rank', 'Direction'] df, has_duplicates = duplicate_edges(df) if has_duplicates: diff --git a/test/evaluate/expected/expected-pca-coordinates.txt b/test/evaluate/expected/expected-pca-coordinates.txt new file mode 100644 index 000000000..786861cc1 --- /dev/null +++ b/test/evaluate/expected/expected-pca-coordinates.txt @@ -0,0 +1,6 @@ +datapoint_labels PC1 PC2 +data-test-params-123 0.52704628 0.70710678 +data-test-params-456 -1.05409255 -0.0 +data-test-params-789 0.52704628 -0.70710678 +centroid 0.0 -0.0 +kde_peak 0.11419336 -0.00785674 diff --git a/test/evaluate/test_evaluate.py b/test/evaluate/test_evaluate.py index ce50350e5..aa6fcb020 100644 --- a/test/evaluate/test_evaluate.py +++ 
b/test/evaluate/test_evaluate.py @@ -9,11 +9,11 @@ from spras.dataset import Dataset from spras.evaluation import Evaluation -INPUT_DIR = 'test/evaluate/input/' -OUT_DIR = 'test/evaluate/output/' -EXPECT_DIR = 'test/evaluate/expected/' -GS_NODE_TABLE = pd.read_csv(INPUT_DIR + 'gs_node_table.csv', header=0) -SUMMARY_FILE = INPUT_DIR + 'example_summary.txt' +INPUT_DIR = Path('test', 'evaluate', 'input') +OUT_DIR = Path('test', 'evaluate', 'output') +EXPECT_DIR = Path('test', 'evaluate', 'expected') +GS_NODE_TABLE = pd.read_csv(INPUT_DIR / 'gs_node_table.csv', header=0) +SUMMARY_FILE = INPUT_DIR / 'example_summary.txt' class TestEvaluate: @@ -39,9 +39,9 @@ def setup_class(cls): pickle.dump(dataset, f) def test_node_precision_recall_per_pathway(self): - file_paths = [INPUT_DIR + 'data-test-params-123/pathway.txt', INPUT_DIR + 'data-test-params-456/pathway.txt', INPUT_DIR + 'data-test-params-789/pathway.txt', INPUT_DIR + 'data-test-params-empty/pathway.txt'] - output_file = Path(OUT_DIR + 'pr-per-pathway.txt') - output_png = Path(OUT_DIR + 'pr-per-pathway.png') + file_paths = [INPUT_DIR / 'data-test-params-123/pathway.txt', INPUT_DIR / 'data-test-params-456/pathway.txt', INPUT_DIR / 'data-test-params-789/pathway.txt', INPUT_DIR / 'data-test-params-empty/pathway.txt'] + output_file = Path(OUT_DIR, 'pr-per-pathway.txt') + output_png = Path(OUT_DIR, 'pr-per-pathway.png') output_file.unlink(missing_ok=True) output_png.unlink(missing_ok=True) @@ -49,16 +49,16 @@ def test_node_precision_recall_per_pathway(self): Evaluation.precision_and_recall_per_pathway(pr_df, output_file, output_png, True) output = pd.read_csv(output_file, sep='\t', header=0).round(8) - expected = pd.read_csv(EXPECT_DIR + 'expected-pr-per-pathway.txt', sep='\t', header=0).round(8) + expected = pd.read_csv(EXPECT_DIR / 'expected-pr-per-pathway.txt', sep='\t', header=0).round(8) assert output.equals(expected) assert output_png.exists() def test_node_precision_recall_per_pathway_empty(self): - file_paths = 
[INPUT_DIR + 'data-test-params-empty/pathway.txt'] - output_file = Path(OUT_DIR + 'pr-per-pathway-empty.txt') - output_png = Path(OUT_DIR + 'pr-per-pathway-empty.png') + file_paths = [INPUT_DIR / 'data-test-params-empty/pathway.txt'] + output_file = Path(OUT_DIR, 'pr-per-pathway-empty.txt') + output_png = Path(OUT_DIR, 'pr-per-pathway-empty.png') output_file.unlink(missing_ok=True) output_png.unlink(missing_ok=True) @@ -66,14 +66,14 @@ def test_node_precision_recall_per_pathway_empty(self): Evaluation.precision_and_recall_per_pathway(pr_df, output_file, output_png, True) output = pd.read_csv(output_file, sep='\t', header=0).round(8) - expected = pd.read_csv(EXPECT_DIR + 'expected-pr-per-pathway-empty.txt', sep='\t', header=0).round(8) + expected = pd.read_csv(EXPECT_DIR / 'expected-pr-per-pathway-empty.txt', sep='\t', header=0).round(8) assert output.equals(expected) assert output_png.exists() def test_node_precision_recall_per_pathway_not_provided(self): - output_file = OUT_DIR + 'pr-per-pathway-not-provided.txt' - output_png = OUT_DIR + 'pr-per-pathway-not-provided.png' + output_file = OUT_DIR / 'pr-per-pathway-not-provided.txt' + output_png = OUT_DIR / 'pr-per-pathway-not-provided.png' file_paths = [] pr_df = Evaluation.node_precision_and_recall(file_paths, GS_NODE_TABLE) @@ -81,9 +81,9 @@ def test_node_precision_recall_per_pathway_not_provided(self): Evaluation.precision_and_recall_per_pathway(pr_df, output_file, output_png) def test_node_precision_recall_pca_chosen_pathway_not_provided(self): - output_file = Path( OUT_DIR + 'pr-per-pathway-pca-chosen-not-provided.txt') + output_file = Path(OUT_DIR, 'pr-per-pathway-pca-chosen-not-provided.txt') output_file.unlink(missing_ok=True) - output_png = Path(OUT_DIR + 'pr-per-pathway-pca-chosen-not-provided.png') + output_png = Path(OUT_DIR, 'pr-per-pathway-pca-chosen-not-provided.png') output_png.unlink(missing_ok=True) file_paths = [] @@ -91,102 +91,106 @@ def 
test_node_precision_recall_pca_chosen_pathway_not_provided(self): Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output_file, output_png) output = pd.read_csv(output_file, sep='\t', header=0).round(8) - expected = pd.read_csv(EXPECT_DIR + 'expected-pr-pca-chosen-not-provided.txt', sep='\t', header=0).round(8) + expected = pd.read_csv(EXPECT_DIR / 'expected-pr-pca-chosen-not-provided.txt', sep='\t', header=0).round(8) assert output.equals(expected) assert output_png.exists() def test_node_precision_recall_pca_chosen_pathway(self): - output_file = Path(OUT_DIR + 'pr-per-pathway-pca-chosen.txt') + output_file = Path(OUT_DIR / 'pr-per-pathway-pca-chosen.txt') output_file.unlink(missing_ok=True) - output_png = Path(OUT_DIR + 'pr-per-pathway-pca-chosen.png') + output_png = Path(OUT_DIR / 'pr-per-pathway-pca-chosen.png') output_png.unlink(missing_ok=True) - output_coordinates = Path(OUT_DIR + 'pca-coordinates.tsv') + output_coordinates = Path(OUT_DIR / 'pca-coordinates.tsv') output_coordinates.unlink(missing_ok=True) - file_paths = [INPUT_DIR + 'data-test-params-123/pathway.txt', INPUT_DIR + 'data-test-params-456/pathway.txt', - INPUT_DIR + 'data-test-params-789/pathway.txt', INPUT_DIR + 'data-test-params-empty/pathway.txt'] + file_paths = [INPUT_DIR / 'data-test-params-123' / 'pathway.txt', INPUT_DIR / 'data-test-params-456' / 'pathway.txt', + INPUT_DIR / 'data-test-params-789' / 'pathway.txt', INPUT_DIR / 'data-test-params-empty' / 'pathway.txt'] dataframe = ml.summarize_networks(file_paths) - ml.pca(dataframe, OUT_DIR + 'pca.png', OUT_DIR + 'pca-variance.txt', str(output_coordinates), kde=True, remove_empty_pathways=True) + ml.pca(dataframe, OUT_DIR / 'pca.png', OUT_DIR / 'pca-variance.txt', output_coordinates, kde=True, remove_empty_pathways=True) - pathway = Evaluation.pca_chosen_pathway([output_coordinates], SUMMARY_FILE, INPUT_DIR) + pathways = Evaluation.pca_chosen_pathway([output_coordinates], SUMMARY_FILE, INPUT_DIR) + assert len(pathways) == 1, 
f"There must only be one pathway, but got {len(pathways)} instead! ({pathways})" + pd.testing.assert_frame_equal( + pd.read_csv(output_coordinates, sep='\t', header=0), + pd.read_csv(EXPECT_DIR / 'expected-pca-coordinates.txt', sep='\t', header=0) + ) - pr_df = Evaluation.node_precision_and_recall(pathway, GS_NODE_TABLE) + pr_df = Evaluation.node_precision_and_recall(pathways, GS_NODE_TABLE) Evaluation.precision_and_recall_pca_chosen_pathway(pr_df, output_file, output_png, True) - chosen = pd.read_csv(output_file, sep='\t', header=0).round(8) - expected = pd.read_csv(EXPECT_DIR + 'expected-pr-per-pathway-pca-chosen.txt', sep='\t', header=0).round(8) + expected = pd.read_csv(EXPECT_DIR / 'expected-pr-per-pathway-pca-chosen.txt', sep='\t', header=0).round(8) - assert chosen.equals(expected) + pd.testing.assert_frame_equal(chosen, expected) assert output_png.exists() def test_node_ensemble(self): - out_path_file = Path(OUT_DIR + 'node-ensemble.csv') + out_path_file = Path(OUT_DIR, 'node-ensemble.csv') out_path_file.unlink(missing_ok=True) - ensemble_network = [INPUT_DIR + 'ensemble-network.tsv'] - input_network = OUT_DIR + 'data.pickle' + ensemble_network = [INPUT_DIR / 'ensemble-network.tsv'] + input_network = OUT_DIR / 'data.pickle' node_ensemble_dict = Evaluation.edge_frequency_node_ensemble(GS_NODE_TABLE, ensemble_network, input_network) node_ensemble_dict['ensemble'].to_csv(out_path_file, sep='\t', index=False) - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-node-ensemble.csv', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-node-ensemble.csv', shallow=False) def test_empty_node_ensemble(self): - out_path_file = Path(OUT_DIR + 'empty-node-ensemble.csv') + out_path_file = Path(OUT_DIR, 'empty-node-ensemble.csv') out_path_file.unlink(missing_ok=True) - empty_ensemble_network = [INPUT_DIR + 'empty-ensemble-network.tsv'] - input_network = OUT_DIR + 'data.pickle' + empty_ensemble_network = [INPUT_DIR / 'empty-ensemble-network.tsv'] 
+ input_network = OUT_DIR / 'data.pickle' node_ensemble_dict = Evaluation.edge_frequency_node_ensemble(GS_NODE_TABLE, empty_ensemble_network, input_network) node_ensemble_dict['empty'].to_csv(out_path_file, sep='\t', index=False) - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-empty-node-ensemble.csv', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-empty-node-ensemble.csv', shallow=False) def test_multiple_node_ensemble(self): - out_path_file = Path(OUT_DIR + 'node-ensemble.csv') + out_path_file = Path(OUT_DIR / 'node-ensemble.csv') out_path_file.unlink(missing_ok=True) - out_path_empty_file = Path(OUT_DIR + 'empty-node-ensemble.csv') + out_path_empty_file = Path(OUT_DIR / 'empty-node-ensemble.csv') out_path_empty_file.unlink(missing_ok=True) - ensemble_networks = [INPUT_DIR + 'ensemble-network.tsv', INPUT_DIR + 'empty-ensemble-network.tsv'] - input_network = OUT_DIR + 'data.pickle' + ensemble_networks = [INPUT_DIR / 'ensemble-network.tsv', INPUT_DIR / 'empty-ensemble-network.tsv'] + input_network = OUT_DIR / 'data.pickle' node_ensemble_dict = Evaluation.edge_frequency_node_ensemble(GS_NODE_TABLE, ensemble_networks, input_network) node_ensemble_dict['ensemble'].to_csv(out_path_file, sep='\t', index=False) - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-node-ensemble.csv', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-node-ensemble.csv', shallow=False) node_ensemble_dict['empty'].to_csv(out_path_empty_file, sep='\t', index=False) - assert filecmp.cmp(out_path_empty_file, EXPECT_DIR + 'expected-empty-node-ensemble.csv', shallow=False) + assert filecmp.cmp(out_path_empty_file, EXPECT_DIR / 'expected-empty-node-ensemble.csv', shallow=False) def test_precision_recall_curve_ensemble_nodes(self): - out_path_png = Path(OUT_DIR + 'pr-curve-ensemble-nodes.png') + out_path_png = Path(OUT_DIR, 'pr-curve-ensemble-nodes.png') out_path_png.unlink(missing_ok=True) - out_path_file = Path(OUT_DIR + 
'pr-curve-ensemble-nodes.txt') + out_path_file = Path(OUT_DIR, 'pr-curve-ensemble-nodes.txt') out_path_file.unlink(missing_ok=True) - ensemble_file = pd.read_csv(INPUT_DIR + 'node-ensemble.csv', sep='\t', header=0) + ensemble_file = pd.read_csv(INPUT_DIR / 'node-ensemble.csv', sep='\t', header=0) node_ensembles_dict = {'ensemble': ensemble_file} Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, GS_NODE_TABLE, out_path_png, out_path_file) assert out_path_png.exists() - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-pr-curve-ensemble-nodes.txt', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-pr-curve-ensemble-nodes.txt', shallow=False) def test_precision_recall_curve_ensemble_nodes_empty(self): - out_path_png = Path(OUT_DIR + 'pr-curve-ensemble-nodes-empty.png') + out_path_png = Path(OUT_DIR, 'pr-curve-ensemble-nodes-empty.png') out_path_png.unlink(missing_ok=True) - out_path_file = Path(OUT_DIR + 'pr-curve-ensemble-nodes-empty.txt') + out_path_file = Path(OUT_DIR, 'pr-curve-ensemble-nodes-empty.txt') out_path_file.unlink(missing_ok=True) - empty_ensemble_file = pd.read_csv(INPUT_DIR + 'node-ensemble-empty.csv', sep='\t', header=0) + empty_ensemble_file = pd.read_csv(INPUT_DIR / 'node-ensemble-empty.csv', sep='\t', header=0) node_ensembles_dict = {'ensemble': empty_ensemble_file} Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, GS_NODE_TABLE, out_path_png, out_path_file) assert out_path_png.exists() - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-pr-curve-ensemble-nodes-empty.txt', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-pr-curve-ensemble-nodes-empty.txt', shallow=False) def test_precision_recall_curve_multiple_ensemble_nodes(self): - out_path_png = Path(OUT_DIR + 'pr-curve-multiple-ensemble-nodes.png') + out_path_png = Path(OUT_DIR, 'pr-curve-multiple-ensemble-nodes.png') out_path_png.unlink(missing_ok=True) - out_path_file = Path(OUT_DIR + 
'pr-curve-multiple-ensemble-nodes.txt') + out_path_file = Path(OUT_DIR, 'pr-curve-multiple-ensemble-nodes.txt') out_path_file.unlink(missing_ok=True) - ensemble_file = pd.read_csv(INPUT_DIR + 'node-ensemble.csv', sep='\t', header=0) - empty_ensemble_file = pd.read_csv(INPUT_DIR + 'node-ensemble-empty.csv', sep='\t', header=0) + ensemble_file = pd.read_csv(INPUT_DIR / 'node-ensemble.csv', sep='\t', header=0) + empty_ensemble_file = pd.read_csv(INPUT_DIR / 'node-ensemble-empty.csv', sep='\t', header=0) node_ensembles_dict = {'ensemble1': ensemble_file, 'ensemble2': ensemble_file, 'ensemble3': empty_ensemble_file} Evaluation.precision_recall_curve_node_ensemble(node_ensembles_dict, GS_NODE_TABLE, out_path_png, out_path_file, True) assert out_path_png.exists() - assert filecmp.cmp(out_path_file, EXPECT_DIR + 'expected-pr-curve-multiple-ensemble-nodes.txt', shallow=False) + assert filecmp.cmp(out_path_file, EXPECT_DIR / 'expected-pr-curve-multiple-ensemble-nodes.txt', shallow=False) diff --git a/test/ml/expected/expected-pca-coordinates-kde-negated.tsv b/test/ml/expected/expected-pca-coordinates-kde-negated.tsv deleted file mode 100644 index 3c13c8c4e..000000000 --- a/test/ml/expected/expected-pca-coordinates-kde-negated.tsv +++ /dev/null @@ -1,7 +0,0 @@ -datapoint_labels PC1 PC2 -test-data-s1 -1.01220906 0.05003395 -test-data-s2 -0.84372464 -0.59953316 -test-data-s3 1.56185985 -0.48650911 -test-data-empty 0.29407385 1.03600832 -centroid 0.0 0.0 -kde_peak 0.65469949 0.06343901 diff --git a/test/ml/expected/expected-pca-coordinates-sorted-negated.tsv b/test/ml/expected/expected-pca-coordinates-sorted-negated.tsv deleted file mode 100644 index 4ccadef05..000000000 --- a/test/ml/expected/expected-pca-coordinates-sorted-negated.tsv +++ /dev/null @@ -1,5 +0,0 @@ -datapoint_labels PC1 PC2 -centroid 0.0 0.0 -test-data-s1 0.94594398 -0.46508182 -test-data-s2 0.72014153 0.5090913 -test-data-s3 -1.66608552 -0.04400948 diff --git a/test/ml/test_ml.py b/test/ml/test_ml.py index 
cc6620164..6b74cd681 100644 --- a/test/ml/test_ml.py +++ b/test/ml/test_ml.py @@ -99,23 +99,17 @@ def test_pca_kernel_density(self): OUT_DIR / 'pca-coordinates-kde.tsv', kde=True) coord = pd.read_table(OUT_DIR / 'pca-coordinates-kde.tsv') expected = pd.read_table(EXPECT_DIR / 'expected-pca-coordinates-kde.tsv') - expected_negated = pd.read_table(EXPECT_DIR / 'expected-pca-coordinates-kde-negated.tsv') coord_kde_peak = coord.loc[coord['datapoint_labels'] == 'kde_peak'].round(5) expected_kde_peak = expected.loc[expected['datapoint_labels'] == 'kde_peak'].round(5) - expected_kde_peak_negated = expected_negated.loc[expected_negated['datapoint_labels'] == 'kde_peak'].round(5) - assert coord_kde_peak.equals(expected_kde_peak) or coord_kde_peak.equals(expected_kde_peak_negated) + pd.testing.assert_frame_equal(coord_kde_peak, expected_kde_peak) def test_pca_robustness(self): dataframe = ml.summarize_networks([INPUT_DIR / 'test-data-s1/s1.txt', INPUT_DIR / 'test-data-s2/s2.txt', INPUT_DIR / 'test-data-s3/s3.txt']) - # PCA signage now depends on the input data: we need two differently signed PCA coordinate files. - # See https://scikit-learn.org/stable/whats_new/v1.5.html#changed-models for more info. 
expected = pd.read_table(EXPECT_DIR / 'expected-pca-coordinates-sorted.tsv') - expected_other = pd.read_table(EXPECT_DIR / 'expected-pca-coordinates-sorted-negated.tsv') expected = expected.round(5) - expected_other = expected_other.round(5) - expected.sort_values(by='datapoint_labels', ignore_index=True, inplace=True) + expected = expected.sort_values(by='datapoint_labels', ignore_index=True) for _ in range(5): dataframe_shuffled = dataframe.sample(frac=1, axis=1) # permute the columns @@ -123,8 +117,8 @@ def test_pca_robustness(self): OUT_DIR / 'pca-shuffled-columns-coordinates.tsv') coord = pd.read_table(OUT_DIR / 'pca-shuffled-columns-coordinates.tsv') coord = coord.round(5) # round values to 5 digits to account for numeric differences across machines - coord.sort_values(by='datapoint_labels', ignore_index=True, inplace=True) - assert coord.equals(expected) or coord.equals(expected_other) + coord = coord.sort_values(by='datapoint_labels', ignore_index=True) + pd.testing.assert_frame_equal(coord, expected) for _ in range(5): dataframe_shuffled = dataframe.sample(frac=1, axis=0) # permute the rows @@ -132,9 +126,9 @@ def test_pca_robustness(self): OUT_DIR / 'pca-shuffled-rows-coordinates.tsv') coord = pd.read_table(OUT_DIR / 'pca-shuffled-rows-coordinates.tsv') coord = coord.round(5) # round values to 5 digits to account for numeric differences across machines - coord.sort_values(by='datapoint_labels', ignore_index=True, inplace=True) + coord = coord.sort_values(by='datapoint_labels', ignore_index=True) - assert coord.equals(expected) or coord.equals(expected_other) + pd.testing.assert_frame_equal(coord, expected) def test_hac_horizontal(self): dataframe = ml.summarize_networks([INPUT_DIR / 'test-data-s1/s1.txt', INPUT_DIR / 'test-data-s2/s2.txt', INPUT_DIR / 'test-data-s3/s3.txt'])