changed > to < for peak_distance

willros · willros · commit 35528d08fd7e · 2024-04-11T16:28:14.000+02:00
diff --git a/README.md b/README.md
@@ -58,10 +58,10 @@ To generate peak area reports and a peak table for all input files, use the `fra
 - `name`: Name of the assay
 - `start`: Start of the assay in basepairs
 - `stop`: Stop of the assay in basepairs
-- `amount`: Can be left empty. Amount of peaks in assay. If left empty every peak in the interval is included. 
-- `min_ratio`: Can be left empty. only peaks with the a ratio of the `min_ratio` of the highest peak is included, *e.g.* if `min_ratio == .02`, only peaks with a height of 20 is included, if the highest peak is 100 units
+- `amount`: Optional. Amount of peaks in assay. If left empty every peak in the interval is included. 
+- `min_ratio`: Optional. Only peaks whose height is at least `min_ratio` times the height of the highest peak are included, *e.g.* if `min_ratio == 0.2` and the highest peak is 100 units, only peaks with a height of at least 20 are included
 - `which`: `LARGEST | FIRST`. Can be left empty. Which peak should be included if there are more peaks than the `amount`. if `FIRST` is set, then the two first peaks are chosen. If `LARGEST` are set, then the two largests peaks in the area are chosen. Defaults to `LARGEST`
-- `peak_distance`: Can be left empty. Distance between peaks must be above this value.
+- `peak_distance`: Optional. Distance (in basepairs) from a peak to the preceding peak must be at or under this value.
 
 
 #### Positional Arguments
diff --git a/fraggler/utils/peak_finder.py b/fraggler/utils/peak_finder.py
@@ -214,12 +214,12 @@ def find_peaks_customized(
                         .loc[lambda x: x.rank_peak <= assay.amount]
                         .drop(columns=["rank_peak"])
                     )
-                    if assay.peak_distance != "":
+                    if assay.peak_distance != 0:
                         df = (
                             df
                             .assign(distance=lambda x: x.basepairs.diff())
-                            .assign(distance=lambda x: x.distance.fillna(999))
-                            .loc[lambda x: x.distance >= assay.peak_distance]
+                            .assign(distance=lambda x: x.distance.fillna(0))
+                            .loc[lambda x: x.distance <= assay.peak_distance]
                             .drop(columns=["distance"])
                         )
                     
@@ -232,12 +232,12 @@ def find_peaks_customized(
                         .sort_values("basepairs", ascending=True)
                         .head(assay.amount)
                     )
-                    if assay.peak_distance != "":
+                    if assay.peak_distance != 0:
                         df = (
                             df
                             .assign(distance=lambda x: x.basepairs.diff())
-                            .assign(distance=lambda x: x.distance.fillna(999))
-                            .loc[lambda x: x.distance >= assay.peak_distance]
+                            .assign(distance=lambda x: x.distance.fillna(0))
+                            .loc[lambda x: x.distance <= assay.peak_distance]
                             .drop(columns=["distance"])
                         )
                 else:
diff --git a/tests/pytest/test_peak_finder.py b/tests/pytest/test_peak_finder.py
@@ -0,0 +1,70 @@
+import pandas as pd
+import numpy as np
+from fraggler.ladder_fitting.fit_ladder_model import FitLadderModel
+from fraggler.ladder_fitting.peak_ladder_assigner import PeakLadderAssigner
+from fraggler.utils.fsa_file import FsaFile
+
+from fraggler.utils.peak_finder import is_overlapping, has_columns, PeakFinder
+
+
+def test_is_overlapping_no_overlap():
+    df = pd.DataFrame({"start": [1, 4, 8], "stop": [3, 6, 10]})
+    assert not is_overlapping(df)
+
+
+def test_is_overlapping_do_overlap():
+    df = pd.DataFrame({"start": [1, 4, 5], "stop": [3, 6, 10]})
+    assert is_overlapping(df)
+
+
+def test_has_columns_correct():
+    df = pd.DataFrame(
+        {
+            "name": [],
+            "start": [],
+            "stop": [],
+            "amount": [],
+            "min_ratio": [],
+            "which": [],
+            "peak_distance": []
+        }
+    )
+    assert has_columns(df) == True
+
+
+def test_has_columns_missing_one():
+    df = pd.DataFrame(
+        {"name": [], "start": [], "stop": [], "amount": [], "min_ratio": []}
+    )
+    assert has_columns(df) == False
+
+
+def test_has_columns_extra_one():
+    df = pd.DataFrame(
+        {
+            "name": [],
+            "start": [],
+            "stop": [],
+            "amount": [],
+            "min_ratio": [],
+            "which": [],
+            "extra": [],
+        }
+    )
+    assert has_columns(df) == False
+
+
+##### Peak finder testing
+fsa_multiplex = FsaFile(
+    file="../../demo/multiplex.fsa", 
+    ladder="LIZ",
+)
+
+ladder_assigner_multiplex = PeakLadderAssigner(fsa_multiplex)
+model_multiplex = FitLadderModel(ladder_assigner_multiplex)
+pf_multiplex = PeakFinder(model_multiplex)
+
+
+def test_peak_finder():
+    global pf_multiplex
+    pass