diff --git a/IBDReduce/ibdreduce_v3.py b/IBDReduce/ibdreduce_v3.py index b637bb8..9c4af6e 100755 --- a/IBDReduce/ibdreduce_v3.py +++ b/IBDReduce/ibdreduce_v3.py @@ -83,6 +83,30 @@ def parse_line(line: str) -> Tuple[int, int, np.ndarray, np.ndarray]: return chrom, pos, obs_parts, vals +def write_permutation_values( + data: np.ndarray, idx: int, offset: int, vals: np.ndarray, nperm: int +) -> None: + """Insert observed and permutation statistics into the shared matrix. + + Args: + data: The matrix tracking observed and permutation statistics for all + breakpoints. + idx: Row index of the breakpoint being updated. + offset: Zero-based permutation run offset. + vals: Array containing the observed value followed by permutation + statistics for the current run. + nperm: Number of permutations produced per run. + """ + + if offset == 0: + data[idx, 0] = vals[0] + data[idx, 1 : (nperm + 1)] = vals[1:] + else: + start = 1 + offset * nperm + end = 1 + (offset + 1) * nperm + data[idx, start:end] = vals[1:] + + def compute_fdr_adjusted_pvalues(empp: np.ndarray, permutation_pvalues: np.ndarray) -> np.ndarray: """Calculate FDR-adjusted p-values from permutation p-values. @@ -267,11 +291,7 @@ def main(): ) ibdfrac[idx] = (float(dis) - float(predis)) / total predis = dis - data[idx, offset:(args.nperm + 1)] = vals - else: - data[ - idx, (offset * args.nperm):((offset + 1) * args.nperm) - ] = vals[1:] + write_permutation_values(data, idx, offset, vals, args.nperm) except ValueError as e: print(f"Error at {i} {j} {idx} {offset} {fpath}", file=sys.stderr) raise e diff --git a/tests/data/sample_chr1_run1.txt b/tests/data/sample_chr1_run1.txt new file mode 100644 index 0000000..43a246e --- /dev/null +++ b/tests/data/sample_chr1_run1.txt @@ -0,0 +1,3 @@ +1 1000 0.1 0.2 0.3 0.9 0.8 0.7 +1 2000 0.2 0.1 0.3 0.6 0.5 0.4 +1 3000 0.1 0.1 0.1 0.3 0.2 0.1 diff --git a/tests/test_permutation_columns.py b/tests/test_permutation_columns.py new file mode 100644 index 0000000..db19c59 --- /dev/null +++ b/tests/test_permutation_columns.py @@ -0,0 +1,42 @@ +import pathlib +import sys + +import numpy as np + + +PROJECT_ROOT = pathlib.Path(__file__).resolve().parents[1] +sys.path.append(str(PROJECT_ROOT / "IBDReduce")) + +from ibdreduce_v3 import parse_line, write_permutation_values + + +def test_permutation_columns_do_not_overlap(): + data_dir = PROJECT_ROOT / "tests" / "data" + run_paths = [data_dir / f"sample_chr1_run{j}.txt" for j in range(2)] + + first_line = run_paths[0].read_text(encoding="utf-8").splitlines()[0] + _, _, _, first_vals = parse_line(first_line) + nperm = len(first_vals) - 1 + + with run_paths[0].open("r", encoding="utf-8") as handle: + breakpoints = sum(1 for _ in handle) + + data = np.zeros((breakpoints, nperm * len(run_paths) + 1), dtype=float) + + for offset, path in enumerate(run_paths): + idx = 0 + with path.open("r", encoding="utf-8") as handle: + for line in handle: + _, _, _, vals = parse_line(line) + write_permutation_values(data, idx, offset, vals, nperm) + idx += 1 + + for offset, path in enumerate(run_paths): + start = 1 + offset * nperm + end = 1 + (offset + 1) * nperm + idx = 0 + with path.open("r", encoding="utf-8") as handle: + for line in handle: + _, _, _, vals = parse_line(line) + np.testing.assert_allclose(data[idx, start:end], vals[1:]) + idx += 1