-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathcandidate_filters.py
154 lines (126 loc) · 5.85 KB
/
candidate_filters.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# Copyright 2019
# Author: Fabio Gutmann <https://github.com/fabio-gut>
from typing import Callable, List, Optional, Tuple

from copomus.indexing import idx_to_array_index
def _filter_bp_type(bps: List[Tuple[int, int]], q: str, t: str, qidxpos0: int, tidxpos0: int, bp_type='') -> \
        List[Tuple[int, int]]:
    """
    Drops every base pair whose two nucleotides form the given pair type (in either order)

    :param bps: List of tuples in form (q_index, t_index) representing the base pairs
    :param q: The query sequence
    :param t: The target sequence
    :param qidxpos0: Starting index for the query
    :param tidxpos0: Starting index for the target
    :param bp_type: Two-letter pair type to remove, for example GU, CG, AU, GC, ...;
        the reversed spelling (e.g. UG for GU) is removed as well
    :return: The remaining base pairs as (q_index, t_index) tuples, in their original order

    >>> q, t = 'GCUACGAUC', 'UUUGCGAGCAGCUAGG'
    >>> bps = [(1,1),(2,2),(6,13),(8,15),(9,16)]
    >>> _filter_bp_type(bps, q, t, 1, 1, 'GU')
    [(2, 2), (9, 16)]
    >>> _filter_bp_type(bps, q, t, 1, 1, 'GC')
    [(1, 1), (2, 2), (6, 13), (8, 15)]
    """
    def _pair_at(q_idx: int, t_idx: int) -> str:
        # Translate the sequence indices to array positions with the project helper,
        # then read the two nucleotides as one string such as 'GU'.
        q_pos = idx_to_array_index(q_idx, qidxpos0)
        t_pos = idx_to_array_index(t_idx, tidxpos0)
        return f'{q[q_pos]}{t[t_pos]}'

    return [
        (q_idx, t_idx)
        for q_idx, t_idx in bps
        if _pair_at(q_idx, t_idx) not in (bp_type, bp_type[::-1])
    ]
def _neighbors_in_mfe(q_index: int, t_index: int, bps: List[Tuple[int, int]]) -> int:
"""
Counts how many neighboring base pairs are in the MFE region
:param q_index: Base pair index on the query
:param t_index: Base pair index on the target
:param bps: List of tuples in form (q_index, t_index) representing the base pairs
:return: Count of neighboring base pairs that are in the MFE region
>>> _neighbors_in_mfe(3, 5, [(1, 7), (6, 9), (2, 6), (4, 4), (7, 4)])
2
>>> _neighbors_in_mfe(3, 5, [(1, 7), (6, 9), (4, 4), (7, 4)])
1
"""
count = 0
for q_o, t_o in [(+1, -1), (-1, +1)]: # get neighbors by offset
if (q_index+q_o, t_index+t_o) in bps:
count += 1
return count
def _neighbors_can_pair(q_index: int, t_index: int, q: str, t: str, qidxpos0: int, tidxpos0: int) -> int:
    """
    Counts how many of the two stacking neighbors of a base pair could form a pair

    A neighbor counts when both of its positions lie inside the sequences and its
    nucleotides spell one of GC, CG, AU, UA, GU or UG.

    :param q_index: Base pair index on the query
    :param t_index: Base pair index on the target
    :param q: The query sequence
    :param t: The target sequence
    :param qidxpos0: Starting index for the query
    :param tidxpos0: Starting index for the target
    :return: Number of neighboring positions that can pair (0, 1 or 2)

    >>> _neighbors_can_pair(1, 5, 'GCAUCGAUC', 'CGUACGAUCGAUCC', 1, 1)
    0
    >>> _neighbors_can_pair(3, 3, 'GCAUCGAUC', 'CGUACGAUCGAUCC', 1, 1)
    1
    >>> _neighbors_can_pair(6, 9, 'GCAUCGAUC', 'CGUACGAUCGAUCC', 1, 1)
    2
    """
    pairable = ('GC', 'CG', 'AU', 'UA', 'GU', 'UG')
    q_pos = idx_to_array_index(q_index, qidxpos0)
    t_pos = idx_to_array_index(t_index, tidxpos0)

    # The two diagonal neighbors, expressed directly as array positions.
    candidates = ((q_pos + 1, t_pos - 1), (q_pos - 1, t_pos + 1))
    return sum(
        1
        for q_nb, t_nb in candidates
        # The bounds check comes first so out-of-range neighbors are never indexed.
        if q_nb in range(len(q)) and t_nb in range(len(t))
        and f'{q[q_nb]}{t[t_nb]}' in pairable
    )
def filter_gu(bps: List[Tuple[int, int]], q: str, t: str, qidxpos0: int, tidxpos0: int) -> List[Tuple[int, int]]:
    """
    Removes all GU and UG base pairs

    :param bps: List of tuples in form (q_index, t_index) representing the base pairs
    :param q: The query sequence
    :param t: The target sequence
    :param qidxpos0: Starting index for the query
    :param tidxpos0: Starting index for the target
    :return: The base pairs that are neither GU nor UG
    """
    return _filter_bp_type(bps, q, t, qidxpos0, tidxpos0, 'GU')
def filter_au(bps: List[Tuple[int, int]], q: str, t: str, qidxpos0: int, tidxpos0: int) -> List[Tuple[int, int]]:
    """
    Removes all AU and UA base pairs

    :param bps: List of tuples in form (q_index, t_index) representing the base pairs
    :param q: The query sequence
    :param t: The target sequence
    :param qidxpos0: Starting index for the query
    :param tidxpos0: Starting index for the target
    :return: The base pairs that are neither AU nor UA
    """
    return _filter_bp_type(bps, q, t, qidxpos0, tidxpos0, 'AU')
def filter_gc(bps: List[Tuple[int, int]], q: str, t: str, qidxpos0: int, tidxpos0: int) -> List[Tuple[int, int]]:
    """
    Removes all GC and CG base pairs

    :param bps: List of tuples in form (q_index, t_index) representing the base pairs
    :param q: The query sequence
    :param t: The target sequence
    :param qidxpos0: Starting index for the query
    :param tidxpos0: Starting index for the target
    :return: The base pairs that are neither GC nor CG
    """
    return _filter_bp_type(bps, q, t, qidxpos0, tidxpos0, 'GC')
def filter_lp(bps: List[Tuple[int, int]], q: str, t: str, qidxpos0: int, tidxpos0: int) -> List[Tuple[int, int]]:
    """
    Removes lonely base pairs that can not stack

    A base pair is dropped when none of its neighbors is in bps AND none of its
    neighbors can pair; every other base pair is kept.

    :param bps: List of tuples in form (q_index, t_index) representing the base pairs
    :param q: The query sequence
    :param t: The target sequence
    :param qidxpos0: Starting index for the query
    :param tidxpos0: Starting index for the target
    :return: The base pairs that have at least one neighbor in bps or one pairable neighbor
    """
    return [
        (q_idx, t_idx)
        for q_idx, t_idx in bps
        # Keep the pair as soon as either kind of stacking partner exists.
        if _neighbors_in_mfe(q_idx, t_idx, bps)
        or _neighbors_can_pair(q_idx, t_idx, q, t, qidxpos0, tidxpos0)
    ]
def filter_lp_mfe(bps: List[Tuple[int, int]], q: str, t: str, qidxpos0: int, tidxpos0: int) -> List[Tuple[int, int]]:
    """
    Removes base pairs that are lonely within the MFE interaction

    A base pair is dropped when neither of its neighbors is contained in bps.

    :param bps: List of tuples in form (q_index, t_index) representing the base pairs
    :param q: The query sequence (not used by this filter)
    :param t: The target sequence (not used by this filter)
    :param qidxpos0: Starting index for the query (not used by this filter)
    :param tidxpos0: Starting index for the target (not used by this filter)
    :return: The base pairs that have at least one neighbor in bps
    """
    return [
        (q_idx, t_idx)
        for q_idx, t_idx in bps
        if _neighbors_in_mfe(q_idx, t_idx, bps)
    ]
def filter_he(bps: List[Tuple[int, int]], q: str, t: str, qidxpos0: int, tidxpos0: int) -> List[Tuple[int, int]]:
    """
    Removes helix ends, judged by whether the neighbors can pair

    A base pair is dropped when exactly one of its two neighbors can pair.

    :param bps: List of tuples in form (q_index, t_index) representing the base pairs
    :param q: The query sequence
    :param t: The target sequence
    :param qidxpos0: Starting index for the query
    :param tidxpos0: Starting index for the target
    :return: The base pairs with zero or two pairable neighbors
    """
    return [
        (q_idx, t_idx)
        for q_idx, t_idx in bps
        if _neighbors_can_pair(q_idx, t_idx, q, t, qidxpos0, tidxpos0) != 1
    ]
def filter_he_mfe(bps: List[Tuple[int, int]], q: str, t: str, qidxpos0: int, tidxpos0: int) -> List[Tuple[int, int]]:
    """
    Removes helix ends, judged by which neighbors are in the MFE interaction

    A base pair is dropped when exactly one of its two neighbors is contained in bps.

    :param bps: List of tuples in form (q_index, t_index) representing the base pairs
    :param q: The query sequence (not used by this filter)
    :param t: The target sequence (not used by this filter)
    :param qidxpos0: Starting index for the query (not used by this filter)
    :param tidxpos0: Starting index for the target (not used by this filter)
    :return: The base pairs with zero or two neighbors in bps
    """
    return [
        (q_idx, t_idx)
        for q_idx, t_idx in bps
        if _neighbors_in_mfe(q_idx, t_idx, bps) != 1
    ]
def get_filter(string: str) -> Optional[Callable[[List[Tuple[int, int]], str, str, int, int], List[Tuple[int, int]]]]:
    """
    Looks up a candidate filter function by its name

    :param string: Name of the filter; one of 'GU', 'AU', 'GC', 'lp', 'lpMfe', 'he', 'heMfe'
        (the lookup is an exact, case-sensitive match)
    :return: The filter function registered under that name, or None if the name is unknown.
        Every filter takes (bps, q, t, qidxpos0, tidxpos0) and returns the filtered base pairs
    """
    candidate_filters = {
        'GU': filter_gu,
        'AU': filter_au,
        'GC': filter_gc,
        'lp': filter_lp,
        'lpMfe': filter_lp_mfe,
        'he': filter_he,
        'heMfe': filter_he_mfe,
    }
    # dict.get yields None for unregistered names; callers must handle that case.
    return candidate_filters.get(string)