|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
1 | 3 | import argparse |
2 | 4 | import sys |
3 | 5 | from Bio.Emboss import PrimerSearch |
@@ -43,13 +45,13 @@ def parse_args(): |
43 | 45 | "-c", |
44 | 46 | "--cut", |
45 | 47 | action="store_true", |
46 | | - help="Cut primers from amplicons sequences", |
| 48 | + help="Cut primers from amplicon sequences (default: False)", |
47 | 49 | ) |
48 | 50 | parser.add_argument( |
49 | | - "-r", |
50 | | - "--orient", |
| 51 | + "-k", |
| 52 | + "--keep-orient", |
51 | 53 | action="store_true", |
52 | | - help="Orient reverse sequences to forward", |
| 54 | + help="Keep original sequence orientation (default: reverse complement if forward primer is on reverse strand)", |
53 | 55 | ) |
54 | 56 | parser.add_argument( |
55 | 57 | "-m", |
@@ -80,7 +82,7 @@ def parse_args(): |
80 | 82 | action="store_true", |
81 | 83 | help="Print summary table to stderr", |
82 | 84 | ) |
83 | | - parser.set_defaults(orient=False, cut=False, log=False) |
| 85 | + parser.set_defaults(keep_orient=False, cut=False, log=False) |
84 | 86 | return parser.parse_args() |
85 | 87 |
|
86 | 88 |
|
@@ -140,6 +142,11 @@ def parse_primersearch(results, seq2strand, minlen, maxlen): |
140 | 142 | continue |
141 | 143 | info = hit.hit_info.split("\n\t") |
142 | 144 | mismatches = 0 |
| 145 | + fw_primer = None |
| 146 | + rv_primer = None |
| 147 | + start = None |
| 148 | + end = None |
| 149 | + forward_on_reverse = None |
143 | 150 | for match in info[1:]: |
144 | 151 | mismatches += int(match.split("with ")[1].split(" ")[0]) |
145 | 152 | if "forward" in match: |
@@ -178,15 +185,19 @@ def parse_primersearch(results, seq2strand, minlen, maxlen): |
178 | 185 | return pd.DataFrame.from_records(d) |
179 | 186 |
|
180 | 187 |
|
def get_amplicon_record(row, seqid2record, cut, keep_orient):
    """Build the amplicon record described by one row of the hits table.

    Args:
        row: Mapping-like row (e.g. a pandas Series) providing the keys
            ``seq_id``, ``amplimer``, ``primer``, ``start_forward``,
            ``end_reverse``, ``fw_primer``, ``rv_primer`` and
            ``forward_match_on_reverse``.
        seqid2record: Mapping from sequence id to a record exposing ``.seq``.
        cut: When true, strip ``len(fw_primer)`` bases from the start and
            ``len(rv_primer)`` bases from the end of the amplicon.
        keep_orient: When true, never reverse-complement the amplicon.
            When false, the amplicon is reverse-complemented if
            ``forward_match_on_reverse`` is truthy.

    Returns:
        A ``SeqRecord`` whose id and description are
        ``"<seq_id>_<amplimer>_<primer>"``.
    """
    template = seqid2record[row["seq_id"]].seq

    # ``start_forward`` is used as a 1-based position from the start and
    # ``end_reverse`` as a 1-based offset counted from the end of the template.
    # The stop index is computed from the template length rather than written
    # as ``-end_reverse + 1``: that negative form turns into 0 when
    # ``end_reverse == 1`` and would silently produce an empty amplicon.
    # ``max`` keeps an out-of-range offset from wrapping into a negative index.
    stop = max(len(template) - row["end_reverse"] + 1, 0)
    amp_seq = template[row["start_forward"] - 1 : stop]

    if cut:
        amp_seq = amp_seq[len(row["fw_primer"]) : len(amp_seq) - len(row["rv_primer"])]

    # Default behaviour re-orients hits whose forward primer matched the
    # reverse strand; ``keep_orient`` opts out of that.
    if row["forward_match_on_reverse"] and not keep_orient:
        amp_seq = amp_seq.reverse_complement()

    amp_id = f"{row['seq_id']}_{row['amplimer']}_{row['primer']}"
    return SeqRecord(amp_seq, id=amp_id, description=amp_id)
191 | 202 |
|
192 | 203 |
|
@@ -220,20 +231,20 @@ def main(): |
220 | 231 | ) |
221 | 232 | ) |
222 | 233 | df = parse_primersearch(results, seq2strand, args.minlen, args.maxlen) |
223 | | - if df.empty: |
| 234 | + if df is None or df.empty: |
224 | 235 | amplicons = SeqRecord(Seq(""), id="no_amps", description="no_amps") |
225 | 236 | else: |
226 | | - amplicons = df.apply( |
227 | | - lambda row: get_amplicon_record(row, seqid2record, args.cut, args.orient), |
228 | | - axis=1, |
229 | | - ).tolist() |
| 237 | + amplicons = [ |
| 238 | + get_amplicon_record(row, seqid2record, args.cut, args.keep_orient) |
| 239 | + for _, row in df.iterrows() |
| 240 | + ] |
230 | 241 | SeqIO.write( |
231 | 242 | amplicons, |
232 | 243 | args.output, |
233 | 244 | "fasta", |
234 | 245 | ) |
235 | 246 | if args.log: |
236 | | - if df.empty: |
| 247 | + if df is None or df.empty: |
237 | 248 | print( |
238 | 249 | "No amplicons found ! \n" |
239 | 250 | "Try less stringent mismatches, minlen or maxlen parameters", |
|
0 commit comments