Skip to content

Commit d3fd914

Browse files
authored
Merge pull request #49 from metagenlab/amplicon
feat: amplicon simulation on input fasta
2 parents 3a0645d + 2c1125e commit d3fd914

7 files changed

Lines changed: 59 additions & 41 deletions

File tree

mess/__main__.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@
6464
],
6565
},
6666
{
67-
"name": "Amplicons options",
67+
"name": "Amplicon extraction options",
6868
"options": [
6969
"--fw",
7070
"--rv",
@@ -73,17 +73,18 @@
7373
"--amp-minlen",
7474
"--amp-maxlen",
7575
"--cut",
76-
"--orient",
76+
"--keep-orient",
77+
"--amp",
7778
],
7879
},
7980
{
8081
"name": "art_illumina options",
8182
"options": [
8283
"--custom-err",
83-
"--errfree",
8484
"--paired",
8585
"--frag-len",
8686
"--frag-sd",
87+
"--errfree",
8788
],
8889
},
8990
{

mess/util.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -295,13 +295,18 @@ def sim_options(func):
295295
show_default=True,
296296
),
297297
click.option(
298-
"--orient",
299-
help="Orient reverse amplicon strands in the forward orientation",
298+
"--keep-orient",
299+
help="Keep original sequence orientation (default: reverse complement if forward primer is on reverse strand)",
300300
type=bool,
301-
default=True,
301+
default=False,
302302
is_flag=True,
303303
show_default=True,
304304
),
305+
click.option(
306+
"--amp",
307+
help="Fasta files are amplicons (skips primersearch)",
308+
is_flag=True,
309+
),
305310
click.option(
306311
"--tech",
307312
help="Sequencing technology",
@@ -354,7 +359,7 @@ def sim_options(func):
354359
click.option(
355360
"--rep-sd",
356361
help="Standard deviation between replicate coverages (0: no deviation)",
357-
type=int,
362+
type=float,
358363
default=0,
359364
show_default=True,
360365
),

mess/workflow/rules/preflight/functions.smk

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def fasta_input(wildcards):
130130

131131
def list_fastas(wildcards):
132132
df = get_fasta_table(wildcards)
133-
if AMPLICONS:
133+
if PRIMERSEARCH:
134134
return expand(
135135
os.path.join(dir.out.processing, "{fasta}.amplicons.fasta"),
136136
fasta=list(set(df.index)),
@@ -183,7 +183,7 @@ def get_value(value, wildcards):
183183

184184

185185
def get_asm_summary(wildcards):
186-
if AMPLICONS or FASTA_DIR or FASTA_PATH:
186+
if PRIMERSEARCH or FASTA_DIR or FASTA_PATH:
187187
return os.path.join(dir.out.processing, "seqkit_stats.tsv")
188188
else:
189189
try:

mess/workflow/rules/processing/coverages.smk

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ checkpoint calculate_genome_coverages:
3131
read_len=MEAN_LEN,
3232
pairing=PAIRED,
3333
seed=SEED,
34-
amplicons=AMPLICONS,
34+
amplicons=PRIMERSEARCH,
3535
resources:
3636
mem_mb=config.resources.sml.mem,
3737
mem=str(config.resources.sml.mem) + "MB",

mess/workflow/rules/processing/fastas.smk

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ rule rename_fastas:
2020
"""
2121

2222

23-
if AMPLICONS:
23+
if PRIMERSEARCH:
2424

2525
if PRIMERS:
2626
primers = os.path.abspath(PRIMERS)
@@ -58,8 +58,8 @@ if AMPLICONS:
5858
amp_args = ""
5959
if CUT:
6060
amp_args += "--cut "
61-
if ORIENT:
62-
amp_args += "--orient "
61+
if KEEP_ORIENT:
62+
amp_args += "--keep-orient "
6363

6464
rule get_amplicons:
6565
input:
@@ -77,12 +77,12 @@ if AMPLICONS:
7777
os.path.join(dir.out.logs, "primersearch", "{fasta}_summary.log"),
7878
shell:
7979
"""
80-
python {params.script} \\
81-
--input {input.search} \\
82-
--fasta {input.fasta} \\
83-
--primers {input.primers} \\
84-
--minlen {params.minlen} \\
85-
--maxlen {params.maxlen} \\
80+
{params.script} \\
81+
-i {input.search} \\
82+
-f {input.fasta} \\
83+
-p {input.primers} \\
84+
-m {params.minlen} \\
85+
-M {params.maxlen} \\
8686
{params.args} \\
8787
--output {output} \\
8888
--log 2> {log}
@@ -96,7 +96,7 @@ rule get_fasta_stats:
9696
os.path.join(dir.out.processing, "seqkit_stats.tsv"),
9797
params:
9898
os.path.join(dir.out.processing, "*.fasta")
99-
if not AMPLICONS
99+
if not PRIMERSEARCH
100100
else os.path.join(dir.out.processing, "*.amplicons.fasta"),
101101
log:
102102
os.path.join(dir.out.logs, "seqkit", "stats.log"),
@@ -129,7 +129,7 @@ checkpoint split_contigs:
129129
params:
130130
circular=CIRCULAR,
131131
rotate=ROTATE,
132-
amplicons=AMPLICONS,
132+
amplicons=PRIMERSEARCH,
133133
read_len=MEAN_LEN,
134134
resources:
135135
mem_mb=config.resources.sml.mem,

mess/workflow/scripts/get_amplicons.py

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#!/usr/bin/env python3
2+
13
import argparse
24
import sys
35
from Bio.Emboss import PrimerSearch
@@ -43,13 +45,13 @@ def parse_args():
4345
"-c",
4446
"--cut",
4547
action="store_true",
46-
help="Cut primers from amplicons sequences",
48+
help="Cut primers from amplicon sequences (default: False)",
4749
)
4850
parser.add_argument(
49-
"-r",
50-
"--orient",
51+
"-k",
52+
"--keep-orient",
5153
action="store_true",
52-
help="Orient reverse sequences to forward",
54+
help="Kepp original sequence orientation (default: reverse complement if forward primer is on reverse strand)",
5355
)
5456
parser.add_argument(
5557
"-m",
@@ -80,7 +82,7 @@ def parse_args():
8082
action="store_true",
8183
help="Print summary table to stderr",
8284
)
83-
parser.set_defaults(orient=False, cut=False, log=False)
85+
parser.set_defaults(keep_orient=False, cut=False, log=False)
8486
return parser.parse_args()
8587

8688

@@ -140,6 +142,11 @@ def parse_primersearch(results, seq2strand, minlen, maxlen):
140142
continue
141143
info = hit.hit_info.split("\n\t")
142144
mismatches = 0
145+
fw_primer = None
146+
rv_primer = None
147+
start = None
148+
end = None
149+
forward_on_reverse = None
143150
for match in info[1:]:
144151
mismatches += int(match.split("with ")[1].split(" ")[0])
145152
if "forward" in match:
@@ -178,15 +185,19 @@ def parse_primersearch(results, seq2strand, minlen, maxlen):
178185
return pd.DataFrame.from_records(d)
179186

180187

181-
def get_amplicon_record(row, seqid2record, cut, orient):
182-
amp_id = f"{row.seq_id}_{row.amplimer}_{row.primer}"
183-
amp_seq = seqid2record[row.seq_id].seq[row.start_forward - 1 : -row.end_reverse + 1]
188+
def get_amplicon_record(row, seqid2record, cut, keep_orient):
189+
amp_id = f"{row['seq_id']}_{row['amplimer']}_{row['primer']}"
190+
amp_seq = seqid2record[row["seq_id"]].seq[
191+
row["start_forward"] - 1 : -row["end_reverse"] + 1
192+
]
184193

185194
if cut:
186-
amp_seq = amp_seq[len(row.fw_primer) : len(amp_seq) - len(row.rv_primer)]
195+
amp_seq = amp_seq[len(row["fw_primer"]) : len(amp_seq) - len(row["rv_primer"])]
196+
197+
if row["forward_match_on_reverse"]:
198+
if not keep_orient:
199+
amp_seq = amp_seq.reverse_complement()
187200

188-
if orient and row.forward_match_on_reverse:
189-
amp_seq = amp_seq.reverse_complement()
190201
return SeqRecord(amp_seq, id=amp_id, description=amp_id)
191202

192203

@@ -220,20 +231,20 @@ def main():
220231
)
221232
)
222233
df = parse_primersearch(results, seq2strand, args.minlen, args.maxlen)
223-
if df.empty:
234+
if df is None or df.empty:
224235
amplicons = SeqRecord(Seq(""), id="no_amps", description="no_amps")
225236
else:
226-
amplicons = df.apply(
227-
lambda row: get_amplicon_record(row, seqid2record, args.cut, args.orient),
228-
axis=1,
229-
).tolist()
237+
amplicons = [
238+
get_amplicon_record(row, seqid2record, args.cut, args.keep_orient)
239+
for _, row in df.iterrows()
240+
]
230241
SeqIO.write(
231242
amplicons,
232243
args.output,
233244
"fasta",
234245
)
235246
if args.log:
236-
if df.empty:
247+
if df is None or df.empty:
237248
print(
238249
"No amplicons found ! \n"
239250
"Try less stringent mismatches, minlen or maxlen parameters",

mess/workflow/simulate.smk

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,17 @@ SEQ_TECH = config.args.tech
2727
PRIMERS = config.args.primers
2828
FORWARD_PRIMER = config.args.fw
2929
REVERSE_PRIMER = config.args.rv
30-
31-
AMPLICONS = False
30+
AMPLICONS = config.args.amp
31+
PRIMERSEARCH = False
3232
if PRIMERS or (FORWARD_PRIMER and REVERSE_PRIMER):
33+
PRIMERSEARCH = True
3334
AMPLICONS = True
3435

3536
AMP_MINLEN = config.args.amp_minlen
3637
AMP_MAXLEN = config.args.amp_maxlen
3738
MISMATCH = config.args.mismatch
3839
CUT = config.args.cut
39-
ORIENT = config.args.orient
40+
KEEP_ORIENT = config.args.keep_orient
4041

4142
BASES = config.args.bases
4243
PAIRED = config.args.paired

0 commit comments

Comments
 (0)