Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions mess/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
],
},
{
"name": "Amplicons options",
"name": "Amplicon extraction options",
"options": [
"--fw",
"--rv",
Expand All @@ -73,17 +73,18 @@
"--amp-minlen",
"--amp-maxlen",
"--cut",
"--orient",
"--keep-orient",
"--amp",
],
},
{
"name": "art_illumina options",
"options": [
"--custom-err",
"--errfree",
"--paired",
"--frag-len",
"--frag-sd",
"--errfree",
],
},
{
Expand Down
13 changes: 9 additions & 4 deletions mess/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,13 +295,18 @@ def sim_options(func):
show_default=True,
),
click.option(
"--orient",
help="Orient reverse amplicon strands in the forward orientation",
"--keep-orient",
help="Keep original sequence orientation (default: reverse complement if forward primer is on reverse strand)",
type=bool,
default=True,
default=False,
is_flag=True,
show_default=True,
),
click.option(
"--amp",
help="Fasta files are amplicons (skips primersearch)",
is_flag=True,
),
click.option(
"--tech",
help="Sequencing technology",
Expand Down Expand Up @@ -354,7 +359,7 @@ def sim_options(func):
click.option(
"--rep-sd",
help="Standard deviation between replicate coverages (0: no deviation)",
type=int,
type=float,
default=0,
show_default=True,
),
Expand Down
4 changes: 2 additions & 2 deletions mess/workflow/rules/preflight/functions.smk
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def fasta_input(wildcards):

def list_fastas(wildcards):
df = get_fasta_table(wildcards)
if AMPLICONS:
if PRIMERSEARCH:
return expand(
os.path.join(dir.out.processing, "{fasta}.amplicons.fasta"),
fasta=list(set(df.index)),
Expand Down Expand Up @@ -183,7 +183,7 @@ def get_value(value, wildcards):


def get_asm_summary(wildcards):
if AMPLICONS or FASTA_DIR or FASTA_PATH:
if PRIMERSEARCH or FASTA_DIR or FASTA_PATH:
return os.path.join(dir.out.processing, "seqkit_stats.tsv")
else:
try:
Expand Down
2 changes: 1 addition & 1 deletion mess/workflow/rules/processing/coverages.smk
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ checkpoint calculate_genome_coverages:
read_len=MEAN_LEN,
pairing=PAIRED,
seed=SEED,
amplicons=AMPLICONS,
amplicons=PRIMERSEARCH,
resources:
mem_mb=config.resources.sml.mem,
mem=str(config.resources.sml.mem) + "MB",
Expand Down
22 changes: 11 additions & 11 deletions mess/workflow/rules/processing/fastas.smk
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ rule rename_fastas:
"""


if AMPLICONS:
if PRIMERSEARCH:

if PRIMERS:
primers = os.path.abspath(PRIMERS)
Expand Down Expand Up @@ -58,8 +58,8 @@ if AMPLICONS:
amp_args = ""
if CUT:
amp_args += "--cut "
if ORIENT:
amp_args += "--orient "
if KEEP_ORIENT:
amp_args += "--keep-orient "

rule get_amplicons:
input:
Expand All @@ -77,12 +77,12 @@ if AMPLICONS:
os.path.join(dir.out.logs, "primersearch", "{fasta}_summary.log"),
shell:
"""
python {params.script} \\
--input {input.search} \\
--fasta {input.fasta} \\
--primers {input.primers} \\
--minlen {params.minlen} \\
--maxlen {params.maxlen} \\
{params.script} \\
-i {input.search} \\
-f {input.fasta} \\
-p {input.primers} \\
-m {params.minlen} \\
-M {params.maxlen} \\
{params.args} \\
--output {output} \\
--log 2> {log}
Expand All @@ -96,7 +96,7 @@ rule get_fasta_stats:
os.path.join(dir.out.processing, "seqkit_stats.tsv"),
params:
os.path.join(dir.out.processing, "*.fasta")
if not AMPLICONS
if not PRIMERSEARCH
else os.path.join(dir.out.processing, "*.amplicons.fasta"),
log:
os.path.join(dir.out.logs, "seqkit", "stats.log"),
Expand Down Expand Up @@ -129,7 +129,7 @@ checkpoint split_contigs:
params:
circular=CIRCULAR,
rotate=ROTATE,
amplicons=AMPLICONS,
amplicons=PRIMERSEARCH,
read_len=MEAN_LEN,
resources:
mem_mb=config.resources.sml.mem,
Expand Down
45 changes: 28 additions & 17 deletions mess/workflow/scripts/get_amplicons.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

import argparse
import sys
from Bio.Emboss import PrimerSearch
Expand Down Expand Up @@ -43,13 +45,13 @@ def parse_args():
"-c",
"--cut",
action="store_true",
help="Cut primers from amplicons sequences",
help="Cut primers from amplicon sequences (default: False)",
)
parser.add_argument(
"-r",
"--orient",
"-k",
"--keep-orient",
action="store_true",
help="Orient reverse sequences to forward",
help="Kepp original sequence orientation (default: reverse complement if forward primer is on reverse strand)",
)
parser.add_argument(
"-m",
Expand Down Expand Up @@ -80,7 +82,7 @@ def parse_args():
action="store_true",
help="Print summary table to stderr",
)
parser.set_defaults(orient=False, cut=False, log=False)
parser.set_defaults(keep_orient=False, cut=False, log=False)
return parser.parse_args()


Expand Down Expand Up @@ -140,6 +142,11 @@ def parse_primersearch(results, seq2strand, minlen, maxlen):
continue
info = hit.hit_info.split("\n\t")
mismatches = 0
fw_primer = None
rv_primer = None
start = None
end = None
forward_on_reverse = None
for match in info[1:]:
mismatches += int(match.split("with ")[1].split(" ")[0])
if "forward" in match:
Expand Down Expand Up @@ -178,15 +185,19 @@ def parse_primersearch(results, seq2strand, minlen, maxlen):
return pd.DataFrame.from_records(d)


def get_amplicon_record(row, seqid2record, cut, orient):
amp_id = f"{row.seq_id}_{row.amplimer}_{row.primer}"
amp_seq = seqid2record[row.seq_id].seq[row.start_forward - 1 : -row.end_reverse + 1]
def get_amplicon_record(row, seqid2record, cut, keep_orient):
amp_id = f"{row['seq_id']}_{row['amplimer']}_{row['primer']}"
amp_seq = seqid2record[row["seq_id"]].seq[
row["start_forward"] - 1 : -row["end_reverse"] + 1
]

if cut:
amp_seq = amp_seq[len(row.fw_primer) : len(amp_seq) - len(row.rv_primer)]
amp_seq = amp_seq[len(row["fw_primer"]) : len(amp_seq) - len(row["rv_primer"])]

if row["forward_match_on_reverse"]:
if not keep_orient:
amp_seq = amp_seq.reverse_complement()

if orient and row.forward_match_on_reverse:
amp_seq = amp_seq.reverse_complement()
return SeqRecord(amp_seq, id=amp_id, description=amp_id)


Expand Down Expand Up @@ -220,20 +231,20 @@ def main():
)
)
df = parse_primersearch(results, seq2strand, args.minlen, args.maxlen)
if df.empty:
if df is None or df.empty:
amplicons = SeqRecord(Seq(""), id="no_amps", description="no_amps")
else:
amplicons = df.apply(
lambda row: get_amplicon_record(row, seqid2record, args.cut, args.orient),
axis=1,
).tolist()
amplicons = [
get_amplicon_record(row, seqid2record, args.cut, args.keep_orient)
for _, row in df.iterrows()
]
SeqIO.write(
amplicons,
args.output,
"fasta",
)
if args.log:
if df.empty:
if df is None or df.empty:
print(
"No amplicons found ! \n"
"Try less stringent mismatches, minlen or maxlen parameters",
Expand Down
7 changes: 4 additions & 3 deletions mess/workflow/simulate.smk
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,17 @@ SEQ_TECH = config.args.tech
PRIMERS = config.args.primers
FORWARD_PRIMER = config.args.fw
REVERSE_PRIMER = config.args.rv

AMPLICONS = False
AMPLICONS = config.args.amp
PRIMERSEARCH = False
if PRIMERS or (FORWARD_PRIMER and REVERSE_PRIMER):
PRIMERSEARCH = True
AMPLICONS = True

AMP_MINLEN = config.args.amp_minlen
AMP_MAXLEN = config.args.amp_maxlen
MISMATCH = config.args.mismatch
CUT = config.args.cut
ORIENT = config.args.orient
KEEP_ORIENT = config.args.keep_orient

BASES = config.args.bases
PAIRED = config.args.paired
Expand Down