Skip to content

Commit

Permalink
Draft VWF to BED7 conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
bede committed Jan 31, 2025
1 parent 5f2cfc3 commit fcafa57
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/primaschema/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,17 @@ def six_to_seven(bed_path: Path, fasta_path: Path):
print(bed_str)


def vwf_to_bed(vwf_path: Path, chrom: str = "chrom"):
    """
    Convert a Viridian VWF scheme TSV to a 7 column primer.bed

    The converted primer.bed text is printed to standard output.

    :arg vwf_path: path of the Viridian VWF scheme TSV file
    :arg chrom: name of reference chromosome
    """
    bed_str = lib.convert_vwf_to_primer_bed(vwf_path=vwf_path, chrom=chrom)
    print(bed_str)


def diff(bed1_path: Path, bed2_path: Path, only_positions: bool = False):
"""
Show the symmetric difference of records in two bed files
Expand Down Expand Up @@ -215,6 +226,7 @@ def main():
"diff": diff,
"6to7": six_to_seven,
"7to6": seven_to_six,
"vwftobed": vwf_to_bed,
"plot": plot,
"show-intervals": amplicon_intervals,
"show-discordant-primers": discordant_primers,
Expand Down
32 changes: 32 additions & 0 deletions src/primaschema/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,38 @@ def convert_scheme_bed_to_primer_bed(bed_path: Path, fasta_path: Path) -> str:
return df.to_csv(sep="\t", header=False, index=False)


def convert_vwf_to_primer_bed(vwf_path: Path, chrom: str = "chrom") -> str:
    """
    Convert a Viridian VWF scheme TSV to 7 column primer.bed text

    Reads the columns Amplicon_name, Primer_name, Left_or_right, Sequence and
    Position from the input and emits one headerless, tab-separated line per
    primer with the fields chrom, chromStart, chromEnd, name, poolName,
    strand and sequence.

    :arg vwf_path: path of the tab-separated VWF scheme file
    :arg chrom: name of reference chromosome, written to the first column
    :returns: primer.bed contents as a string
    :raises ValueError: if an amplicon name does not end in "_<integer>", or
        if a Position value cannot be converted to an integer
    """
    bed_columns = [
        "chrom",
        "chromStart",
        "chromEnd",
        "name",
        "poolName",
        "strand",
        "sequence",
    ]
    vwf_df = pd.read_csv(vwf_path, sep="\t")
    bed_records = []

    # to_dict("records") yields one plain dict per row keyed by column name
    for r in vwf_df.to_dict("records"):
        amplicon_name = str(r["Amplicon_name"])
        try:
            amplicon_number = int(amplicon_name.split("_")[-1])
        except ValueError as e:
            raise ValueError(
                f"Amplicon name {amplicon_name!r} does not end in a numeric suffix"
            ) from e
        # Odd-numbered amplicons are assigned to pool 1, even-numbered to pool 2
        pool_name = 1 if amplicon_number % 2 != 0 else 2
        sequence = r["Sequence"]
        # Coerce so that a float-typed column cannot leak "10.0" into the output
        start_pos = int(r["Position"])
        bed_records.append(
            {
                "chrom": chrom,
                "chromStart": start_pos,
                # End coordinate is derived from the primer length
                "chromEnd": start_pos + len(sequence),
                "name": r["Primer_name"],
                "poolName": str(pool_name),
                # Anything other than "left" is treated as a reverse primer
                "strand": "+" if r["Left_or_right"] == "left" else "-",
                "sequence": sequence,
            }
        )

    bed_df = pd.DataFrame(bed_records, columns=bed_columns)
    return bed_df.to_csv(sep="\t", header=False, index=False)


def hash_bed(bed_path: Path) -> str:
bed_type = infer_bed_type(bed_path)
if bed_type == "primer":
Expand Down

0 comments on commit fcafa57

Please sign in to comment.