Skip to content

Commit 8e068dc

Browse files
committed
vcfallelicprimitives: added shell output support for tests and show single example
1 parent 11d71e3 commit 8e068dc

5 files changed

Lines changed: 67 additions & 5 deletions

File tree

samples/10158243.vcf

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
##fileformat=VCFv4.2
2+
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
3+
##INFO=<ID=CONFLICT,Number=.,Type=String,Description="Sample names for which there are multiple paths in the graph with conflicting alleles">
4+
##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
5+
##INFO=<ID=AF,Number=A,Type=Float,Description="Estimated allele frequency in the range (0,1]">
6+
##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of samples with data">
7+
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
8+
##INFO=<ID=LV,Number=1,Type=Integer,Description="Level in the snarl tree (0=top level)">
9+
##INFO=<ID=PS,Number=1,Type=String,Description="ID of variant corresponding to parent snarl">
10+
##INFO=<ID=AT,Number=R,Type=String,Description="Allele Traversal as path in graph">
11+
##contig=<ID=grch38#chr4,length=97395>
12+
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00438 HG00621 HG00673 HG00733 HG00735 HG00741 HG01071 HG01106 HG01109 HG01123 HG01175 HG01243 HG01258 HG01358 HG01361 HG01891 HG01928 HG01952 HG01978 HG02055 HG02080 HG02109 HG02145 HG02148 HG02257 HG02486 HG02559 HG02572 HG02622 HG02630 HG02717 HG02723 HG02818 HG02886 HG03098 HG03453 HG03486 HG03492 HG03516 HG03540 HG03579 NA18906 NA20129 NA21309 chm13
13+
grch38#chr4 10158243 >3655>3662 ACCCCCACCCCCACC ACC,AC,ACCCCCACCCCCAC,ACCCCCACC,ACA 60 . AC=64,3,2,3,1;AF=0.719101,0.0337079,0.0224719,0.0337079,0.011236;AN=89;AT=>3655>3656>3657>3658>3659>3660>3662,>3655>3656>3660>3662,>3655>3660>3662,>3655>3656>3657>3658>3660>3662,>3655>3656>3657>3660>3662,>3655>3656>3661>3662;NS=45;LV=0 GT 0|0 1|1 1|1 1|0 5|1 0|4 0|1 0|1 1|1 1|1 1|1 1|1 1|1 1|1 1|1 4|3 1|1 1|1 1|1 1|0 1|0 1|0 1|0 1|1 1|1 1|4 1|1 1|1 3|0 1|0 1|1 0|1 1|1 1|1 2|1 1|2 1|1 1|1 0|1 1|1 1|1 1|0 1|2 1|1 0
File renamed without changes.
File renamed without changes.

test/pytest/rtest.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,18 @@ def head(cmd, lines=4):
3636
header = [l.replace("../build/", "") for l in header]
3737
print("\n".join(header))
3838

39+
def sh(cmd):
    """Run ``cmd`` through ``bash -c`` and print what it wrote.

    The command's stdout is printed; when stdout is empty, its stderr is
    printed instead. Tabs in the output are expanded to 8-column tab stops
    before printing. Nothing is returned.

    Args:
        cmd: Shell command line, passed verbatim as the argument of
            ``bash -c``.
    """
    p = Popen(['bash', '-c', cmd], stdout=PIPE, stderr=PIPE, close_fds=True)
    stdout, stderr = p.communicate()
    # if stdout is empty fetch stderr
    out = stdout if stdout else stderr
    # Splitting on "\n" and re-joining with "\n" is an identity operation,
    # so the decoded, tab-expanded text is printed directly.
    print(out.decode().expandtabs(tabsize=8))
50+
3951
def run_stdout(cmd, ext = "vcf"):
4052
os.makedirs(tmpdir,exist_ok=True)
4153
curframe = inspect.currentframe()

test/pytest/vcfallelicprimitives.md

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ multiple SNPs unless the -m flag is provided.
2525

2626
: shows help message and exits.
2727

28+
See more below.
29+
2830
# EXIT VALUES
2931

3032
**0**
@@ -38,19 +40,54 @@ multiple SNPs unless the -m flag is provided.
3840

3941
<!--
4042
41-
>>> from pytest.rtest import run_stdout, head, cat
43+
>>> from pytest.rtest import run_stdout, head, cat, sh
4244
4345
-->
4446

4547
```
4648
47-
>>> head("vcfallelicprimitives -h",1)
49+
>>> head("vcfallelicprimitives -h",20)
4850
usage: vcfallelicprimitives [options] [file]
51+
>
52+
If multiple allelic primitives (gaps or mismatches) are specified in
53+
a single VCF record, split the record into multiple lines, but drop all
54+
INFO fields. Does not handle genotypes (yet). MNPs are split into
55+
multiple SNPs unless the -m flag is provided. Records generated by splits have the
56+
options:
57+
-m, --use-mnps Retain MNPs as separate events (default: false).
58+
-t, --tag-parsed FLAG Tag records which are split apart of a complex allele with this flag.
59+
-L, --max-length LEN Do not manipulate records in which either the ALT or
60+
REF is longer than LEN (default: 200).
61+
-k, --keep-info Maintain site and allele-level annotations when decomposing.
62+
Note that in many cases, such as multisample VCFs, these won't
63+
be valid post-decomposition. For biallelic loci in single-sample
64+
VCFs, they should be usable with caution.
65+
-g, --keep-geno Maintain genotype-level annotations when decomposing. Similar
66+
caution should be used for this as for --keep-info.
67+
>
68+
Type: transformation
69+
>
4970
5071
```
5172

52-
vcfallelicprimitives picks complex regions and simplifies nested alignments
73+
vcfallelicprimitives picks complex regions and simplifies nested alignments. For example:
74+
75+
```python
76+
77+
>>> sh("grep 10158243 ../samples/10158243.vcf")
78+
grch38#chr4 10158243 >3655>3662 ACCCCCACCCCCACC ACC,AC,ACCCCCACCCCCAC,ACCCCCACC,ACA 60 . AC=64,3,2,3,1;AF=0.719101,0.0337079,0.0224719,0.0337079,0.011236;AN=89;AT=>3655>3656>3657>3658>3659>3660>3662,>3655>3656>3660>3662,>3655>3660>3662,>3655>3656>3657>3658>3660>3662,>3655>3656>3657>3660>3662,>3655>3656>3661>3662;NS=45;LV=0 GT 0|0 1|1 1|1 1|0 5|1 0|4 0|1 0|1 1|1 1|1 1|1 1|1 1|1 1|1 1|1 4|3 1|1 1|1 1|1 1|0 1|0 1|0 1|0 1|1 1|1 1|4 1|1 1|1 3|0 1|0 1|1 0|1 1|1 1|1 2|1 1|2 1|1 1|1 0|1 1|1 1|1 1|0 1|2 1|1 0
79+
80+
```
5381

82+
After aligning, vcfallelicprimitives reduces this record to two records and adjusts the genotypes accordingly:
83+
84+
```python
85+
86+
>>> sh("../build/vcfallelicprimitives -m -L 1000 ../samples/10158243.vcf|grep -v ^\#")
87+
grch38#chr4 10158243 . ACCCCCACCCCCAC ACCCCCAC,ACAC,AC,A 60 . AC=3,1,64,3;AF=0.0337079,0.011236,0.719101,0.0337079;LEN=6,10,12,13;TYPE=del,del,del,del GT 0|0 3|3 3|3 3|0 2|3 0|1 0|3 0|3 3|3 3|3 3|3 3|3 3|3 3|3 3|3 1|0 3|3 3|3 3|3 3|0 3|0 3|0 3|0 3|3 3|3 3|1 3|3 3|3 0|0 3|0 3|3 0|3 3|3 3|3 4|3 3|4 3|3 3|3 0|3 3|3 3|3 3|0 3|4 3|3 0
88+
grch38#chr4 10158255 . ACC AC,A 60 . AC=2,1;AF=0.0224719,0.011236;LEN=1,2;TYPE=del,del GT 0|0 0|0 0|0 0|0 2|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 1|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0|0 0
89+
90+
```
5491

5592
## Source code
5693

@@ -60,10 +97,10 @@ vcfallelicprimitives picks complex regions and simplifies nested alignments
6097

6198
```python
6299
>>> run_stdout("vcfallelicprimitives -m -L 1000 ../samples/grch38#chr8_36353854-36453166.vcf", ext="vcf")
63-
output in <a href="../data/regression/vcfallelicprimitives_2.vcf">vcfallelicprimitives_2.vcf</a>
100+
output in <a href="../data/regression/vcfallelicprimitives_4.vcf">vcfallelicprimitives_4.vcf</a>
64101

65102
>>> run_stdout("vcfallelicprimitives -m -L 1000 ../samples/grch38#chr4_10083863-10181258.vcf", ext="vcf")
66-
output in <a href="../data/regression/vcfallelicprimitives_3.vcf">vcfallelicprimitives_3.vcf</a>
103+
output in <a href="../data/regression/vcfallelicprimitives_5.vcf">vcfallelicprimitives_5.vcf</a>
67104

68105
```
69106

0 commit comments

Comments
 (0)