From a7734b8abd262122043f6a0f12faae853baad8da Mon Sep 17 00:00:00 2001 From: Chris Kachulis Date: Fri, 19 Dec 2025 14:18:02 -0500 Subject: [PATCH 1/6] fix bugs --- .../LeftAlignAndTrimVariants.java | 34 ++++++++++++++----- .../variant/GATKVariantContextUtils.java | 11 +++--- .../expected_left_align_hg38.vcf | 9 ++++- ...pected_left_align_hg38_maxIndelSize296.vcf | 9 ++++- ...pected_left_align_hg38_maxIndelSize342.vcf | 9 ++++- .../expected_left_align_hg38_notrim.vcf | 9 ++++- ..._align_hg38_notrim_split_multiallelics.vcf | 11 +++++- ...ed_left_align_hg38_split_multiallelics.vcf | 11 +++++- ...gn_hg38_split_multiallelics_keepOrigAC.vcf | 11 +++++- .../test_left_align_hg38.vcf | 7 ++++ 10 files changed, 99 insertions(+), 22 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java index 70c4c46b724..fa6e5ca4a24 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java @@ -28,11 +28,11 @@ * Left-align indels in a variant callset * *

- * This tool takes a VCF file, left-aligns the indels and trims common bases from indels, + * This tool takes a VCF file, left-aligns the indels and trims common bases from all variants, * leaving them with a minimum representation. The same indel can often be placed at multiple positions and still * represent the same haplotype. While the standard convention with VCF is to place an indel at the left-most position * this isn't always done, so this tool can be used to left-align them. This tool optionally splits multiallelic - * sites into biallelics and left-aligns individual alleles. Optionally, the tool will not trim common bases from indels. + * sites into biallelics and left-aligns individual alleles. Optionally, the tool will not trim common bases from variants. *

* *

Input

@@ -172,8 +172,10 @@ public class LeftAlignAndTrimVariants extends VariantWalker { private VariantContextWriter vcfWriter = null; private VCFHeader vcfHeader = null; - VariantContext lastVariant; - + private int thisVariantGroupStart = 0; + private String thisVariantGroupContig = null; + private VariantContext lastVariantWritten = null; + private final List realignedVariants = new ArrayList<>(); @Override public void onTraversalStart() { final Map vcfHeaders = Collections.singletonMap(getDrivingVariantsFeatureInput().getName(), getHeaderForVariants()); @@ -213,6 +215,17 @@ private Set createVCFHeaderLineList(Map vcfHea */ @Override public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext ref, FeatureContext featureContext) { + if (!vc.getContig().equals(thisVariantGroupContig) || vc.getStart() > thisVariantGroupStart) { + realignedVariants.sort(Comparator.comparingInt(VariantContext::getStart)); + for (VariantContext realignedVariant : realignedVariants) { + vcfWriter.add(realignedVariant); + } + thisVariantGroupStart = vc.getStart(); + thisVariantGroupContig = vc.getContig(); + lastVariantWritten = realignedVariants.isEmpty() ? lastVariantWritten : realignedVariants.get(realignedVariants.size() - 1); + realignedVariants.clear(); + } + final List vcList; if (splitMultiallelics) { if (vc.getGenotypes().stream().anyMatch(g -> g.hasAnyAttribute(GATKVCFConstants.ALLELE_FRACTION_KEY))) { @@ -231,12 +244,10 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext if (indelLength > maxIndelSize) { logger.info(String.format("%s (%d) at position %s:%d; skipping that record. Set --max-indel-length >= %d", "Indel is too long", indelLength, splitVariant.getContig(), splitVariant.getStart(), indelLength)); - lastVariant = splitVariant; - vcfWriter.add(splitVariant); + realignedVariants.add(splitVariant); } else { - final int distanceToLastVariant = (lastVariant != null && splitVariant.contigsMatch(lastVariant)) ? 
splitVariant.getStart() - lastVariant.getEnd() : Integer.MAX_VALUE; - lastVariant = GATKVariantContextUtils.leftAlignAndTrim(splitVariant, ref, Math.min(maxLeadingBases, distanceToLastVariant - 1), !dontTrimAlleles); - vcfWriter.add(lastVariant); + final int distanceToLastVariant = (lastVariantWritten != null && splitVariant.contigsMatch(lastVariantWritten)) ? splitVariant.getStart() - lastVariantWritten.getEnd() : Integer.MAX_VALUE; + realignedVariants.add(GATKVariantContextUtils.leftAlignAndTrim(splitVariant, ref, Math.min(maxLeadingBases, distanceToLastVariant), !dontTrimAlleles)); } } } @@ -256,6 +267,11 @@ public boolean requiresReference() { */ @Override public Object onTraversalSuccess() { + //write out remaining variants + realignedVariants.sort(Comparator.comparingInt(VariantContext::getStart)); + for (VariantContext realignedVariant : realignedVariants) { + vcfWriter.add(realignedVariant); + } return "SUCCESS"; } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java index 2df684087e4..6dd20811d72 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVariantContextUtils.java @@ -2220,15 +2220,13 @@ public static List removeItemsByIndex(List data, List indexes * By definition, it will only take biallelic vc's. Splitting into multiple alleles has to be * handled by calling routine. * - * @param vc Input VC with variants to left align + * @param vc Input VC with variants to left align and trim * @param ref Reference context * @return new VC. 
*/ - public static VariantContext leftAlignAndTrim(final VariantContext vc, final ReferenceContext ref, final int maxLeadingBases, final boolean trim) { - if (!vc.isIndel() || maxLeadingBases <= 0) { - return vc; - } + public static VariantContext leftAlignAndTrim(final VariantContext vc, final ReferenceContext ref, final int maxLeadingBasesIndel, final boolean trim) { + final int maxLeadingBases = vc.isIndel() ? maxLeadingBasesIndel : 0; for(int leadingBases = Math.min(maxLeadingBases, 10); leadingBases <= maxLeadingBases; leadingBases = Math.min(2*leadingBases, maxLeadingBases)) { final int refStart = Math.max(vc.getStart() - leadingBases, 1); @@ -2245,8 +2243,9 @@ public static VariantContext leftAlignAndTrim(final VariantContext vc, final Ref return result; }).collect(Collectors.toList()); + final int boundStart = vc.isSNP() || vc.isMNP() ? variantOffsetInRef : variantOffsetInRef + 1; // +1 to ignore the shared base in front for indels final List alleleRanges = vc.getAlleles().stream() - .map(a -> new IndexRange(variantOffsetInRef + 1, variantOffsetInRef + a.length())) // +1 to ignore the shared base in front + .map(a -> new IndexRange(boundStart, variantOffsetInRef + a.length())) .collect(Collectors.toList()); // note that this also shifts the index ranges as a side effect, so below they can be used to output allele bases diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38.vcf index 05df02709e0..5b3ab8afcc9 100755 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38.vcf @@ -35,5 +35,12 @@ chr21 10382389 . A ATT 30 . 
AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 10388233 . GGAA G 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 10804284 . T TGC 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 13255296 . A G 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. -chr21 13255297 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. +chr21 13255296 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 39583817 . CTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCT C 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584859 . C G 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584950 . C G,CC 30 . AC=2,1;AF=0.500,0.250;AN=4 GT 1/1 0/2 +chr21 39584953 . AA AG,A 30 . AC=2,1;AF=0.500,0.250;AN=4 GT 1/1 0/2 +chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586243 . TA T 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586243 . T TA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586245 . A G 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize296.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize296.vcf index ee3325842fd..94be82d58e5 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize296.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize296.vcf @@ -35,5 +35,12 @@ chr21 10382389 . A ATT 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 10388233 . GGAA G 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 10804284 . T TGC 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 13255296 . A G 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. -chr21 13255297 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. +chr21 13255296 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 39583817 . CTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCT C 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584859 . C G 30 . 
AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584950 . C G,CC 30 . AC=2,1;AF=0.500,0.250;AN=4 GT 1/1 0/2 +chr21 39584953 . AA AG,A 30 . AC=2,1;AF=0.500,0.250;AN=4 GT 1/1 0/2 +chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586243 . TA T 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586243 . T TA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586245 . A G 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize342.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize342.vcf index 107ff0811e9..4ccb33ea38a 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize342.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize342.vcf @@ -35,5 +35,12 @@ chr21 10382389 . A ATT 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 10388233 . GGAA G 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 10804284 . T TGC 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 13255296 . A G 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. -chr21 13255297 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. +chr21 13255296 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 39583817 . CTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCT C 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584859 . C G 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584950 . C G,CC 30 . AC=2,1;AF=0.500,0.250;AN=4 GT 1/1 0/2 +chr21 39584953 . AA AG,A 30 . AC=2,1;AF=0.500,0.250;AN=4 GT 1/1 0/2 +chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586243 . TA T 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586243 . T TA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586245 . A G 30 . 
AC=1;AF=0.250;AN=4 GT 0/1 0/0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim.vcf index 3c5f0086bf6..cfd092b0892 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim.vcf @@ -35,5 +35,12 @@ chr21 10382389 . A ATT 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 10388233 . GGAA G 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 10804284 . T TGC 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 13255296 . A G 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. -chr21 13255297 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. +chr21 13255296 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 39583817 . CTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCT C 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584859 . CAA GAA 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584950 . C G,CC 30 . AC=2,1;AF=0.500,0.250;AN=4 GT 1/1 0/2 +chr21 39584953 . AA AG,A 30 . AC=2,1;AF=0.500,0.250;AN=4 GT 1/1 0/2 +chr21 39586239 . CAATT CAATTA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586240 . AATTA AATT 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586243 . TAAAA TAGAA 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586243 . TAAAA T 30 . 
AC=1;AF=0.250;AN=4 GT 0/1 0/0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim_split_multiallelics.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim_split_multiallelics.vcf index 80ea5bb296d..d88b09e05ff 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim_split_multiallelics.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim_split_multiallelics.vcf @@ -37,5 +37,14 @@ chr21 10382389 . A ATT 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 10388233 . GGAA G 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 10804284 . T TGC 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 13255296 . A G 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. -chr21 13255297 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. +chr21 13255296 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 39583817 . CTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCT C 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584859 . CAA GAA 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584948 . A AC 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39584950 . C G 30 . AC=2;AF=0.500;AN=4 GT 1/1 0/0 +chr21 39584951 . CA C 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39584953 . AA AG 30 . AC=2;AF=0.500;AN=4 GT 1/1 0/0 +chr21 39586239 . CAATT CAATTA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586240 . AATTA AATT 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586243 . TAAAA TAGAA 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586243 . TAAAA T 30 . 
AC=1;AF=0.250;AN=4 GT 0/1 0/0 \ No newline at end of file diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics.vcf index 52f63f0419b..7d3b81cc99f 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics.vcf @@ -37,5 +37,14 @@ chr21 10382389 . A ATT 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 10388233 . GGAA G 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 10804284 . T TGC 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 13255296 . A G 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. -chr21 13255297 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. +chr21 13255296 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 39583817 . CTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCT C 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584859 . C G 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584948 . A AC 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39584950 . C G 30 . AC=2;AF=0.500;AN=4 GT 1/1 0/0 +chr21 39584951 . CA C 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39584954 . A G 30 . AC=2;AF=0.500;AN=4 GT 1/1 0/0 +chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586243 . TA T 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586243 . T TA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586245 . A G 30 . 
AC=1;AF=0.250;AN=4 GT 0/1 0/0 \ No newline at end of file diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics_keepOrigAC.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics_keepOrigAC.vcf index 4007fb39696..59491829659 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics_keepOrigAC.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics_keepOrigAC.vcf @@ -40,5 +40,14 @@ chr21 10382389 . A ATT 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 10388233 . GGAA G 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 10804284 . T TGC 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 13255296 . A G 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. -chr21 13255297 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. +chr21 13255296 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 39583817 . CTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCT C 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584859 . C G 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584948 . A AC 30 . AC=1;AC_Orig=1;AF=0.250;AF_Orig=0.250;AN=4;AN_Orig=4 GT 0/0 0/1 +chr21 39584950 . C G 30 . AC=2;AC_Orig=2;AF=0.500;AF_Orig=0.500;AN=4;AN_Orig=4 GT 1/1 0/0 +chr21 39584951 . CA C 30 . AC=1;AC_Orig=1;AF=0.250;AF_Orig=0.250;AN=4;AN_Orig=4 GT 0/0 0/1 +chr21 39584954 . A G 30 . AC=2;AC_Orig=2;AF=0.500;AF_Orig=0.500;AN=4;AN_Orig=4 GT 1/1 0/0 +chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586243 . TA T 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586243 . T TA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586245 . A G 30 . 
AC=1;AF=0.250;AN=4 GT 0/1 0/0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_left_align_hg38.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_left_align_hg38.vcf index 410fa01cd77..87e4f647d85 100755 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_left_align_hg38.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_left_align_hg38.vcf @@ -37,3 +37,10 @@ chr21 10804284 . T TGC 30 . AC=1;AF=0.250;AN=2 GT 0/1 ./. chr21 13255296 . A G 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 13255301 . AAA A 30 . AC=2;AF=0.500;AN=2 GT 1/1 ./. chr21 39584006 . CTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCCTTCCC C 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584859 . CAA GAA 30 . AC=3;AF=0.750;AN=4 GT 1/1 0/1 +chr21 39584950 . C G,CC 30 . AC=2,1;AF=0.500,0.250;AN=4 GT 1/1 0/2 +chr21 39584953 . AA AG,A 30 . AC=2,1;AF=0.500,0.250;AN=4 GT 1/1 0/2 +chr21 39586243 . TAAAA TAGAA 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586243 . TAAAA TAAA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586243 . TAAAA TAAAAA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 From 554373afa1bc371b907b4fca9995a9a86ede2f0e Mon Sep 17 00:00:00 2001 From: Chris Kachulis Date: Fri, 19 Dec 2025 21:33:31 -0500 Subject: [PATCH 2/6] . 
--- .../tools/walkers/variantutils/LeftAlignAndTrimVariants.java | 2 +- .../LeftAlignAndTrimVariants/expected_left_align_hg38.vcf | 2 ++ .../expected_left_align_hg38_maxIndelSize296.vcf | 2 ++ .../expected_left_align_hg38_maxIndelSize342.vcf | 2 ++ .../expected_left_align_hg38_notrim.vcf | 2 ++ .../expected_left_align_hg38_notrim_split_multiallelics.vcf | 4 +++- .../expected_left_align_hg38_split_multiallelics.vcf | 4 +++- ...xpected_left_align_hg38_split_multiallelics_keepOrigAC.vcf | 2 ++ .../LeftAlignAndTrimVariants/test_left_align_hg38.vcf | 2 ++ 9 files changed, 19 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java index fa6e5ca4a24..b38853e6134 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants.java @@ -246,7 +246,7 @@ public void apply(VariantContext vc, ReadsContext readsContext, ReferenceContext "Indel is too long", indelLength, splitVariant.getContig(), splitVariant.getStart(), indelLength)); realignedVariants.add(splitVariant); } else { - final int distanceToLastVariant = (lastVariantWritten != null && splitVariant.contigsMatch(lastVariantWritten)) ? splitVariant.getStart() - lastVariantWritten.getEnd() : Integer.MAX_VALUE; + final int distanceToLastVariant = (lastVariantWritten != null && splitVariant.contigsMatch(lastVariantWritten)) ? 
splitVariant.getStart() - lastVariantWritten.getStart() : Integer.MAX_VALUE; realignedVariants.add(GATKVariantContextUtils.leftAlignAndTrim(splitVariant, ref, Math.min(maxLeadingBases, distanceToLastVariant), !dontTrimAlleles)); } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38.vcf index 5b3ab8afcc9..d87372b77ca 100755 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38.vcf @@ -44,3 +44,5 @@ chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 chr21 39586243 . TA T 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586243 . T TA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586245 . A G 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586743 . TACC T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586744 . AC A 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize296.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize296.vcf index 94be82d58e5..20289443963 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize296.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize296.vcf @@ -44,3 +44,5 @@ chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 chr21 39586243 . TA T 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586243 . T TA 30 . 
AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586245 . A G 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586743 . TACC T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586744 . AC A 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize342.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize342.vcf index 4ccb33ea38a..4d533e66553 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize342.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_maxIndelSize342.vcf @@ -44,3 +44,5 @@ chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 chr21 39586243 . TA T 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586243 . T TA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586245 . A G 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586743 . TACC T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586744 . AC A 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim.vcf index cfd092b0892..bc3b85de197 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim.vcf @@ -44,3 +44,5 @@ chr21 39586239 . CAATT CAATTA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586240 . AATTA AATT 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586243 . 
TAAAA TAGAA 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586743 . TACC T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586744 . AC A 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim_split_multiallelics.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim_split_multiallelics.vcf index d88b09e05ff..75d2f190dd8 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim_split_multiallelics.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_notrim_split_multiallelics.vcf @@ -47,4 +47,6 @@ chr21 39584953 . AA AG 30 . AC=2;AF=0.500;AN=4 GT 1/1 0/0 chr21 39586239 . CAATT CAATTA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586240 . AATTA AATT 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586243 . TAAAA TAGAA 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 -chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 \ No newline at end of file +chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586743 . TACC T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586744 . AC A 30 . 
AC=1;AF=0.250;AN=4 GT 0/1 0/0 \ No newline at end of file diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics.vcf index 7d3b81cc99f..7721f6271a1 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics.vcf @@ -47,4 +47,6 @@ chr21 39584954 . A G 30 . AC=2;AF=0.500;AN=4 GT 1/1 0/0 chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 chr21 39586243 . TA T 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586243 . T TA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 -chr21 39586245 . A G 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 \ No newline at end of file +chr21 39586245 . A G 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586743 . TACC T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586744 . AC A 30 . 
AC=1;AF=0.250;AN=4 GT 0/1 0/0 \ No newline at end of file diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics_keepOrigAC.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics_keepOrigAC.vcf index 59491829659..80194bc7d8c 100644 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics_keepOrigAC.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/expected_left_align_hg38_split_multiallelics_keepOrigAC.vcf @@ -51,3 +51,5 @@ chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 chr21 39586243 . TA T 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586243 . T TA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586245 . A G 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586743 . TACC T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586744 . AC A 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_left_align_hg38.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_left_align_hg38.vcf index 87e4f647d85..26690f59eaf 100755 --- a/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_left_align_hg38.vcf +++ b/src/test/resources/org/broadinstitute/hellbender/tools/walkers/variantutils/LeftAlignAndTrimVariants/test_left_align_hg38.vcf @@ -44,3 +44,5 @@ chr21 39586243 . TAAAA TAGAA 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 chr21 39586243 . TAAAA T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 chr21 39586243 . TAAAA TAAA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 chr21 39586243 . TAAAA TAAAAA 30 . AC=1;AF=0.250;AN=4 GT 0/0 0/1 +chr21 39586743 . 
TACC T 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 +chr21 39586745 . CC C 30 . AC=1;AF=0.250;AN=4 GT 0/1 0/0 From 3a390fa1d9171269896a50614a8c359366bc56aa Mon Sep 17 00:00:00 2001 From: Chris Kachulis Date: Mon, 5 Jan 2026 12:18:11 -0500 Subject: [PATCH 3/6] ubuntu-slim --- .github/workflows/gatk-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/gatk-tests.yml b/.github/workflows/gatk-tests.yml index 54f0945106c..18b21f6680f 100644 --- a/.github/workflows/gatk-tests.yml +++ b/.github/workflows/gatk-tests.yml @@ -288,7 +288,7 @@ jobs: #Run our various targeted medium scale wdl wiring tests wdlTests: - runs-on: ubuntu-latest + runs-on: ubuntu-slim strategy: matrix: wdlTest: [ 'RUN_CNV_GERMLINE_COHORT_WDL', 'RUN_CNV_GERMLINE_CASE_WDL', 'RUN_CNV_SOMATIC_WDL', 'RUN_M2_WDL', 'RUN_VCF_SITE_LEVEL_FILTERING_WDL' ] From 4375c5a699006407a52a9631a3102c9769ba4f1e Mon Sep 17 00:00:00 2001 From: Chris Kachulis Date: Mon, 5 Jan 2026 12:29:24 -0500 Subject: [PATCH 4/6] purge more disk --- .github/actions/purge-runner-disk/action.yml | 3 +++ .github/workflows/gatk-tests.yml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/actions/purge-runner-disk/action.yml b/.github/actions/purge-runner-disk/action.yml index d9bd987bbe9..12b69a88923 100644 --- a/.github/actions/purge-runner-disk/action.yml +++ b/.github/actions/purge-runner-disk/action.yml @@ -9,4 +9,7 @@ runs: run: | sudo rm -rf /usr/share/dotnet sudo rm -rf "$AGENT_TOOLSDIRECTORY" + sudo rm -rf /usr/local/lib/android + sudo rm -rf /opt/ghc + sudo docker image prune --all --force shell: bash diff --git a/.github/workflows/gatk-tests.yml b/.github/workflows/gatk-tests.yml index 18b21f6680f..54f0945106c 100644 --- a/.github/workflows/gatk-tests.yml +++ b/.github/workflows/gatk-tests.yml @@ -288,7 +288,7 @@ jobs: #Run our various targeted medium scale wdl wiring tests wdlTests: - runs-on: ubuntu-slim + runs-on: ubuntu-latest strategy: matrix: wdlTest: [ 
'RUN_CNV_GERMLINE_COHORT_WDL', 'RUN_CNV_GERMLINE_CASE_WDL', 'RUN_CNV_SOMATIC_WDL', 'RUN_M2_WDL', 'RUN_VCF_SITE_LEVEL_FILTERING_WDL' ] From 9278074eed5826bdc6754e34b522578357101a66 Mon Sep 17 00:00:00 2001 From: Chris Kachulis Date: Mon, 5 Jan 2026 13:20:28 -0500 Subject: [PATCH 5/6] debug purge-runner-disk --- .github/actions/purge-runner-disk/action.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/actions/purge-runner-disk/action.yml b/.github/actions/purge-runner-disk/action.yml index 12b69a88923..dc86fbd7794 100644 --- a/.github/actions/purge-runner-disk/action.yml +++ b/.github/actions/purge-runner-disk/action.yml @@ -7,9 +7,12 @@ runs: #https://github.com/actions/runner-images/issues/2840 - name: Reclaim some runner disk space run: | + sudo df -h + echo "$AGENT_TOOLSDIRECTORY" sudo rm -rf /usr/share/dotnet sudo rm -rf "$AGENT_TOOLSDIRECTORY" sudo rm -rf /usr/local/lib/android sudo rm -rf /opt/ghc sudo docker image prune --all --force + sudo df -h shell: bash From ca6a6c7a04dfa26536ecffb83da574388f2c165b Mon Sep 17 00:00:00 2001 From: Chris Kachulis Date: Wed, 7 Jan 2026 09:31:54 -0500 Subject: [PATCH 6/6] debug --- .../germline/run_cnv_germline_workflows.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/cnv_cromwell_tests/germline/run_cnv_germline_workflows.sh b/scripts/cnv_cromwell_tests/germline/run_cnv_germline_workflows.sh index 97305996d54..50dd3bcd1f0 100644 --- a/scripts/cnv_cromwell_tests/germline/run_cnv_germline_workflows.sh +++ b/scripts/cnv_cromwell_tests/germline/run_cnv_germline_workflows.sh @@ -1,5 +1,5 @@ #!/bin/bash -l -set -e +#set -e MODE=$1 # We split up the test into CASE in COHORT to reduce overall test runtime @@ -42,7 +42,10 @@ echo "Running ========" # Cohort WES w/ explicit GC correction if [[ "$MODE" == "COHORT" ]]; then java -jar ${CROMWELL_JAR} run $WORKING_DIR/gatk/scripts/cnv_wdl/germline/cnv_germline_cohort_workflow.wdl -i cnv_germline_cohort_workflow_mod.json ; - find 
/home/runner/work/gatk/gatk/scripts/cnv_cromwell_tests/germline/cromwell-executions/CNVGermlineCohortWorkflow/ | grep 'stdout\|stderr' | xargs cat ; + if [[ "$?" != 0 ]]; then + find /home/runner/work/gatk/gatk/scripts/cnv_cromwell_tests/germline/cromwell-executions/CNVGermlineCohortWorkflow/ | grep 'stdout\|stderr' | xargs cat ; + exit 1 ; + fi fi # Scattered case WES w/ explicit GC correction