Skip to content

Commit d5784b5

Browse files
authored
feat: add input assembly parameter for /gnomad_vcf_to_protein (#626)
close #625
1 parent 4029c9e commit d5784b5

File tree

3 files changed

+43
-5
lines changed

3 files changed

+43
-5
lines changed

src/variation/gnomad_vcf_to_protein_variation.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Module for translating VCF-like to protein VRS Allele representation"""
22

33
import datetime
4+
from typing import Literal
45

56
from cool_seq_tool.handlers import SeqRepoAccess
67
from cool_seq_tool.mappers import ManeTranscript
@@ -16,6 +17,7 @@
1617
from variation.schemas.classification_response_schema import Nomenclature
1718
from variation.schemas.gnomad_vcf_to_protein_schema import GnomadVcfToProteinService
1819
from variation.schemas.normalize_response_schema import ServiceMeta
20+
from variation.schemas.service_schema import ClinVarAssembly
1921
from variation.schemas.token_response_schema import AltType
2022
from variation.schemas.validation_response_schema import ValidationResult
2123
from variation.tokenize import Tokenize
@@ -183,12 +185,16 @@ def __init__(
183185
self.gene_normalizer = gene_normalizer
184186

185187
async def _get_valid_result(
186-
self, vcf_query: str, warnings: list
188+
self,
189+
vcf_query: str,
190+
warnings: list,
191+
input_assembly: Literal[ClinVarAssembly.GRCH37, ClinVarAssembly.GRCH38] | None,
187192
) -> list[ValidationResult]:
188193
"""Get gnomad vcf validation summary
189194
190195
:param vcf_query: gnomad vcf input query
191196
:param warnings: List of warnings
197+
:param input_assembly: Assembly used for ``vcf_query``.
192198
:raises GnomadVcfToProteinError: If no tokens, classifications, or valid results
193199
are found. Also if ``vcf_query`` is not a gnomAD VCF-like query.
194200
:return: List of valid results for a gnomad VCF query
@@ -207,7 +213,9 @@ async def _get_valid_result(
207213
msg = f"{vcf_query} is not a gnomAD VCF-like query (`chr-pos-ref-alt`)"
208214
raise GnomadVcfToProteinError(msg)
209215

210-
validation_summary = await self.validator.perform(classification)
216+
validation_summary = await self.validator.perform(
217+
classification, input_assembly=input_assembly
218+
)
211219
valid_results = validation_summary.valid_results
212220
if valid_results:
213221
# Temporary work around until issue-490 complete
@@ -434,12 +442,18 @@ def _get_gene_context(self, gene: str) -> MappableConcept | None:
434442
else None
435443
)
436444

437-
async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinService:
445+
async def gnomad_vcf_to_protein(
446+
self,
447+
vcf_query: str,
448+
input_assembly: Literal[ClinVarAssembly.GRCH37, ClinVarAssembly.GRCH38]
449+
| None = None,
450+
) -> GnomadVcfToProteinService:
438451
"""Get protein consequence for gnomAD-VCF like expression
439452
Assumes GRCh38 representation unless ``input_assembly`` is given
440453
441454
:param vcf_query: gnomAD VCF-like expression (``chr-pos-ref-alt``) on the GRCh38
442455
assembly, or on the assembly named by ``input_assembly`` when one is given
456+
:param input_assembly: Assembly used for ``vcf_query``.
443457
:return: GnomadVcfToProteinService containing protein VRS Allele, if translation
444458
was successful
445459
"""
@@ -448,7 +462,9 @@ async def gnomad_vcf_to_protein(self, vcf_query: str) -> GnomadVcfToProteinServi
448462

449463
# First we need to validate the input query
450464
try:
451-
valid_result = await self._get_valid_result(vcf_query, warnings)
465+
valid_result = await self._get_valid_result(
466+
vcf_query, warnings, input_assembly=input_assembly
467+
)
452468
except GnomadVcfToProteinError as e:
453469
warnings.append(str(e))
454470
return GnomadVcfToProteinService(

src/variation/main.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,14 +365,24 @@ def vrs_python_translate_from(
365365
)
366366
async def gnomad_vcf_to_protein(
367367
q: Annotated[str, Query(description=q_description)],
368+
input_assembly: Annotated[
369+
Literal[ClinVarAssembly.GRCH37] | Literal[ClinVarAssembly.GRCH38] | None,
370+
Query(
371+
description="Assembly used for `q`.",
372+
),
373+
] = None,
368374
) -> GnomadVcfToProteinService:
369375
"""Return VRS representation for variation on protein coordinate.
370376
371377
:param q: gnomad VCF to normalize to protein variation.
378+
:param input_assembly: Assembly used for `q`.
372379
:return: GnomadVcfToProteinService for variation
373380
"""
374381
q = unquote(q.strip())
375-
return await query_handler.gnomad_vcf_to_protein_handler.gnomad_vcf_to_protein(q)
382+
return await query_handler.gnomad_vcf_to_protein_handler.gnomad_vcf_to_protein(
383+
q,
384+
input_assembly=input_assembly,
385+
)
376386

377387

378388
hgvs_dup_del_mode_decsr = (

tests/test_gnomad_vcf_to_protein.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from ga4gh.vrs import models
55

66
from tests.conftest import assertion_checks
7+
from variation.schemas.service_schema import ClinVarAssembly
78

89

910
@pytest.fixture(scope="module")
@@ -377,6 +378,17 @@ async def test_delins(test_handler, delins_pos, delins_neg):
377378
assert resp.warnings == []
378379

379380

381+
@pytest.mark.asyncio
382+
async def test_input_assembly(test_handler):
383+
"""Test that input assembly works correctly (issue #625)"""
384+
resp = await test_handler.gnomad_vcf_to_protein(
385+
"1-35227334-G-A", input_assembly=ClinVarAssembly.GRCH37
386+
)
387+
assert resp.variation
388+
assert resp.gene_context
389+
assert resp.gene_context.name == "GJB4"
390+
391+
380392
@pytest.mark.asyncio
381393
async def test_invalid(test_handler):
382394
"""Test that invalid queries return correct response"""

0 commit comments

Comments
 (0)