Skip to content

Commit da230a5

Browse files
authored
feat: expose cool-seq-tool feature overlap endpoint (#523)
For issue #521
1 parent dda730e commit da230a5

File tree

4 files changed

+141
-6
lines changed

4 files changed

+141
-6
lines changed

Pipfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@ pydantic = "==1.*"
2424
gene-normalizer = "~=0.1.36"
2525
boto3 = "*"
2626
"ga4gh.vrsatile.pydantic" = "~=0.0.13"
27-
cool-seq-tool = "~=0.1.14.dev0"
27+
cool-seq-tool = "~=0.1.14.dev3"

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ install_requires =
3737
gene-normalizer ~= 0.1.36
3838
boto3
3939
ga4gh.vrsatile.pydantic ~= 0.0.13
40-
cool-seq-tool ~= 0.1.14.dev0
40+
cool-seq-tool ~= 0.1.14.dev3
4141

4242
tests_require =
4343
pytest

variation/main.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
import pkg_resources
99
import python_jsonschema_objects
1010
from bioutils.exceptions import BioutilsError
11+
from cool_seq_tool.data_sources.feature_overlap import (
12+
FeatureOverlap,
13+
FeatureOverlapError,
14+
)
1115
from cool_seq_tool.schemas import Assembly, ResidueMode
1216
from fastapi import FastAPI, Query
1317
from ga4gh.vrs import models
@@ -35,6 +39,7 @@
3539
)
3640
from variation.schemas.service_schema import (
3741
ClinVarAssembly,
42+
FeatureOverlapService,
3843
ToCdnaService,
3944
ToGenomicService,
4045
)
@@ -59,9 +64,11 @@ class Tag(Enum):
5964
VRS_PYTHON = "VRS-Python"
6065
TO_COPY_NUMBER_VARIATION = "To Copy Number Variation"
6166
ALIGNMENT_MAPPER = "Alignment Mapper"
67+
FEATURE_OVERLAP = "Feature Overlap"
6268

6369

6470
query_handler = QueryHandler()
71+
feature_overlap = FeatureOverlap(query_handler.seqrepo_access)
6572

6673

6774
app = FastAPI(
@@ -841,3 +848,70 @@ async def p_to_g(
841848
warnings=[w] if w else [],
842849
service_meta=ServiceMeta(version=__version__, response_datetime=datetime.now()),
843850
)
851+
852+
853+
@app.get(
    "/variation/feature_overlap",
    summary="Given GRCh38 genomic data, find the overlapping MANE features (gene and cds)",
    response_description="A response to a validly-formed query.",
    description="The genomic data is specified as a sequence location by `chromosome`, `start`, `end`. All CDS regions with which the input sequence location has nonzero base pair overlap will be returned.",
    response_model=FeatureOverlapService,
    tags=[Tag.FEATURE_OVERLAP],
)
def get_feature_overlap(
    start: int = Query(..., description="GRCh38 start position"),
    end: int = Query(..., description="GRCh38 end position"),
    chromosome: Optional[str] = Query(
        None,
        description="Chromosome. 1..22, X, or Y. If not provided, must provide `identifier`. If both `chromosome` and `identifier` are provided, `chromosome` will be used.",
    ),
    identifier: Optional[str] = Query(
        None,
        description="Genomic identifier on GRCh38 assembly. If not provided, must provide `chromosome`. If both `chromosome` and `identifier` are provided, `chromosome` will be used.",
    ),
    residue_mode: ResidueMode = Query(
        ResidueMode.RESIDUE, description="Residue mode for `start` and `end`"
    ),
) -> FeatureOverlapService:
    """Given GRCh38 genomic data, find the overlapping MANE features (gene and cds)

    The genomic data is specified as a sequence location by `chromosome`, `start`,
    `end`. All CDS regions with which the input sequence location has nonzero base
    pair overlap will be returned.

    Failures are never raised to the client: any exception is converted into an
    entry in the response's `warnings` list and `feature_overlap` is left as
    `None`.

    :param start: GRCh38 start position
    :param end: GRCh38 end position
    :param chromosome: Chromosome. 1..22, X, or Y. If not provided, must provide
        `identifier`. If both `chromosome` and `identifier` are provided,
        `chromosome` will be used.
    :param identifier: Genomic identifier on GRCh38 assembly. If not provided, must
        provide `chromosome`. If both `chromosome` and `identifier` are provided,
        `chromosome` will be used.
    :param residue_mode: Residue mode for `start` and `end`
    :return: Service response whose `feature_overlap` field holds the MANE feature
        (gene/cds) overlap data as a dict, or `None` when the lookup failed. The
        dictionary is keyed by the genes which overlap the input sequence
        location. Each gene maps to a list of the overlapping CDS regions together
        with the beginning and end of the input sequence location's overlap with
        each CDS region.
    """
    try:
        overlap_data = feature_overlap.get_grch38_mane_gene_cds_overlap(
            start=start,
            end=end,
            chromosome=chromosome,
            identifier=identifier,
            residue_mode=residue_mode,
        )
    except FeatureOverlapError as e:
        # Expected, user-facing failure: surface the library's message verbatim.
        overlap_data = None
        errors = [str(e)]
    except Exception as e:
        # The client is pointed at the logs, so record the full traceback there
        # (logger.exception) rather than only the exception message.
        logger.exception("Unhandled exception: %s", e)
        overlap_data = None
        errors = ["Unhandled exception. See logs for more information."]
    else:
        errors = []

    return FeatureOverlapService(
        feature_overlap=overlap_data,
        warnings=errors,
        service_meta_=ServiceMeta(
            version=__version__, response_datetime=datetime.now()
        ),
    )

variation/schemas/service_schema.py

Lines changed: 65 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,18 @@
11
"""Module containing schemas for services"""
22
from enum import Enum
3-
from typing import Any, Dict, Type
3+
from typing import Any, Dict, List, Optional, Type
44

5-
from cool_seq_tool.schemas import ToCdnaService as ToCdna
6-
from cool_seq_tool.schemas import ToGenomicService as ToGenomic
5+
from cool_seq_tool.schemas import (
6+
CdsOverlap,
7+
)
8+
from cool_seq_tool.schemas import (
9+
ToCdnaService as ToCdna,
10+
)
11+
from cool_seq_tool.schemas import (
12+
ToGenomicService as ToGenomic,
13+
)
714

8-
from variation.schemas.normalize_response_schema import ServiceMeta
15+
from variation.schemas.normalize_response_schema import ServiceMeta, ServiceResponse
916

1017

1118
class ClinVarAssembly(str, Enum):
@@ -82,3 +89,57 @@ def schema_extra(schema: Dict[str, Any], model: Type["ToCdnaService"]) -> None:
8289
"url": "https://github.com/cancervariants/variation-normalization",
8390
},
8491
}
92+
93+
94+
class FeatureOverlapService(ServiceResponse):
    """Define model for representing Feature Overlap response"""

    # Overlap data: maps a string key (a gene symbol such as "BRAF" in the example
    # below) to the list of CDS overlaps found for it. Defaults to None, which is
    # what the endpoint passes when the lookup fails.
    feature_overlap: Optional[Dict[str, List[CdsOverlap]]] = None

    class Config:
        """Configure model."""

        @staticmethod
        def schema_extra(
            schema: Dict[str, Any], model: Type["FeatureOverlapService"]
        ) -> None:
            """Configure OpenAPI schema.

            Strips the auto-generated `title` entries from the schema and from
            each of its properties, then attaches an example response.

            :param schema: Generated schema dict; modified in place
            :param model: Model class the schema was generated for (unused)
            """
            # pop() with a default is already a no-op when the key is missing, so
            # no membership check is needed first.
            schema.pop("title", None)
            for prop in schema.get("properties", {}).values():
                prop.pop("title", None)
            schema["example"] = {
                "feature_overlap": {
                    "BRAF": [
                        {
                            "cds": {
                                "_id": "ga4gh:VSL._H2ST69A4RkWCSRHOoMv-edt-R45fPdq",
                                "type": "SequenceLocation",
                                "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
                                "interval": {
                                    "type": "SequenceInterval",
                                    "start": {"value": 140726493, "type": "Number"},
                                    "end": {"value": 140726516, "type": "Number"},
                                },
                            },
                            "overlap": {
                                "_id": "ga4gh:VSL._H2ST69A4RkWCSRHOoMv-edt-R45fPdq",
                                "type": "SequenceLocation",
                                "sequence_id": "ga4gh:SQ.F-LrLMe1SRpfUZHkQmvkVKFEGaoDeHul",
                                "interval": {
                                    "type": "SequenceInterval",
                                    "start": {"value": 140726493, "type": "Number"},
                                    "end": {"value": 140726516, "type": "Number"},
                                },
                            },
                        }
                    ]
                },
                "warnings": [],
                # Key matches the `service_meta_` keyword that the feature overlap
                # endpoint uses when constructing this model.
                # NOTE(review): confirm against the ServiceResponse field name.
                "service_meta_": {
                    "version": "0.5.4",
                    "response_datetime": "2022-09-29T15:08:18.696882",
                    "name": "variation-normalizer",
                    "url": "https://github.com/cancervariants/variation-normalization",
                },
            }

0 commit comments

Comments
 (0)