-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathncbi_links_service.py
More file actions
74 lines (59 loc) · 2.42 KB
/
ncbi_links_service.py
File metadata and controls
74 lines (59 loc) · 2.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""Service for generating NCBI link data from BRC Analytics catalog."""
import json
import logging
from pathlib import Path
from typing import Any, Dict, List
from app.core.config import get_settings
logger = logging.getLogger(__name__)
class NCBILinksService:
"""Service to generate link files for NCBI cross-referencing."""
def __init__(self, catalog_path: str | None = None):
settings = get_settings()
self.catalog_path = Path(catalog_path or settings.CATALOG_PATH)
self.base_url = "https://brc-analytics.org"
def _load_json_file(self, filename: str) -> List[Dict[str, Any]]:
file_path = self.catalog_path / filename
try:
with open(file_path, "r") as f:
return json.load(f)
except FileNotFoundError:
logger.error(f"Catalog file not found: {file_path}")
return []
except json.JSONDecodeError as e:
logger.error(f"Error parsing JSON from {file_path}: {e}")
return []
def get_organism_links(self) -> List[Dict[str, str]]:
organisms = self._load_json_file("organisms.json")
links = []
for org in organisms:
taxonomy_id = org.get("ncbiTaxonomyId")
if not taxonomy_id:
continue
links.append(
{
"ncbiTaxonomyId": taxonomy_id,
"url": f"{self.base_url}/data/organisms/{taxonomy_id}",
"scientificName": org.get("taxonomicLevelSpecies"),
"commonName": org.get("commonName"),
}
)
logger.info(f"Generated {len(links)} organism links")
return links
def get_assembly_links(self) -> List[Dict[str, str]]:
assemblies = self._load_json_file("assemblies.json")
links = []
for assembly in assemblies:
accession = assembly.get("accession")
if not accession:
continue
url_accession = accession.replace(".", "_")
links.append(
{
"accession": accession,
"url": f"{self.base_url}/data/assemblies/{url_accession}",
"ncbiTaxonomyId": assembly.get("ncbiTaxonomyId"),
"scientificName": assembly.get("taxonomicLevelSpecies"),
}
)
logger.info(f"Generated {len(links)} assembly links")
return links