diff --git a/backend/api/app/api/v1/links.py b/backend/api/app/api/v1/links.py new file mode 100644 index 000000000..97506380b --- /dev/null +++ b/backend/api/app/api/v1/links.py @@ -0,0 +1,85 @@ +"""API endpoints for assemblies and organisms links.""" + +from fastapi import APIRouter, Depends, HTTPException + +from app.core.cache import CacheService, CacheTTL +from app.core.dependencies import get_cache_service +from app.services.links_service import LinksService + +router = APIRouter() + + +@router.get("/assemblies/links") +async def get_assemblies_links(cache: CacheService = Depends(get_cache_service)): + """Get all assembly links for cross-referencing.""" + cache_key = "v1:assemblies:links" + + cached = await cache.get(cache_key) + if cached is not None: + return cached + + service = LinksService() + response = service.get_assemblies_links() + await cache.set(cache_key, response, ttl=CacheTTL.ONE_DAY) + + return response + + +@router.get("/assemblies/links/{accession}") +async def get_assembly_link( + accession: str, cache: CacheService = Depends(get_cache_service) +): + """Get a single assembly link by accession.""" + cache_key = f"v1:assemblies:links:{accession}" + + cached = await cache.get(cache_key) + if cached is not None: + return cached + + service = LinksService() + result = service.get_assembly_link(accession) + + if result is None: + raise HTTPException(status_code=404, detail=f"Assembly {accession} not found") + + await cache.set(cache_key, result, ttl=CacheTTL.ONE_DAY) + + return result + + +@router.get("/organisms/links") +async def get_organisms_links(cache: CacheService = Depends(get_cache_service)): + """Get all organism links for cross-referencing.""" + cache_key = "v1:organisms:links" + + cached = await cache.get(cache_key) + if cached is not None: + return cached + + service = LinksService() + response = service.get_organisms_links() + await cache.set(cache_key, response, ttl=CacheTTL.ONE_DAY) + + return response + + +@router.get("/organisms/links/{taxon_id}") +async def get_organism_link( + taxon_id: int, cache: CacheService = Depends(get_cache_service) +): + """Get a single organism link by NCBI taxonomy ID.""" + cache_key = f"v1:organisms:links:{taxon_id}" + + cached = await cache.get(cache_key) + if cached is not None: + return cached + + service = LinksService() + result = service.get_organism_link(taxon_id) + + if result is None: + raise HTTPException(status_code=404, detail=f"Organism {taxon_id} not found") + + await cache.set(cache_key, result, ttl=CacheTTL.ONE_DAY) + + return result diff --git a/backend/api/app/api/v1/ncbi_links.py b/backend/api/app/api/v1/ncbi_links.py deleted file mode 100644 index e89f66a43..000000000 --- a/backend/api/app/api/v1/ncbi_links.py +++ /dev/null @@ -1,41 +0,0 @@ -"""API endpoints for NCBI cross-linking.""" - -from fastapi import APIRouter, Depends - -from app.core.cache import CacheService, CacheTTL -from app.core.dependencies import get_cache_service -from app.services.ncbi_links_service import NCBILinksService - -router = APIRouter() - - -@router.get("/organism-links.json") -async def get_organism_links(cache: CacheService = Depends(get_cache_service)): - """Get organism links by taxonomy ID for NCBI cross-referencing""" - cache_key = "ncbi_links:organisms" - - cached = await cache.get(cache_key) - if cached is not None: - return cached - - service = NCBILinksService() - links = service.get_organism_links() - await cache.set(cache_key, links, ttl=CacheTTL.ONE_DAY) - - return links - - -@router.get("/assembly-links.json") -async def get_assembly_links(cache: CacheService = Depends(get_cache_service)): - """Get assembly links by accession for NCBI cross-referencing""" - cache_key = "ncbi_links:assemblies" - - cached = await cache.get(cache_key) - if cached is not None: - return cached - - service = NCBILinksService() - links = service.get_assembly_links() - await cache.set(cache_key, links, ttl=CacheTTL.ONE_DAY) - - return links diff --git a/backend/api/app/main.py b/backend/api/app/main.py index 75023dde4..58f506367 100644 --- a/backend/api/app/main.py +++ b/backend/api/app/main.py @@ -1,7 +1,7 @@ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware -from app.api.v1 import cache, health, ncbi_links, version +from app.api.v1 import cache, health, links, version from app.core.config import get_settings settings = get_settings() @@ -9,6 +9,7 @@ app = FastAPI( title="BRC Analytics API", version=settings.APP_VERSION, + openapi_url="/api/openapi.json", docs_url="/api/docs", redoc_url="/api/redoc", ) @@ -25,7 +26,7 @@ app.include_router(health.router, prefix="/api/v1", tags=["health"]) app.include_router(cache.router, prefix="/api/v1/cache", tags=["cache"]) app.include_router(version.router, prefix="/api/v1/version", tags=["version"]) -app.include_router(ncbi_links.router, prefix="/api/v1/links", tags=["ncbi-links"]) +app.include_router(links.router, prefix="/api/v1", tags=["links"]) @app.get("/") diff --git a/backend/api/app/services/links_service.py b/backend/api/app/services/links_service.py new file mode 100644 index 000000000..7608c095b --- /dev/null +++ b/backend/api/app/services/links_service.py @@ -0,0 +1,98 @@ +"""Service for generating link data from BRC Analytics catalog.""" + +import json +import logging +from pathlib import Path +from typing import Any, Dict, List, Optional + +from app.core.config import get_settings + +logger = logging.getLogger(__name__) + + +class LinksService: + """Service to generate link files for cross-referencing.""" + + def __init__(self, catalog_path: str | None = None): + settings = get_settings() + self.catalog_path = Path(catalog_path or settings.CATALOG_PATH) + + def _load_json_file(self, filename: str) -> List[Dict[str, Any]]: + file_path = self.catalog_path / filename + try: + with open(file_path, "r") as f: + return json.load(f) + except FileNotFoundError: + logger.error(f"Catalog file not found: {file_path}") + return [] + except json.JSONDecodeError as e: + logger.error(f"Error parsing JSON from {file_path}: {e}") + return [] + + def _build_assembly_link(self, accession: str) -> Dict[str, str]: + """Build a single assembly link dict.""" + url_accession = accession.replace(".", "_") + return { + "assemblyAccession": accession, + "relativePath": f"/data/assemblies/{url_accession}", + } + + def _build_organism_link(self, taxonomy_id: int) -> Dict[str, Any]: + """Build a single organism link dict.""" + return { + "ncbiTaxonomyId": taxonomy_id, + "relativePath": f"/data/organisms/{taxonomy_id}", + } + + def get_assemblies_links(self) -> Dict[str, Any]: + """Get all assembly links in v1 format.""" + assemblies = self._load_json_file("assemblies.json") + links = [] + + for assembly in assemblies: + accession = assembly.get("accession") + if not accession: + continue + links.append(self._build_assembly_link(accession)) + + logger.info(f"Generated {len(links)} assembly links") + return { + "assemblies": links, + } + + def get_assembly_link(self, accession: str) -> Optional[Dict[str, str]]: + """Get a single assembly link by accession.""" + assemblies = self._load_json_file("assemblies.json") + + for assembly in assemblies: + if assembly.get("accession") == accession: + return self._build_assembly_link(accession) + + return None + + def get_organisms_links(self) -> Dict[str, Any]: + """Get all organism links in v1 format.""" + organisms = self._load_json_file("organisms.json") + links = [] + + for org in organisms: + taxonomy_id = org.get("ncbiTaxonomyId") + if not taxonomy_id: + continue + links.append(self._build_organism_link(int(taxonomy_id))) + + logger.info(f"Generated {len(links)} organism links") + return { + "organisms": links, + } + + def get_organism_link(self, taxon_id: int) -> Optional[Dict[str, Any]]: + """Get a single organism link by NCBI taxonomy ID.""" + organisms = self._load_json_file("organisms.json") + + for org in organisms: + taxonomy_id = org.get("ncbiTaxonomyId") + if taxonomy_id is not None and int(taxonomy_id) == taxon_id: + return self._build_organism_link(taxon_id) + + return None diff --git a/backend/api/app/services/ncbi_links_service.py b/backend/api/app/services/ncbi_links_service.py deleted file mode 100644 index 9ecfbbde0..000000000 --- a/backend/api/app/services/ncbi_links_service.py +++ /dev/null @@ -1,74 +0,0 @@ -"""Service for generating NCBI link data from BRC Analytics catalog.""" - -import json -import logging -from pathlib import Path -from typing import Any, Dict, List - -from app.core.config import get_settings - -logger = logging.getLogger(__name__) - - -class NCBILinksService: - """Service to generate link files for NCBI cross-referencing.""" - - def __init__(self, catalog_path: str | None = None): - settings = get_settings() - self.catalog_path = Path(catalog_path or settings.CATALOG_PATH) - self.base_url = "https://brc-analytics.org" - - def _load_json_file(self, filename: str) -> List[Dict[str, Any]]: - file_path = self.catalog_path / filename - try: - with open(file_path, "r") as f: - return json.load(f) - except FileNotFoundError: - logger.error(f"Catalog file not found: {file_path}") - return [] - except json.JSONDecodeError as e: - logger.error(f"Error parsing JSON from {file_path}: {e}") - return [] - - def get_organism_links(self) -> List[Dict[str, str]]: - organisms = self._load_json_file("organisms.json") - links = [] - - for org in organisms: - taxonomy_id = org.get("ncbiTaxonomyId") - if not taxonomy_id: - continue - - links.append( - { - "ncbiTaxonomyId": taxonomy_id, - "url": f"{self.base_url}/data/organisms/{taxonomy_id}", - "scientificName": org.get("taxonomicLevelSpecies"), - "commonName": org.get("commonName"), - } - ) - - logger.info(f"Generated {len(links)} organism links") - return links - - def get_assembly_links(self) -> List[Dict[str, str]]: - assemblies = self._load_json_file("assemblies.json") - links = [] - - for assembly in assemblies: - accession = assembly.get("accession") - if not accession: - continue - - url_accession = accession.replace(".", "_") - links.append( - { - "accession": accession, - "url": f"{self.base_url}/data/assemblies/{url_accession}", - "ncbiTaxonomyId": assembly.get("ncbiTaxonomyId"), - "scientificName": assembly.get("taxonomicLevelSpecies"), - } - ) - - logger.info(f"Generated {len(links)} assembly links") - return links diff --git a/backend/api/tests/test_links.py b/backend/api/tests/test_links.py new file mode 100644 index 000000000..6c511d926 --- /dev/null +++ b/backend/api/tests/test_links.py @@ -0,0 +1,120 @@ +"""Smoke tests for links API endpoints. + +These tests run against a live backend instance (via Docker). +Set API_BASE_URL environment variable to override the default endpoint. +""" + +import os + +import httpx +import pytest + +BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8080") + + +@pytest.fixture +def client(): + return httpx.Client(base_url=BASE_URL, timeout=30.0) + + +def test_health_endpoint(client): + """Health endpoint returns healthy status.""" + response = client.get("/api/v1/health") + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" + + +def test_assemblies_links_returns_expected_format(client): + """Assemblies links endpoint returns expected format.""" + response = client.get("/api/v1/assemblies/links") + assert response.status_code == 200 + data = response.json() + assert "assemblies" in data + assert isinstance(data["assemblies"], list) + assert len(data["assemblies"]) > 0 + + +def test_assemblies_links_has_required_fields(client): + """Assembly links contain required fields.""" + response = client.get("/api/v1/assemblies/links") + data = response.json() + first_item = data["assemblies"][0] + assert "assemblyAccession" in first_item + assert "relativePath" in first_item + assert first_item["relativePath"].startswith("/data/assemblies/") + + +def test_assembly_link_by_accession(client): + """Single assembly link endpoint returns correct data.""" + # First get a valid accession from the list + response = client.get("/api/v1/assemblies/links") + data = response.json() + accession = data["assemblies"][0]["assemblyAccession"] + + # Then fetch that specific accession + response = client.get(f"/api/v1/assemblies/links/{accession}") + assert response.status_code == 200 + item = response.json() + assert item["assemblyAccession"] == accession + assert "relativePath" in item + + +def test_assembly_link_not_found(client): + """Single assembly link endpoint returns 404 for unknown accession.""" + response = client.get("/api/v1/assemblies/links/INVALID_ACCESSION") + assert response.status_code == 404 + + +def test_organisms_links_returns_expected_format(client): + """Organisms links endpoint returns expected format.""" + response = client.get("/api/v1/organisms/links") + assert response.status_code == 200 + data = response.json() + assert "organisms" in data + assert isinstance(data["organisms"], list) + assert len(data["organisms"]) > 0 + + +def test_organisms_links_has_required_fields(client): + """Organism links contain required fields.""" + response = client.get("/api/v1/organisms/links") + data = response.json() + first_item = data["organisms"][0] + assert "ncbiTaxonomyId" in first_item + assert "relativePath" in first_item + assert "/data/organisms/" in first_item["relativePath"] + + +def test_organism_link_by_taxon_id(client): + """Single organism link endpoint returns correct data.""" + # First get a valid taxon ID from the list + response = client.get("/api/v1/organisms/links") + data = response.json() + taxon_id = data["organisms"][0]["ncbiTaxonomyId"] + + # Then fetch that specific taxon ID + response = client.get(f"/api/v1/organisms/links/{taxon_id}") + assert response.status_code == 200 + item = response.json() + assert item["ncbiTaxonomyId"] == taxon_id + assert "relativePath" in item + + +def test_organism_link_not_found(client): + """Single organism link endpoint returns 404 for unknown taxon ID.""" + response = client.get("/api/v1/organisms/links/999999999") + assert response.status_code == 404 + + +def test_assembly_url_format(client): + """Assembly relative paths use underscore format for accessions.""" + response = client.get("/api/v1/assemblies/links") + data = response.json() + for item in data["assemblies"][:5]: + accession = item["assemblyAccession"] + path = item["relativePath"] + expected_url_accession = accession.replace(".", "_") + assert expected_url_accession in path, ( + f"Path {path} should contain {expected_url_accession}" + ) diff --git a/backend/api/tests/test_ncbi_links.py b/backend/api/tests/test_ncbi_links.py deleted file mode 100644 index 631c270d1..000000000 --- a/backend/api/tests/test_ncbi_links.py +++ /dev/null @@ -1,78 +0,0 @@ -"""Smoke tests for NCBI links API endpoints. - -These tests run against a live backend instance (via Docker). -Set BASE_URL environment variable to override the default endpoint. -""" - -import os - -import httpx -import pytest - -BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8080") - - -@pytest.fixture -def client(): - return httpx.Client(base_url=BASE_URL, timeout=30.0) - - -def test_health_endpoint(client): - """Health endpoint returns healthy status.""" - response = client.get("/api/v1/health") - assert response.status_code == 200 - data = response.json() - assert data["status"] == "healthy" - - -def test_organism_links_returns_list(client): - """Organism links endpoint returns a non-empty list.""" - response = client.get("/api/v1/links/organism-links.json") - assert response.status_code == 200 - data = response.json() - assert isinstance(data, list) - assert len(data) > 0 - - -def test_organism_links_has_required_fields(client): - """Organism links contain required fields.""" - response = client.get("/api/v1/links/organism-links.json") - data = response.json() - first_item = data[0] - assert "ncbiTaxonomyId" in first_item - assert "url" in first_item - assert "scientificName" in first_item - assert "brc-analytics.org" in first_item["url"] - - -def test_assembly_links_returns_list(client): - """Assembly links endpoint returns a non-empty list.""" - response = client.get("/api/v1/links/assembly-links.json") - assert response.status_code == 200 - data = response.json() - assert isinstance(data, list) - assert len(data) > 0 - - -def test_assembly_links_has_required_fields(client): - """Assembly links contain required fields.""" - response = client.get("/api/v1/links/assembly-links.json") - data = response.json() - first_item = data[0] - assert "accession" in first_item - assert "url" in first_item - assert "ncbiTaxonomyId" in first_item - assert "brc-analytics.org" in first_item["url"] - - -def test_assembly_url_format(client): - """Assembly URLs use underscore format for accessions.""" - response = client.get("/api/v1/links/assembly-links.json") - data = response.json() - for item in data[:5]: - accession = item["accession"] - url = item["url"] - expected_url_accession = accession.replace(".", "_") - assert expected_url_accession in url, ( - f"URL {url} should contain {expected_url_accession}" - )