Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
19c19de
added function to generate sequencing projects
mflynn-lanl Oct 22, 2025
2ff3203
added model for SequencingProject
mflynn-lanl Oct 22, 2025
dec1a93
added get endpoints to list and retrieve sequencing projects and post…
mflynn-lanl Oct 22, 2025
99bbb8f
added tests for get and post sequencing project endpoints
mflynn-lanl Oct 22, 2025
1819634
Merge branch '1289-create-globus-table-related-endpoints' into 1291-c…
mflynn-lanl Oct 22, 2025
e6bd45a
added Field() to SequencingProject
mflynn-lanl Oct 22, 2025
a61f8b1
modified generate_sequencing_projects to match changes to model
mflynn-lanl Oct 22, 2025
efcba98
fixed print statement
mflynn-lanl Oct 22, 2025
63ce827
added missing comma
mflynn-lanl Oct 23, 2025
15bb8ac
fixed url of endpoints
mflynn-lanl Oct 23, 2025
31ca5e4
fixed tests
mflynn-lanl Oct 23, 2025
13dd3f9
style: reformat
invalid-email-address Oct 23, 2025
2560f37
Merge branch 'main' into 1291-create-sequencing-project-endpoints
mflynn-lanl Oct 30, 2025
08ba380
Added get, post and list endpoints for sequencing projects
mflynn-lanl Oct 30, 2025
01b2d99
changed project_name to sequencing_project_name and description to se…
mflynn-lanl Oct 30, 2025
4ebf966
changed project_name to sequencing_project_name
mflynn-lanl Oct 30, 2025
f93e412
added tests for creating, getting and listing sequencing projects
mflynn-lanl Oct 30, 2025
bef8a89
style: reformat
invalid-email-address Oct 30, 2025
dac20e6
Delete redundant function definition
eecavanna Nov 1, 2025
3e1dd6b
Rename `SequencingProject` class to emphasize relationship to JGI
eecavanna Nov 1, 2025
eb195a1
Remove redundant `import` statement
eecavanna Nov 1, 2025
849ad96
Rename endpoint to follow convention (`jgi_` prefix and plural)
eecavanna Nov 1, 2025
5cf5842
Remove `print` statements following initial prototyping
eecavanna Nov 1, 2025
ef00f4a
Add a description to each new endpoint
eecavanna Nov 1, 2025
8d5fb22
Standardize name of MongoDB collection used for `JGISequencingProject`s
eecavanna Nov 1, 2025
89d825e
style: reformat
invalid-email-address Nov 1, 2025
8cb594c
Make assertions about resource list instead of response payload wrapper
eecavanna Nov 1, 2025
4cd8ffd
Fix bug where test was trying to serialize dicts
eecavanna Nov 1, 2025
db58f17
makes sure duplicated sequencing_projects cannot be created
mflynn-lanl Nov 3, 2025
967f735
Adds check for existing jgi sequencing project name
mflynn-lanl Nov 3, 2025
09c7081
use enum for sequencing project collection, create index for sequenci…
mflynn-lanl Nov 3, 2025
050391b
fix type in error message
mflynn-lanl Nov 3, 2025
e6c3b05
fix test to create unique name for sequencing project
mflynn-lanl Nov 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 75 additions & 3 deletions nmdc_runtime/api/endpoints/wf_file_staging.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, Query
from pymongo.database import Database
from typing import Annotated
from fastapi import APIRouter, Depends, Query
from toolz import merge
import logging

Expand All @@ -16,8 +15,9 @@
GlobusTaskStatus,
JDPFileStatus,
JGISample,
JGISequencingProject,
WorkflowFileStagingCollectionName as CollectionName,
)
from nmdc_runtime.api.models.user import User
from nmdc_runtime.api.endpoints.util import check_action_permitted

router = APIRouter()
Expand All @@ -44,6 +44,8 @@ def create_globus_tasks(
mdb: Database = Depends(get_mongo_db),
user: User = Depends(get_current_active_user),
):
"""Create a `GlobusTask`."""

# check for permissions first
check_can_run_wf_file_staging_endpoints(user)
# check if record with same task_id already exists
Expand Down Expand Up @@ -72,6 +74,8 @@ def get_globus_tasks(
mdb: Database = Depends(get_mongo_db),
user: User = Depends(get_current_active_user),
):
"""Retrieve a `GlobusTask`."""

# check for permissions first
check_can_run_wf_file_staging_endpoints(user)
return raise404_if_none(
Expand All @@ -86,6 +90,8 @@ def update_globus_tasks(
mdb: Database = Depends(get_mongo_db),
user: User = Depends(get_current_active_user),
):
"""Update a `GlobusTask`."""

# check for permissions first
check_can_run_wf_file_staging_endpoints(user)

Expand Down Expand Up @@ -116,6 +122,7 @@ def list_globus_tasks(
user: User = Depends(get_current_active_user),
):
"""Get a list of `GlobusTask`s."""

# check for permissions first
check_can_run_wf_file_staging_endpoints(user)
rv = list_resources(req, mdb, "wf_file_staging.globus_tasks")
Expand Down Expand Up @@ -220,3 +227,68 @@ def update_jgi_samples(
{"jdp_file_id": jdp_file_id}, doc_jgi_sample_patched
)
return doc_jgi_sample_patched


@router.get(
    "/wf_file_staging/jgi_sequencing_projects",
    response_model=ListResponse[JGISequencingProject],
    response_model_exclude_unset=True,
)
def list_sequencing_project_records(
    req: Annotated[ListRequest, Query()],
    mdb: Database = Depends(get_mongo_db),
    user: User = Depends(get_current_active_user),
):
    """Get a list of `JGISequencingProject`s."""
    # NOTE: The docstring above is kept short on purpose; FastAPI publishes an
    #       endpoint function's docstring as the endpoint's description.

    # Reject the request before touching the database if the user is not
    # permitted to use the workflow-file-staging endpoints.
    check_can_run_wf_file_staging_endpoints(user)

    # Delegate filtering/pagination (as described by `req`) to the shared helper.
    collection_name = CollectionName.JGI_SEQUENCING_PROJECTS.value
    listing = list_resources(req, mdb, collection_name)
    return listing


@router.post(
    "/wf_file_staging/jgi_sequencing_projects",
    status_code=status.HTTP_201_CREATED,
    response_model=JGISequencingProject,
)
def create_sequencing_record(
    sequencing_project_in: JGISequencingProject,
    mdb: Database = Depends(get_mongo_db),
    user: User = Depends(get_current_active_user),
):
    """Create a `JGISequencingProject`."""
    # NOTE: The docstring above is kept short on purpose; FastAPI publishes an
    #       endpoint function's docstring as the endpoint's description.
    #
    # Parameters:
    #   sequencing_project_in: The project to store (validated request body).
    #   mdb: Handle to the MongoDB database.
    #   user: The authenticated user making the request.
    # Returns: The stored document (as a `dict`), serialized via `response_model`.
    # Raises: `HTTPException` (400) if a project with the same
    #         `sequencing_project_name` already exists.

    # Imported here (rather than at module level) so this block is self-contained.
    from pymongo.errors import DuplicateKeyError

    check_can_run_wf_file_staging_endpoints(user)

    collection = mdb[CollectionName.JGI_SEQUENCING_PROJECTS.value]

    # Both duplicate-detection paths below report the exact same error.
    already_exists = HTTPException(
        status_code=status.HTTP_400_BAD_REQUEST,
        detail=f"JGISequencingProject with project name {sequencing_project_in.sequencing_project_name} already exists.",
    )

    # Fast path: reject an obvious duplicate without attempting an insert.
    existing = collection.find_one(
        {"sequencing_project_name": sequencing_project_in.sequencing_project_name}
    )
    if existing is not None:
        raise already_exists

    sequencing_project_dict = sequencing_project_in.model_dump()
    try:
        collection.insert_one(sequencing_project_dict)
    except DuplicateKeyError:
        # Another request inserted the same name between the check above and
        # this insert; the unique index on `sequencing_project_name` rejected
        # ours. Report it as a duplicate instead of letting it become a 500.
        raise already_exists from None
    return sequencing_project_dict


@router.get(
    "/wf_file_staging/jgi_sequencing_projects/{sequencing_project_name}",
    response_model=JGISequencingProject,
)
def get_sequencing_project(
    sequencing_project_name: str,
    mdb: Database = Depends(get_mongo_db),
    user: User = Depends(get_current_active_user),
):
    """Retrieve a `JGISequencingProject`."""
    # NOTE: The docstring above is kept short on purpose; FastAPI publishes an
    #       endpoint function's docstring as the endpoint's description.

    # Permission check comes first, before any database access.
    check_can_run_wf_file_staging_endpoints(user)

    # Look the project up by the name given in the URL path.
    collection = mdb[CollectionName.JGI_SEQUENCING_PROJECTS.value]
    matching_doc = collection.find_one(
        {"sequencing_project_name": sequencing_project_name}
    )

    # `find_one` yields `None` when nothing matches; the helper handles that case
    # (presumably by raising an HTTP 404, per its name -- confirm in its definition).
    return raise404_if_none(matching_doc)
14 changes: 13 additions & 1 deletion nmdc_runtime/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from starlette.responses import RedirectResponse, HTMLResponse, FileResponse
from refscan.lib.helpers import get_collection_names_from_schema
from scalar_fastapi import get_scalar_api_reference

from nmdc_runtime.api.models.wfe_file_stages import WorkflowFileStagingCollectionName
from nmdc_runtime import config
from nmdc_runtime.api.analytics import Analytics
from nmdc_runtime.api.middleware import PyinstrumentMiddleware
Expand Down Expand Up @@ -202,6 +202,17 @@ def ensure_jgi_samples_id_is_indexed():
)


def ensure_sequencing_project_name_is_indexed():
    """
    Ensures that the `wf_file_staging.jgi_sequencing_projects` collection has a
    unique index on its `sequencing_project_name` field.
    """

    # Resolve the collection via the shared enum so the name stays in sync with
    # the endpoints that read from and write to it.
    projects = get_mongo_db()[
        WorkflowFileStagingCollectionName.JGI_SEQUENCING_PROJECTS.value
    ]

    # `unique=True` makes the database itself reject duplicate project names.
    projects.create_index("sequencing_project_name", background=True, unique=True)


def ensure_default_api_perms():
"""
Ensures that specific users (currently only "admin") are allowed to perform
Expand Down Expand Up @@ -255,6 +266,7 @@ async def lifespan(app: FastAPI):
ensure_type_field_is_indexed()
ensure_default_api_perms()
ensure_globus_tasks_id_is_indexed()
ensure_sequencing_project_name_is_indexed()
# Invoke a function—thereby priming its memoization cache—in order to speed up all future invocations.
get_allowed_references() # we ignore the return value here

Expand Down
29 changes: 29 additions & 0 deletions nmdc_runtime/api/models/wfe_file_stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
import datetime


class WorkflowFileStagingCollectionName(str, Enum):
    """
    Names of the MongoDB collections used for workflow file staging.

    Each member is also a `str`, and its `.value` is the collection name.
    """

    # Collection that holds `JGISequencingProject` documents.
    JGI_SEQUENCING_PROJECTS = "wf_file_staging.jgi_sequencing_projects"


class GlobusTaskStatus(str, Enum):
ACTIVE = "ACTIVE"
INACTIVE = "INACTIVE"
Expand Down Expand Up @@ -91,3 +97,26 @@ class JGISample(BaseModel):
description="Request ID from the JGI data portal after a request to have the files restored from tape is submitted.",
examples=[1],
)


class JGISequencingProject(BaseModel):
    """
    A representation of a JGI sequencing project and its associated metadata.
    """

    # All four fields are required strings (`...` as the default marks a
    # Pydantic field as required).

    # Name of the project; the lookup key for these records.
    sequencing_project_name: str = Field(
        ...,
        description="Name of the sequencing project that we can refer to while staging files.",
        examples=["Human Genome Project"],
    )
    # Free-text description of the project.
    sequencing_project_description: str = Field(
        ...,
        description="Detailed description of the sequencing project",
        examples=["A project to sequence the human genome."],
    )
    # JGI proposal identifier, stored as a string (the example is numeric-looking).
    jgi_proposal_id: str = Field(
        ..., description="JGI proposal ID", examples=["503568"]
    )
    # Identifier of the related NMDC study (e.g. `nmdc:sty-...`).
    nmdc_study_id: str = Field(
        ..., description="NMDC study ID", examples=["nmdc:sty-11-28tm5d36"]
    )
50 changes: 47 additions & 3 deletions tests/lib/faker.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)

from nmdc_runtime.api.models.job import Job
from nmdc_runtime.api.models.wfe_file_stages import GlobusTask, GlobusTaskStatus, JGISample
from nmdc_runtime.api.models.wfe_file_stages import GlobusTask, GlobusTaskStatus, JGISample, JGISequencingProject

class Faker:
r"""
Expand Down Expand Up @@ -342,6 +342,7 @@ def generate_nucleotide_sequencings(self, quantity: int, associated_studies: Lis

return documents


def generate_data_objects(self, quantity: int, **overrides) -> List[dict]:
"""
Generates the specified number of documents representing `DataObject` instances,
Expand Down Expand Up @@ -399,6 +400,7 @@ def generate_data_objects(self, quantity: int, **overrides) -> List[dict]:

return documents


def generate_workflow_executions(self, quantity: int, workflow_type: str, was_informed_by: List[str], has_input: List[str], **overrides) -> List[dict]:
"""
Generates the specified number of documents representing generic workflow execution instances,
Expand Down Expand Up @@ -478,7 +480,8 @@ def generate_workflow_executions(self, quantity: int, workflow_type: str, was_in
documents.append(document)

return documents



def generate_globus_tasks(self, quantity: int, **overrides) -> List[dict]:
"""
Generates the specified number of documents representing `GlobusTask` instances,
Expand Down Expand Up @@ -515,6 +518,7 @@ class defined locally, in the `nmdc_runtime.api.models` module.

return documents


def generate_jgi_samples(self, quantity: int, **overrides) -> List[dict]:
"""
Generates the specified number of documents representing `JGISample` instances,
Expand Down Expand Up @@ -563,7 +567,47 @@ class defined locally, in the `nmdc_runtime.api.models` module.
documents.append(document)

return documents



def generate_sequencing_projects(self, quantity: int, **overrides) -> List[dict]:
    """
    Generates the specified number of documents representing `JGISequencingProject` instances,
    which can be stored in the `wf_file_staging.jgi_sequencing_projects` collection.

    Each document gets a distinct default `sequencing_project_name`
    (`arbitrary_string-1`, `arbitrary_string-2`, ...) unless that field is overridden.

    Note: The `JGISequencingProject` class is NOT defined in the NMDC Schema. It is an ad hoc
          class defined locally, in the `nmdc_runtime.api.models` module.

    :param quantity: Number of documents to create
    :param overrides: Fields, if any, to add or override in each document
    :return: The generated documents

    >>> f = Faker()
    >>> sequencing_projects = f.generate_sequencing_projects(1)
    >>> len(sequencing_projects)
    1
    >>> isinstance(sequencing_projects[0]['sequencing_project_name'], str)
    True
    """
    # For each document: start from the defaults, let `overrides` win, validate
    # the result by instantiating a `JGISequencingProject`, then dump it to a `dict`.
    return [
        JGISequencingProject(
            **{
                "sequencing_project_name": f"arbitrary_string-{i+1}",
                "sequencing_project_description": "arbitrary_string",
                "jgi_proposal_id": "arbitrary_string",
                "nmdc_study_id": "arbitrary_string",
                **overrides,
            }
        ).model_dump()
        for i in range(quantity)
    ]


def generate_jobs(self, quantity: int, **overrides) -> List[dict]:
"""
Generates the specified number of documents representing `Job` instances,
Expand Down
Loading