diff --git a/nmdc_runtime/api/core/util.py b/nmdc_runtime/api/core/util.py index e8edd496d..c83aa23db 100644 --- a/nmdc_runtime/api/core/util.py +++ b/nmdc_runtime/api/core/util.py @@ -52,9 +52,14 @@ def raise404_if_none(doc, detail="Not found"): return doc -def now(as_str=False): - dt = datetime.now(timezone.utc) - return dt.isoformat() if as_str else dt +def now() -> datetime: + """Get a `datetime` representing the current time in UTC.""" + return datetime.now(timezone.utc) + + +def now_str() -> str: + """Get an ISO string representing the current time in UTC.""" + return now().isoformat() def expiry_dt_from_now(days=0, hours=0, minutes=0, seconds=0): diff --git a/nmdc_runtime/api/endpoints/lib/linked_instances.py b/nmdc_runtime/api/endpoints/lib/linked_instances.py index 8ac05d13d..736793fc4 100644 --- a/nmdc_runtime/api/endpoints/lib/linked_instances.py +++ b/nmdc_runtime/api/endpoints/lib/linked_instances.py @@ -6,6 +6,7 @@ """ +from datetime import timedelta from typing import Literal, Any from bson import ObjectId @@ -13,7 +14,8 @@ from pymongo.database import Database as MongoDatabase from toolz import merge -from nmdc_runtime.api.core.util import hash_from_str +from nmdc_runtime.api.core.util import hash_from_str, now +from nmdc_runtime.api.db.mongo import get_mongo_db from nmdc_runtime.util import get_class_name_to_collection_names_map, nmdc_schema_view @@ -35,6 +37,17 @@ def temp_linked_instances_collection_name(ids: list[str], types: list[str]) -> s return f"_runtime.tmp.linked_instances.{hash_from_ids_and_types(ids=ids,types=types)}.{ObjectId()}" +def drop_stale_temp_linked_instances_collections() -> None: + """Drop any temporary linked-instances collections that were generated earlier than one day ago.""" + mdb = get_mongo_db() + one_day_ago = now() - timedelta(days=1) + for collection_name in mdb.list_collection_names( + filter={"name": {"$regex": r"^_runtime.tmp.linked_instances\..*"}} + ): + if ObjectId(collection_name.split(".")[-1]).generation_time < one_day_ago: + mdb.drop_collection(collection_name) + + def gather_linked_instances( alldocs_collection: MongoCollection, ids: list[str], diff --git a/nmdc_runtime/api/endpoints/nmdcschema.py b/nmdc_runtime/api/endpoints/nmdcschema.py index 79bed17a4..f7e4a1d9a 100644 --- a/nmdc_runtime/api/endpoints/nmdcschema.py +++ b/nmdc_runtime/api/endpoints/nmdcschema.py @@ -3,7 +3,7 @@ from typing import List, Dict, Annotated import pymongo -from fastapi import APIRouter, Depends, HTTPException, Path, Query +from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Path, Query from pydantic import AfterValidator from refscan.lib.helpers import ( get_collection_names_from_schema, @@ -13,6 +13,7 @@ from nmdc_runtime.api.endpoints.lib.linked_instances import ( gather_linked_instances, hydrated, + drop_stale_temp_linked_instances_collections, ) from nmdc_runtime.config import IS_LINKED_INSTANCES_ENDPOINT_ENABLED from nmdc_runtime.minter.config import typecodes @@ -122,7 +123,11 @@ def get_nmdc_database_collection_stats( @decorate_if(condition=IS_LINKED_INSTANCES_ENDPOINT_ENABLED)( router.get( "/nmdcschema/linked_instances", - response_model=ListResponse[Doc], + responses={ + status.HTTP_200_OK: { + "model": ListResponse[Doc], + } + }, response_model_exclude_unset=True, ) ) @@ -179,6 +184,12 @@ def get_linked_instances( ), ] = 20, mdb: MongoDatabase = Depends(get_mongo_db), + # FastAPI will inject this `background_tasks` argument, to which we can add background tasks + # for FastAPI to run after it returns the HTTP response. + # References: + # - https://fastapi.tiangolo.com/tutorial/background-tasks/ (RE: `BackgroundTasks`) + # - https://stackoverflow.com/a/68807219 (RE: how to specify it after some optional parameters) + background_tasks: BackgroundTasks = BackgroundTasks(), ): """ Retrieves database instances that are both (a) linked to any of `ids`, and (b) of a type in `types`. @@ -222,6 +233,7 @@ class definition ([linkml:ClassDefinition](https://w3id.org/linkml/ClassDefiniti [nmdc:Sample](https://w3id.org/nmdc/Sample), etc. -- may be given. If no value for `types` is given, then all [nmdc:NamedThing](https://w3id.org/nmdc/NamedThing)s are returned. """ + background_tasks.add_task(drop_stale_temp_linked_instances_collections) if page_token is not None: rv = list_resources( req=ListRequest(page_token=page_token, max_page_size=max_page_size), mdb=mdb @@ -237,6 +249,7 @@ class definition ([linkml:ClassDefinition](https://w3id.org/linkml/ClassDefiniti status_code=status.HTTP_404_NOT_FOUND, detail=f"Some IDs not found: {ids_not_found}.", ) + types = types or ["nmdc:NamedThing"] types_possible = set([f"nmdc:{name}" for name in nmdc_schema_view().all_classes()]) types_not_found = list(set(types) - types_possible) diff --git a/nmdc_runtime/api/models/run.py b/nmdc_runtime/api/models/run.py index 3cdf43d16..b56ec4f0f 100644 --- a/nmdc_runtime/api/models/run.py +++ b/nmdc_runtime/api/models/run.py @@ -9,7 +9,7 @@ from toolz import merge from nmdc_runtime.api.core.idgen import generate_one_id -from nmdc_runtime.api.core.util import now, raise404_if_none, pick +from nmdc_runtime.api.core.util import now, now_str, raise404_if_none, pick from nmdc_runtime.api.models.user import User PRODUCER_URL_BASE_DEFAULT = ( @@ -90,7 +90,7 @@ def _add_run_requested_event(run_spec: RunUserSpec, mdb: MongoDatabase, user: Us {"producer": PRODUCER_URL, "schemaURL": SCHEMA_URL}, ), type=RunEventType.REQUESTED, - time=now(as_str=True), + time=now_str(), inputs=run_spec.inputs, ) mdb.run_events.insert_one(event.model_dump()) @@ -112,7 +112,7 @@ def _add_run_started_event(run_id: str, mdb: MongoDatabase): run=requested.run, job=requested.job, type=RunEventType.STARTED, - time=now(as_str=True), + time=now_str(), ).model_dump() ) return run_id @@ -133,7 +133,7 @@ def _add_run_fail_event(run_id: str, mdb: MongoDatabase): run=requested.run, job=requested.job, type=RunEventType.FAIL, - time=now(as_str=True), + time=now_str(), ).model_dump() ) return run_id @@ -154,7 +154,7 @@ def _add_run_complete_event(run_id: str, mdb: MongoDatabase, outputs: List[str]) run=started.run, job=started.job, type=RunEventType.COMPLETE, - time=now(as_str=True), + time=now_str(), outputs=outputs, ).model_dump() )