From a8d1945cc0f1d7c83487f138ad4994046f855a59 Mon Sep 17 00:00:00 2001 From: Donny Winston Date: Wed, 24 Sep 2025 10:34:11 +0200 Subject: [PATCH 1/6] fix: drop stale _runtime.tmp.linked_instances.* collections Fix #1240 --- .../api/endpoints/lib/linked_instances.py | 15 ++++++++++++++- nmdc_runtime/api/endpoints/nmdcschema.py | 6 +++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/nmdc_runtime/api/endpoints/lib/linked_instances.py b/nmdc_runtime/api/endpoints/lib/linked_instances.py index 8ac05d13d..bf677d7ee 100644 --- a/nmdc_runtime/api/endpoints/lib/linked_instances.py +++ b/nmdc_runtime/api/endpoints/lib/linked_instances.py @@ -6,6 +6,7 @@ """ +from datetime import timedelta from typing import Literal, Any from bson import ObjectId @@ -13,7 +14,8 @@ from pymongo.database import Database as MongoDatabase from toolz import merge -from nmdc_runtime.api.core.util import hash_from_str +from nmdc_runtime.api.core.util import hash_from_str, now +from nmdc_runtime.api.db.mongo import get_mongo_db from nmdc_runtime.util import get_class_name_to_collection_names_map, nmdc_schema_view @@ -35,6 +37,17 @@ def temp_linked_instances_collection_name(ids: list[str], types: list[str]) -> s return f"_runtime.tmp.linked_instances.{hash_from_ids_and_types(ids=ids,types=types)}.{ObjectId()}" +def drop_stale_temp_linked_instances_collections(): + """Drop any temporary linked-instances collections that were generated earlier than one day ago.""" + mdb = get_mongo_db() + one_day_ago = now() - timedelta(days=1) + for collection_name in mdb.list_collection_names( + filter={"name": {"$regex": r"^_runtime.tmp.linked_instances\..*"}} + ): + if ObjectId(collection_name.split(".")[-1]).generation_time < one_day_ago: + mdb.drop_collection(collection_name) + + def gather_linked_instances( alldocs_collection: MongoCollection, ids: list[str], diff --git a/nmdc_runtime/api/endpoints/nmdcschema.py b/nmdc_runtime/api/endpoints/nmdcschema.py index 79bed17a4..e5eb18237 100644 --- a/nmdc_runtime/api/endpoints/nmdcschema.py +++ b/nmdc_runtime/api/endpoints/nmdcschema.py @@ -3,7 +3,7 @@ from typing import List, Dict, Annotated import pymongo -from fastapi import APIRouter, Depends, HTTPException, Path, Query +from fastapi import APIRouter, Depends, HTTPException, Path, Query, BackgroundTasks from pydantic import AfterValidator from refscan.lib.helpers import ( get_collection_names_from_schema, @@ -13,6 +13,7 @@ from nmdc_runtime.api.endpoints.lib.linked_instances import ( gather_linked_instances, hydrated, + drop_stale_temp_linked_instances_collections, ) from nmdc_runtime.config import IS_LINKED_INSTANCES_ENDPOINT_ENABLED from nmdc_runtime.minter.config import typecodes @@ -127,6 +128,7 @@ def get_nmdc_database_collection_stats( ) ) def get_linked_instances( + background_tasks: BackgroundTasks, ids: Annotated[ list[str], Query( @@ -222,6 +224,7 @@ class definition ([linkml:ClassDefinition](https://w3id.org/linkml/ClassDefiniti [nmdc:Sample](https://w3id.org/nmdc/Sample), etc. -- may be given. If no value for `types` is given, then all [nmdc:NamedThing](https://w3id.org/nmdc/NamedThing)s are returned. """ + background_tasks.add_task(drop_stale_temp_linked_instances_collections) if page_token is not None: rv = list_resources( req=ListRequest(page_token=page_token, max_page_size=max_page_size), mdb=mdb @@ -237,6 +240,7 @@ class definition ([linkml:ClassDefinition](https://w3id.org/linkml/ClassDefiniti status_code=status.HTTP_404_NOT_FOUND, detail=f"Some IDs not found: {ids_not_found}.", ) + types = types or ["nmdc:NamedThing"] types_possible = set([f"nmdc:{name}" for name in nmdc_schema_view().all_classes()]) types_not_found = list(set(types) - types_possible) From eb6db143303c651299566e3a33780bad9f06d4df Mon Sep 17 00:00:00 2001 From: Donny Winston Date: Wed, 24 Sep 2025 12:09:25 +0200 Subject: [PATCH 2/6] style: add type annotation to return value --- nmdc_runtime/api/endpoints/lib/linked_instances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nmdc_runtime/api/endpoints/lib/linked_instances.py b/nmdc_runtime/api/endpoints/lib/linked_instances.py index bf677d7ee..736793fc4 100644 --- a/nmdc_runtime/api/endpoints/lib/linked_instances.py +++ b/nmdc_runtime/api/endpoints/lib/linked_instances.py @@ -37,7 +37,7 @@ def temp_linked_instances_collection_name(ids: list[str], types: list[str]) -> s return f"_runtime.tmp.linked_instances.{hash_from_ids_and_types(ids=ids,types=types)}.{ObjectId()}" -def drop_stale_temp_linked_instances_collections(): +def drop_stale_temp_linked_instances_collections() -> None: """Drop any temporary linked-instances collections that were generated earlier than one day ago.""" mdb = get_mongo_db() one_day_ago = now() - timedelta(days=1) From 2dfdc943c8b771b3393c775fab7e4be6efcf9ef4 Mon Sep 17 00:00:00 2001 From: Donny Winston Date: Wed, 24 Sep 2025 12:47:33 +0200 Subject: [PATCH 3/6] fix: attempt to not require backgroundtask param for tests to directly call endpoint function. --- nmdc_runtime/api/endpoints/nmdcschema.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/nmdc_runtime/api/endpoints/nmdcschema.py b/nmdc_runtime/api/endpoints/nmdcschema.py index e5eb18237..8670dd2de 100644 --- a/nmdc_runtime/api/endpoints/nmdcschema.py +++ b/nmdc_runtime/api/endpoints/nmdcschema.py @@ -3,12 +3,14 @@ from typing import List, Dict, Annotated import pymongo -from fastapi import APIRouter, Depends, HTTPException, Path, Query, BackgroundTasks +from fastapi import APIRouter, Depends, HTTPException, Path, Query +from fastapi.responses import JSONResponse from pydantic import AfterValidator from refscan.lib.helpers import ( get_collection_names_from_schema, get_names_of_classes_eligible_for_collection, ) +from starlette.background import BackgroundTask from nmdc_runtime.api.endpoints.lib.linked_instances import ( gather_linked_instances, @@ -123,12 +125,15 @@ def get_nmdc_database_collection_stats( @decorate_if(condition=IS_LINKED_INSTANCES_ENDPOINT_ENABLED)( router.get( "/nmdcschema/linked_instances", - response_model=ListResponse[Doc], + responses={ + status.HTTP_200_OK: { + "model": ListResponse[Doc], + } + }, response_model_exclude_unset=True, ) ) def get_linked_instances( - background_tasks: BackgroundTasks, ids: Annotated[ list[str], Query( @@ -224,14 +229,14 @@ class definition ([linkml:ClassDefinition](https://w3id.org/linkml/ClassDefiniti [nmdc:Sample](https://w3id.org/nmdc/Sample), etc. -- may be given. If no value for `types` is given, then all [nmdc:NamedThing](https://w3id.org/nmdc/NamedThing)s are returned. """ - background_tasks.add_task(drop_stale_temp_linked_instances_collections) + background_task = BackgroundTask(drop_stale_temp_linked_instances_collections) if page_token is not None: rv = list_resources( req=ListRequest(page_token=page_token, max_page_size=max_page_size), mdb=mdb ) rv["resources"] = hydrated(rv["resources"], mdb) if hydrate else rv["resources"] rv["resources"] = [strip_oid(d) for d in rv["resources"]] - return rv + return JSONResponse(rv, background=background_task) ids_found = [d["id"] for d in mdb.alldocs.find({"id": {"$in": ids}}, {"id": 1})] ids_not_found = list(set(ids) - set(ids_found)) @@ -266,7 +271,7 @@ class definition ([linkml:ClassDefinition](https://w3id.org/linkml/ClassDefiniti ) rv["resources"] = hydrated(rv["resources"], mdb) if hydrate else rv["resources"] rv["resources"] = [strip_oid(d) for d in rv["resources"]] - return rv + return JSONResponse(rv, background=background_task) @router.get( From b8a55e3c9ddd59981e5133fe5258c79e26412ae5 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Tue, 4 Nov 2025 00:26:36 -0800 Subject: [PATCH 4/6] Simplify `now()` function so it always returns a `datetime` --- nmdc_runtime/api/core/util.py | 11 ++++++++--- nmdc_runtime/api/models/run.py | 10 +++++----- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/nmdc_runtime/api/core/util.py b/nmdc_runtime/api/core/util.py index e8edd496d..c83aa23db 100644 --- a/nmdc_runtime/api/core/util.py +++ b/nmdc_runtime/api/core/util.py @@ -52,9 +52,14 @@ def raise404_if_none(doc, detail="Not found"): return doc -def now(as_str=False): - dt = datetime.now(timezone.utc) - return dt.isoformat() if as_str else dt +def now() -> datetime: + """Get a `datetime` representing the current time in UTC.""" + return datetime.now(timezone.utc) + + +def now_str() -> str: + """Get an ISO string representing the current time in UTC.""" + return now().isoformat() def expiry_dt_from_now(days=0, hours=0, minutes=0, seconds=0): diff --git a/nmdc_runtime/api/models/run.py b/nmdc_runtime/api/models/run.py index 3cdf43d16..b56ec4f0f 100644 --- a/nmdc_runtime/api/models/run.py +++ b/nmdc_runtime/api/models/run.py @@ -9,7 +9,7 @@ from toolz import merge from nmdc_runtime.api.core.idgen import generate_one_id -from nmdc_runtime.api.core.util import now, raise404_if_none, pick +from nmdc_runtime.api.core.util import now, now_str, raise404_if_none, pick from nmdc_runtime.api.models.user import User PRODUCER_URL_BASE_DEFAULT = ( @@ -90,7 +90,7 @@ def _add_run_requested_event(run_spec: RunUserSpec, mdb: MongoDatabase, user: Us {"producer": PRODUCER_URL, "schemaURL": SCHEMA_URL}, ), type=RunEventType.REQUESTED, - time=now(as_str=True), + time=now_str(), inputs=run_spec.inputs, ) mdb.run_events.insert_one(event.model_dump()) @@ -112,7 +112,7 @@ def _add_run_started_event(run_id: str, mdb: MongoDatabase): run=requested.run, job=requested.job, type=RunEventType.STARTED, - time=now(as_str=True), + time=now_str(), ).model_dump() ) return run_id @@ -133,7 +133,7 @@ def _add_run_fail_event(run_id: str, mdb: MongoDatabase): run=requested.run, job=requested.job, type=RunEventType.FAIL, - time=now(as_str=True), + time=now_str(), ).model_dump() ) return run_id @@ -154,7 +154,7 @@ def _add_run_complete_event(run_id: str, mdb: MongoDatabase, outputs: List[str]) run=started.run, job=started.job, type=RunEventType.COMPLETE, - time=now(as_str=True), + time=now_str(), outputs=outputs, ).model_dump() ) From 4f5a112a64ae641c176f52b3eccba75b072d7af5 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Tue, 4 Nov 2025 01:02:27 -0800 Subject: [PATCH 5/6] Use `BackgroundTasks` as shown in FastAPI docs instead of Starlette docs --- nmdc_runtime/api/endpoints/nmdcschema.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/nmdc_runtime/api/endpoints/nmdcschema.py b/nmdc_runtime/api/endpoints/nmdcschema.py index 8670dd2de..bcd965dd9 100644 --- a/nmdc_runtime/api/endpoints/nmdcschema.py +++ b/nmdc_runtime/api/endpoints/nmdcschema.py @@ -3,14 +3,12 @@ from typing import List, Dict, Annotated import pymongo -from fastapi import APIRouter, Depends, HTTPException, Path, Query -from fastapi.responses import JSONResponse +from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Path, Query from pydantic import AfterValidator from refscan.lib.helpers import ( get_collection_names_from_schema, get_names_of_classes_eligible_for_collection, ) -from starlette.background import BackgroundTask from nmdc_runtime.api.endpoints.lib.linked_instances import ( gather_linked_instances, @@ -186,6 +184,10 @@ def get_linked_instances( ), ] = 20, mdb: MongoDatabase = Depends(get_mongo_db), + # FastAPI will inject this `background_tasks` argument, to which we can add background tasks + # for FastAPI to run after it returns the HTTP response. + # Reference: https://fastapi.tiangolo.com/tutorial/background-tasks/ + background_tasks: BackgroundTasks = BackgroundTasks(), ): """ Retrieves database instances that are both (a) linked to any of `ids`, and (b) of a type in `types`. @@ -229,14 +231,14 @@ class definition ([linkml:ClassDefinition](https://w3id.org/linkml/ClassDefiniti [nmdc:Sample](https://w3id.org/nmdc/Sample), etc. -- may be given. If no value for `types` is given, then all [nmdc:NamedThing](https://w3id.org/nmdc/NamedThing)s are returned. """ - background_task = BackgroundTask(drop_stale_temp_linked_instances_collections) + background_tasks.add_task(drop_stale_temp_linked_instances_collections) if page_token is not None: rv = list_resources( req=ListRequest(page_token=page_token, max_page_size=max_page_size), mdb=mdb ) rv["resources"] = hydrated(rv["resources"], mdb) if hydrate else rv["resources"] rv["resources"] = [strip_oid(d) for d in rv["resources"]] - return JSONResponse(rv, background=background_task) + return rv ids_found = [d["id"] for d in mdb.alldocs.find({"id": {"$in": ids}}, {"id": 1})] ids_not_found = list(set(ids) - set(ids_found)) @@ -271,7 +273,7 @@ class definition ([linkml:ClassDefinition](https://w3id.org/linkml/ClassDefiniti ) rv["resources"] = hydrated(rv["resources"], mdb) if hydrate else rv["resources"] rv["resources"] = [strip_oid(d) for d in rv["resources"]] - return JSONResponse(rv, background=background_task) + return rv @router.get( From c32b1466dec8ca7ecb948314be0a060d8c0ce916 Mon Sep 17 00:00:00 2001 From: eecavanna Date: Tue, 4 Nov 2025 01:35:48 -0800 Subject: [PATCH 6/6] Add link to helpful StackOverflow page --- nmdc_runtime/api/endpoints/nmdcschema.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nmdc_runtime/api/endpoints/nmdcschema.py b/nmdc_runtime/api/endpoints/nmdcschema.py index bcd965dd9..f7e4a1d9a 100644 --- a/nmdc_runtime/api/endpoints/nmdcschema.py +++ b/nmdc_runtime/api/endpoints/nmdcschema.py @@ -186,7 +186,9 @@ def get_linked_instances( mdb: MongoDatabase = Depends(get_mongo_db), # FastAPI will inject this `background_tasks` argument, to which we can add background tasks # for FastAPI to run after it returns the HTTP response. - # Reference: https://fastapi.tiangolo.com/tutorial/background-tasks/ + # References: + # - https://fastapi.tiangolo.com/tutorial/background-tasks/ (RE: `BackgroundTasks`) + # - https://stackoverflow.com/a/68807219 (RE: how to specify it after some optional parameters) background_tasks: BackgroundTasks = BackgroundTasks(), ): """