From 78ca86deea5333a9fbe62eb2cce7e3b523d5e6d0 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:32:55 -0700 Subject: [PATCH 01/21] add upload script --- cellpack/bin/upload_to_client.py | 51 ++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 cellpack/bin/upload_to_client.py diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py new file mode 100644 index 00000000..93d94f3a --- /dev/null +++ b/cellpack/bin/upload_to_client.py @@ -0,0 +1,51 @@ +import json +import fire + +from cellpack.autopack.FirebaseHandler import FirebaseHandler +from cellpack.autopack.DBRecipeHandler import DBUploader +from cellpack.autopack.loaders.config_loader import ConfigLoader +from cellpack.autopack.loaders.recipe_loader import RecipeLoader +from cellpack.bin.upload import get_recipe_metadata + +def upload_to_client( + recipe: str, + config: str, + fields: str, + name: str +): + """ + Uploads recipe, config, and editable fields, read from specified + JSON files, to the database for client access + """ + db_handler = FirebaseHandler() + recipe_id = "" + config_id = "" + editable_fields_ids = [] + if FirebaseHandler._initialized: + db_handler = DBUploader(db_handler) + if recipe: + recipe_loader = RecipeLoader(recipe) + recipe_full_data = recipe_loader._read(resolve_inheritance=False) + recipe_meta_data = get_recipe_metadata(recipe_loader) + recipe_id = db_handler.upload_recipe(recipe_meta_data, recipe_full_data) + if config: + config_data = ConfigLoader(config).config + config_id = db_handler.upload_config(config_data, config) + if fields: + editable_fields_data = json.load(open(fields, "r")) + for field in editable_fields_data.get("editable_fields", []): + id, _ = db_handler.upload_data("editable_fields", field) + editable_fields_ids.append(id) + recipe_metadata = { + "name": name, + "recipe": recipe_id, + "config": config_id, + "editable_fields": editable_fields_ids, + } + db_handler.upload_data("client_recipes", recipe_metadata) + +def main(): + fire.Fire(upload_to_client) + +if __name__ == "__main__": + main() From 78b9b0a756443d7aa07d359ba63225fdfdb999a6 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:40:00 -0700 Subject: [PATCH 02/21] add example data and more documentation --- cellpack/bin/upload_to_client.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py index 93d94f3a..9536defc 100644 --- a/cellpack/bin/upload_to_client.py +++ b/cellpack/bin/upload_to_client.py @@ -16,6 +16,15 @@ def upload_to_client( """ Uploads recipe, config, and editable fields, read from specified JSON files, to the database for client access + + :param recipe: string argument + path to local recipe file to upload to firebase + :param config: string argument + path to local config file to upload to firebase + :param fields: string argument + path to local editable fields file to upload to firebase + :param name: string argument + display name for recipe in client selection menu """ db_handler = FirebaseHandler() recipe_id = "" From a9b056bc0919bfacd7bfb3c3cb29f4c912fa996e Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 13:42:16 -0700 Subject: [PATCH 03/21] point to correct collection --- cellpack/bin/upload_to_client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py index 9536defc..f778aa53 100644 --- a/cellpack/bin/upload_to_client.py +++ 
b/cellpack/bin/upload_to_client.py @@ -51,7 +51,9 @@ def upload_to_client( "config": config_id, "editable_fields": editable_fields_ids, } - db_handler.upload_data("client_recipes", recipe_metadata) + + # Upload the combined recipe metadata to example_packings collection for client + db_handler.upload_data("example_packings", recipe_metadata) def main(): fire.Fire(upload_to_client) From f5f7a6910a91aea4089ff861264dea6127c24c4c Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 14:29:03 -0700 Subject: [PATCH 04/21] have server accept recipe as json object in body of request --- cellpack/autopack/__init__.py | 8 +++++++- cellpack/autopack/loaders/recipe_loader.py | 5 +++-- cellpack/bin/pack.py | 4 ++-- docker/server.py | 14 +++++++++----- 4 files changed, 21 insertions(+), 10 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 2219525b..4bfd996e 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -411,8 +411,14 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False + filename, destination="", cache="geometries", force=None, use_docker=False, recipe_obj=None ): + if recipe_obj is not None: + composition = DBRecipeLoader.remove_empty( + recipe_obj.get("composition", {}) + ) + recipe_obj["composition"] = composition + return recipe_obj, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 84cd78ac..4b2930c8 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,7 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False): + def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, recipe_obj=None): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -38,6 +38,7 @@ def __init__(self, input_file_path, save_converted_recipe=False, use_docker=Fals self.ingredient_list = [] self.compartment_list = [] self.save_converted_recipe = save_converted_recipe + self.recipe_obj = recipe_obj # set CURRENT_RECIPE_PATH appropriately for remote(firebase) vs local recipes if autopack.is_remote_path(self.file_path): @@ -169,7 +170,7 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker + self.file_path, cache="recipes", use_docker=use_docker, recipe_obj=self.recipe_obj ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 9db53937..fdbfede0 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,7 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, recipe_str=None ): """ Initializes an autopack packing from the command line @@ -40,7 +40,7 @@ def pack( packing_config_data = ConfigLoader(config_path, docker).config recipe_loader = 
RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker + recipe, packing_config_data["save_converted_recipe"], docker, recipe_str ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} diff --git a/docker/server.py b/docker/server.py index 581e0151..6e625314 100644 --- a/docker/server.py +++ b/docker/server.py @@ -12,11 +12,11 @@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, recipe, config, job_id): + async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True) + pack(recipe=recipe, config_path=config, docker=True, recipe_str=body) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) @@ -37,8 +37,12 @@ async def health_check(self, request: web.Request) -> web.Response: return web.Response() async def pack_handler(self, request: web.Request) -> web.Response: - recipe = request.rel_url.query.get("recipe") - if recipe is None: + recipe = request.rel_url.query.get("recipe") or "" + if request.can_read_body: + body = await request.json() + else: + body = None + if not recipe and not body: raise web.HTTPBadRequest( "Pack requests must include recipe as a query param" ) @@ -46,7 +50,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: job_id = str(uuid.uuid4()) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id)) + packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id, body)) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion From f87915ac4c8ee4137333ee8ce1d721214446ad23 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 14:29:49 -0700 Subject: [PATCH 05/21] update documentation --- docker/Dockerfile.ecs | 3 ++- docs/DOCKER.md | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.ecs b/docker/Dockerfile.ecs index 7a303c02..2016c222 100644 --- a/docker/Dockerfile.ecs +++ b/docker/Dockerfile.ecs @@ -4,7 +4,8 @@ WORKDIR /cellpack COPY . /cellpack RUN python -m pip install --upgrade pip --root-user-action=ignore -RUN python -m pip install . -r requirements/linux/requirements.txt --root-user-action=ignore +RUN python -m pip install . +RUN python -m pip install aiohttp mdutils EXPOSE 80 diff --git a/docs/DOCKER.md b/docs/DOCKER.md index a1214a33..48e4b6dd 100644 --- a/docs/DOCKER.md +++ b/docs/DOCKER.md @@ -13,6 +13,6 @@ ## AWS ECS Docker Image 1. Build image, running `docker build -f docker/Dockerfile.ecs -t [CONTAINER-NAME] .` 2. Run packings in the container, running: `docker run -v ~/.aws:/root/.aws -p 80:80 [CONTAINER-NAME]` -3. Try hitting the test endpoint on the server, by navigating to `http://0.0.0.0:8443/hello` in your browser. -4. Try running a packing on the server, by hitting the `http://0.0.0.0:80/pack?recipe=firebase:recipes/one_sphere_v_1.0.0` in your browser. +3. Try hitting the test endpoint on the server, by navigating to `http://0.0.0.0:80/hello` in your browser. +4. Try running a packing on the server, by hitting the `http://0.0.0.0:80/start-packing?recipe=firebase:recipes/one_sphere_v_1.0.0` in your browser. 5. 
Verify that the packing result path was uploaded to the firebase results table, with the job id specified in the response from the request in step 4. The result simularium file can be found at the s3 path specified there. \ No newline at end of file From 1f2d2e345523914d135379c826bdc00e51fbe6c0 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 22 Oct 2025 16:41:35 -0700 Subject: [PATCH 06/21] remove accidental dockerfile changes --- docker/Dockerfile.ecs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker/Dockerfile.ecs b/docker/Dockerfile.ecs index 2016c222..5ed89b7b 100644 --- a/docker/Dockerfile.ecs +++ b/docker/Dockerfile.ecs @@ -4,8 +4,7 @@ WORKDIR /cellpack COPY . /cellpack RUN python -m pip install --upgrade pip --root-user-action=ignore -RUN python -m pip install . -RUN python -m pip install aiohttp mdutils +RUN python -m pip install . --root-user-action=ignore EXPOSE 80 From bd8ec42d33b005ec06316de1a386478eb4cd24f7 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Thu, 23 Oct 2025 13:42:23 -0700 Subject: [PATCH 07/21] rename param json_recipe --- cellpack/autopack/__init__.py | 10 +++++----- cellpack/autopack/loaders/recipe_loader.py | 6 +++--- cellpack/bin/data-manifest.json | 10 ++++++++++ cellpack/bin/pack.py | 4 ++-- docker/server.py | 2 +- 5 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 cellpack/bin/data-manifest.json diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 4bfd996e..65d32078 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -411,14 +411,14 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False, recipe_obj=None + filename, destination="", cache="geometries", force=None, use_docker=False, json_recipe=None ): - if recipe_obj is not None: + if json_recipe is not None: composition = DBRecipeLoader.remove_empty( - recipe_obj.get("composition", {}) + json_recipe.get("composition", {}) ) - recipe_obj["composition"] = composition - return recipe_obj, None, False + json_recipe["composition"] = composition + return json_recipe, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 4b2930c8..23044443 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,7 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, recipe_obj=None): + def __init__(self, input_file_path, save_converted_recipe=False, use_docker=False, json_recipe=None): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -38,6 +38,7 @@ def __init__(self, input_file_path, save_converted_recipe=False, use_docker=Fals self.ingredient_list = [] self.compartment_list = [] self.save_converted_recipe = save_converted_recipe - self.recipe_obj = recipe_obj + self.json_recipe = json_recipe # set CURRENT_RECIPE_PATH appropriately for remote(firebase) vs local recipes if autopack.is_remote_path(self.file_path): @@ -170,7 +170,7 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False):
new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, recipe_obj=self.recipe_obj + self.file_path, cache="recipes", use_docker=use_docker, json_recipe=self.json_recipe ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/data-manifest.json b/cellpack/bin/data-manifest.json new file mode 100644 index 00000000..0a5e1f2d --- /dev/null +++ b/cellpack/bin/data-manifest.json @@ -0,0 +1,10 @@ +{ + "assay-dev": { + "data_source": "https://s3-us-west-2.amazonaws.com/file-download-service.allencell.org/assay-dev_2018-10-03.csv?versionId=XVdmE.6g1kk77c7jYA2Ge54eehTjY_AP", + "static_files": [] + }, + "test": { + "data_source": "https://cellpack-demo.s3.us-west-2.amazonaws.com/alli-test/test-manifest.csv", + "static_files": [] + } +} \ No newline at end of file diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index fdbfede0..dadf0f31 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,7 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, recipe_str=None + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, json_recipe=None ): """ Initializes an autopack packing from the command line @@ -40,7 +40,7 @@ def pack( packing_config_data = ConfigLoader(config_path, docker).config recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker, recipe_str + recipe, packing_config_data["save_converted_recipe"], docker, json_recipe ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} diff --git a/docker/server.py b/docker/server.py index 6e625314..ce6da7ee 100644 --- a/docker/server.py +++ b/docker/server.py @@ -16,7 +16,7 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True, recipe_str=body) + pack(recipe=recipe, config_path=config, docker=True, json_recipe=body) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From 358158eaa5aae8641d8c4c14574a03dbe1a3cb65 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:42:35 -0800 Subject: [PATCH 08/21] remove file that shouldn't be in this PR --- cellpack/bin/upload_to_client.py | 62 -------------------------------- 1 file changed, 62 deletions(-) delete mode 100644 cellpack/bin/upload_to_client.py diff --git a/cellpack/bin/upload_to_client.py b/cellpack/bin/upload_to_client.py deleted file mode 100644 index f778aa53..00000000 --- a/cellpack/bin/upload_to_client.py +++ /dev/null @@ -1,62 +0,0 @@ -import json -import fire - -from cellpack.autopack.FirebaseHandler import FirebaseHandler -from cellpack.autopack.DBRecipeHandler import DBUploader -from cellpack.autopack.loaders.config_loader import ConfigLoader -from cellpack.autopack.loaders.recipe_loader import RecipeLoader -from cellpack.bin.upload import get_recipe_metadata - -def upload_to_client( - recipe: str, - config: str, - fields: str, - name: str -): - """ - Uploads recipe, config, and editable fields, read from specified - JSON files, to the database for client access - - :param recipe: string argument - path to local recipe file to upload to firebase - :param config: string argument - path to local config file to upload to firebase - :param fields: string argument - path to local editable fields file to upload to firebase - :param name: string argument - display 
name for recipe in client selection menu - """ - db_handler = FirebaseHandler() - recipe_id = "" - config_id = "" - editable_fields_ids = [] - if FirebaseHandler._initialized: - db_handler = DBUploader(db_handler) - if recipe: - recipe_loader = RecipeLoader(recipe) - recipe_full_data = recipe_loader._read(resolve_inheritance=False) - recipe_meta_data = get_recipe_metadata(recipe_loader) - recipe_id = db_handler.upload_recipe(recipe_meta_data, recipe_full_data) - if config: - config_data = ConfigLoader(config).config - config_id = db_handler.upload_config(config_data, config) - if fields: - editable_fields_data = json.load(open(fields, "r")) - for field in editable_fields_data.get("editable_fields", []): - id, _ = db_handler.upload_data("editable_fields", field) - editable_fields_ids.append(id) - recipe_metadata = { - "name": name, - "recipe": recipe_id, - "config": config_id, - "editable_fields": editable_fields_ids, - } - - # Upload the combined recipe metadata to example_packings collection for client - db_handler.upload_data("example_packings", recipe_metadata) - -def main(): - fire.Fire(upload_to_client) - -if __name__ == "__main__": - main() From f0beaa1fc28cfb0242bfc428e8d7879716f3d7bd Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:43:04 -0800 Subject: [PATCH 09/21] remove accidental file --- cellpack/bin/data-manifest.json | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 cellpack/bin/data-manifest.json diff --git a/cellpack/bin/data-manifest.json b/cellpack/bin/data-manifest.json deleted file mode 100644 index 0a5e1f2d..00000000 --- a/cellpack/bin/data-manifest.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "assay-dev": { - "data_source": "https://s3-us-west-2.amazonaws.com/file-download-service.allencell.org/assay-dev_2018-10-03.csv?versionId=XVdmE.6g1kk77c7jYA2Ge54eehTjY_AP", - "static_files": [] - }, - "test": { - "data_source": "https://cellpack-demo.s3.us-west-2.amazonaws.com/alli-test/test-manifest.csv", - "static_files": [] - } -} \ No newline at end of file From a54ffa1d8393f89d2afb2926864bd1381e7870f2 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Fri, 9 Jan 2026 11:52:19 -0800 Subject: [PATCH 10/21] lint fixes --- cellpack/autopack/__init__.py | 11 +++++++---- cellpack/autopack/loaders/recipe_loader.py | 13 +++++++++++-- cellpack/bin/pack.py | 7 ++++++- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 65d32078..ce06494d 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -411,12 +411,15 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file( - filename, destination="", cache="geometries", force=None, use_docker=False, json_recipe=None + filename, + destination="", + cache="geometries", + force=None, + use_docker=False, + json_recipe=None, ): if json_recipe is not None: - composition = DBRecipeLoader.remove_empty( - json_recipe.get("composition", {}) - ) + composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) json_recipe["composition"] = composition return json_recipe, None, False if is_remote_path(filename): diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 23044443..cf87a10c 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -30,7 +30,13 @@ class RecipeLoader(object): # TODO: add all default values here default_values = default_recipe_values.copy() - def
__init__(self, input_file_path, save_converted_recipe=False, use_docker=False, json_recipe=None): + def __init__( + self, + input_file_path, + save_converted_recipe=False, + use_docker=False, + json_recipe=None, + ): _, file_extension = os.path.splitext(input_file_path) self.current_version = CURRENT_VERSION self.file_path = input_file_path @@ -170,7 +176,10 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, json_recipe=self.json_recipe + self.file_path, + cache="recipes", + use_docker=use_docker, + json_recipe=self.json_recipe, ) if database_name == "firebase": if is_unnested_firebase: diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index dadf0f31..99186c9d 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,7 +25,12 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True, json_recipe=None + recipe, + config_path=None, + analysis_config_path=None, + docker=False, + validate=True, + json_recipe=None, ): """ Initializes an autopack packing from the command line From 3d01db317181e78eb0b9c928b6959657d4b9b778 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:52:55 -0800 Subject: [PATCH 11/21] refactor to try to improve clarity of json recipe vs file path --- cellpack/autopack/__init__.py | 9 ++--- cellpack/autopack/loaders/recipe_loader.py | 17 +++++--- cellpack/bin/pack.py | 45 ++++++++++++++++++++-- docker/server.py | 7 +++- 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index ce06494d..4520d388 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -409,6 +409,10 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) f.close() return sphere_data +def load_json_recipe(json_recipe): + composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) + json_recipe["composition"] = composition + return json_recipe def load_file( filename, @@ -416,12 +420,7 @@ def load_file( cache="geometries", force=None, use_docker=False, - json_recipe=None, ): - if json_recipe is not None: - composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) - json_recipe["composition"] = composition - return json_recipe, None, False if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index cf87a10c..df9e26ac 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -175,12 +175,17 @@ def _migrate_version(self, old_recipe): ) def _read(self, resolve_inheritance=True, use_docker=False): - new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, - cache="recipes", - use_docker=use_docker, - json_recipe=self.json_recipe, - ) + database_name = None + is_unnested_firebase = False + if self.json_recipe is not None: + new_values = autopack.load_json_recipe(self.json_recipe) + else: + new_values, database_name, is_unnested_firebase = autopack.load_file( + self.file_path, + cache="recipes", + use_docker=use_docker, + ) + if database_name == "firebase": if is_unnested_firebase: objects = new_values.get("objects", {}) diff --git 
a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 99186c9d..39003eda 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -30,7 +30,6 @@ def pack( analysis_config_path=None, docker=False, validate=True, - json_recipe=None, ): """ Initializes an autopack packing from the command line @@ -42,12 +41,52 @@ def pack( :return: void """ - packing_config_data = ConfigLoader(config_path, docker).config + config_loader = ConfigLoader(config_path, docker) recipe_loader = RecipeLoader( - recipe, packing_config_data["save_converted_recipe"], docker, json_recipe + recipe, config_loader.config["save_converted_recipe"], docker ) + return run_packing( + recipe_loader, + config_loader, + analysis_config_path, + docker, + validate + ) + +def pack_from_json( + json_recipe, + config_path=None, + analysis_config_path=None, + docker=False, + validate=True, +): + """ + Initializes an autopack packing from the command line + :param json: JSON object representing the recipe + :param config_path: string argument, path to packing config file + :param analysis_config_path: string argument, path to analysis config file + :param docker: boolean argument, are we using docker + :param validate: boolean argument, validate recipe before packing + + :return: void + """ + config_loader = ConfigLoader(config_path, docker) + + recipe_loader = RecipeLoader( + "", config_loader.config["save_converted_recipe"], docker, json_recipe + ) + return run_packing( + recipe_loader, + config_loader, + analysis_config_path, + docker, + validate + ) + +def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): recipe_data = recipe_loader.recipe_data + packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: analysis_config_data = AnalysisConfigLoader(analysis_config_path).config diff --git a/docker/server.py b/docker/server.py index ce6da7ee..6bbf338c 100644 --- a/docker/server.py +++ b/docker/server.py @@ -4,7 +4,7 @@ import uuid from cellpack.autopack.DBRecipeHandler import DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS -from cellpack.bin.pack import pack +from cellpack.bin.pack import pack, pack_from_json SERVER_PORT = 80 @@ -16,7 +16,10 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - pack(recipe=recipe, config_path=config, docker=True, json_recipe=body) + if body is None: + pack(recipe=recipe, config_path=config, docker=True) + else: + pack_from_json(json_recipe=body, config_path=config, docker=True) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From 529e15b40e93ba0c60c526acf16646ed2eb04d61 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:56:38 -0800 Subject: [PATCH 12/21] lint fixes --- cellpack/autopack/__init__.py | 2 ++ cellpack/bin/pack.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 4520d388..f04bcb43 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -409,11 +409,13 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) f.close() return sphere_data + def load_json_recipe(json_recipe): composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) json_recipe["composition"] = composition return json_recipe + def load_file( filename, destination="", diff --git a/cellpack/bin/pack.py 
b/cellpack/bin/pack.py index 39003eda..80ef6784 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -54,6 +54,7 @@ def pack( validate ) + def pack_from_json( json_recipe, config_path=None, @@ -84,6 +85,7 @@ def pack_from_json( validate ) + def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): recipe_data = recipe_loader.recipe_data packing_config_data = config_loader.config From 63514c90cd54610a6ec759423c062cc52fd2ab37 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 14:57:23 -0800 Subject: [PATCH 13/21] lint fix --- cellpack/bin/pack.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 80ef6784..b8446f33 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -47,11 +47,7 @@ def pack( recipe, config_loader.config["save_converted_recipe"], docker ) return run_packing( - recipe_loader, - config_loader, - analysis_config_path, - docker, - validate + recipe_loader, config_loader, analysis_config_path, docker, validate ) @@ -78,11 +74,7 @@ def pack_from_json( "", config_loader.config["save_converted_recipe"], docker, json_recipe ) return run_packing( - recipe_loader, - config_loader, - analysis_config_path, - docker, - validate + recipe_loader, config_loader, analysis_config_path, docker, validate ) From b2440cd3f905caf29704a68db5edb2571f8e871d Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:00:38 -0800 Subject: [PATCH 14/21] minimize changeset --- cellpack/autopack/__init__.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index f04bcb43..e6701b52 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -416,13 +416,7 @@ def load_json_recipe(json_recipe): return json_recipe -def load_file( - filename, - destination="", - cache="geometries", - force=None, - use_docker=False, -): +def load_file(filename, destination="", cache="geometries", force=None, use_docker=False): if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: From 470e3a18ae7e4860eac53f0ff6637a920ad2b16b Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:02:35 -0800 Subject: [PATCH 15/21] minimize changeset --- cellpack/autopack/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index e6701b52..dbc90c58 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -416,7 +416,9 @@ def load_json_recipe(json_recipe): return json_recipe -def load_file(filename, destination="", cache="geometries", force=None, use_docker=False): +def load_file( + filename, destination="", cache="geometries", force=None, use_docker=False +): if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) if database_name == DATABASE_IDS.GITHUB: From 8a3489895a71298652056ab27dd73c391aaa0198 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 15:56:18 -0800 Subject: [PATCH 16/21] simplify changeset --- cellpack/autopack/__init__.py | 6 --- cellpack/autopack/loaders/recipe_loader.py | 15 +++++-- cellpack/bin/pack.py | 47 ++++------------------ docker/server.py | 8 ++-- 4 files changed, 23 insertions(+), 53 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index dbc90c58..2219525b 100755 --- 
a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -410,12 +410,6 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) return sphere_data -def load_json_recipe(json_recipe): - composition = DBRecipeLoader.remove_empty(json_recipe.get("composition", {})) - json_recipe["composition"] = composition - return json_recipe - - def load_file( filename, destination="", cache="geometries", force=None, use_docker=False ): diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index df9e26ac..4b89c816 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -56,6 +56,15 @@ def __init__( self.recipe_data = self._read(use_docker=use_docker) + @classmethod + def from_json(cls, json_recipe, save_converted_recipe=False, use_docker=False): + return cls( + input_file_path="", + save_converted_recipe=save_converted_recipe, + use_docker=use_docker, + json_recipe=json_recipe, + ) + @staticmethod def _resolve_object(key, objects): current_object = objects[key] @@ -177,9 +186,9 @@ def _migrate_version(self, old_recipe): def _read(self, resolve_inheritance=True, use_docker=False): database_name = None is_unnested_firebase = False - if self.json_recipe is not None: - new_values = autopack.load_json_recipe(self.json_recipe) - else: + new_values = self.json_recipe + if new_values is None: + # Read recipe from filepath new_values, database_name, is_unnested_firebase = autopack.load_file( self.file_path, cache="recipes", diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index b8446f33..6c603cbb 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -33,7 +33,7 @@ def pack( ): """ Initializes an autopack packing from the command line - :param recipe: string argument, path to recipe + :param recipe: string argument, path to recipe file, or a dictionary representing a recipe :param config_path: string argument, path to packing config file :param analysis_config_path: string argument, path to analysis config file :param docker: boolean argument, are we using docker @@ -41,45 +41,14 @@ def pack( :return: void """ - config_loader = ConfigLoader(config_path, docker) - - recipe_loader = RecipeLoader( - recipe, config_loader.config["save_converted_recipe"], docker - ) - return run_packing( - recipe_loader, config_loader, analysis_config_path, docker, validate - ) - - -def pack_from_json( - json_recipe, - config_path=None, - analysis_config_path=None, - docker=False, - validate=True, -): - """ - Initializes an autopack packing from the command line - :param json: JSON object representing the recipe - :param config_path: string argument, path to packing config file - :param analysis_config_path: string argument, path to analysis config file - :param docker: boolean argument, are we using docker - :param validate: boolean argument, validate recipe before packing - - :return: void - """ - config_loader = ConfigLoader(config_path, docker) - - recipe_loader = RecipeLoader( - "", config_loader.config["save_converted_recipe"], docker, json_recipe - ) - return run_packing( - recipe_loader, config_loader, analysis_config_path, docker, validate - ) - - -def run_packing(recipe_loader, config_loader, analysis_config_path, docker, validate): + if isinstance(recipe, dict): + # Load recipe from JSON dictionary + recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) + else: + # Load recipe from file path + recipe_loader = RecipeLoader(recipe, use_docker=docker) 
recipe_data = recipe_loader.recipe_data + config_loader = ConfigLoader(config_path, docker) packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: diff --git a/docker/server.py b/docker/server.py index 6bbf338c..257e66ec 100644 --- a/docker/server.py +++ b/docker/server.py @@ -4,7 +4,7 @@ import uuid from cellpack.autopack.DBRecipeHandler import DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS -from cellpack.bin.pack import pack, pack_from_json +from cellpack.bin.pack import pack SERVER_PORT = 80 @@ -16,10 +16,8 @@ async def run_packing(self, recipe, config, job_id, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: - if body is None: - pack(recipe=recipe, config_path=config, docker=True) - else: - pack_from_json(json_recipe=body, config_path=config, docker=True) + # Pack JSON recipe in body if provided, otherwise use recipe path + pack(recipe=(body if body else recipe), config_path=config, docker=True) except Exception as e: self.update_job_status(job_id, "FAILED", error_message=str(e)) From 45d438ae8788b6d999ae546fe4556742f30cc05f Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:03:12 -0800 Subject: [PATCH 17/21] code cleanup --- cellpack/autopack/loaders/recipe_loader.py | 4 +--- cellpack/bin/pack.py | 10 +++------- docker/server.py | 4 ++-- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index 4b89c816..e40b469e 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -190,9 +190,7 @@ def _read(self, resolve_inheritance=True, use_docker=False): if new_values is None: # Read recipe from filepath new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, - cache="recipes", - use_docker=use_docker, + self.file_path, cache="recipes", use_docker=use_docker, ) if database_name == "firebase": diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 6c603cbb..23a69a57 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -25,11 +25,7 @@ def pack( - recipe, - config_path=None, - analysis_config_path=None, - docker=False, - validate=True, + recipe, config_path=None, analysis_config_path=None, docker=False, validate=True ): """ Initializes an autopack packing from the command line @@ -41,6 +37,8 @@ def pack( :return: void """ + packing_config_data = ConfigLoader(config_path, docker).config + if isinstance(recipe, dict): # Load recipe from JSON dictionary recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) @@ -48,8 +46,6 @@ def pack( # Load recipe from file path recipe_loader = RecipeLoader(recipe, use_docker=docker) recipe_data = recipe_loader.recipe_data - config_loader = ConfigLoader(config_path, docker) - packing_config_data = config_loader.config analysis_config_data = {} if analysis_config_path is not None: analysis_config_data = AnalysisConfigLoader(analysis_config_path).config diff --git a/docker/server.py b/docker/server.py index 257e66ec..9b1ce105 100644 --- a/docker/server.py +++ b/docker/server.py @@ -12,7 +12,7 @@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, recipe, config, job_id, body=None): + async def run_packing(self, job_id, recipe=None, config=None, body=None): os.environ["AWS_BATCH_JOB_ID"] = job_id self.update_job_status(job_id, "RUNNING") try: @@ -51,7 +51,7 @@ 
async def pack_handler(self, request: web.Request) -> web.Response: job_id = str(uuid.uuid4()) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(recipe, config, job_id, body)) + packing_task = asyncio.create_task(self.run_packing(job_id, recipe, config, body)) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion From c8fe120ef4b9ecd392a8f8030307d07fee176f1f Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:05:28 -0800 Subject: [PATCH 18/21] minimize changeset --- cellpack/bin/pack.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 23a69a57..83b22264 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -44,7 +44,9 @@ def pack( recipe_loader = RecipeLoader.from_json(recipe, use_docker=docker) else: # Load recipe from file path - recipe_loader = RecipeLoader(recipe, use_docker=docker) + recipe_loader = RecipeLoader( + recipe, packing_config_data["save_converted_recipe"], docker + ) recipe_data = recipe_loader.recipe_data analysis_config_data = {} if analysis_config_path is not None: From ecc645d4ca013c56e0ca6e8f82fbb6c4540f5c37 Mon Sep 17 00:00:00 2001 From: ascibisz Date: Wed, 21 Jan 2026 16:07:30 -0800 Subject: [PATCH 19/21] remove trailing comma --- cellpack/autopack/loaders/recipe_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index e40b469e..bbdb662a 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -190,7 +190,7 @@ def _read(self, resolve_inheritance=True, use_docker=False): if new_values is None: # Read recipe from filepath new_values, database_name, is_unnested_firebase = autopack.load_file( - self.file_path, cache="recipes", use_docker=use_docker, + self.file_path, cache="recipes", use_docker=use_docker ) if database_name == "firebase": From 17ba17c284349e249c1c7892d6b6405880cbd8fe Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Thu, 29 Jan 2026 13:11:58 -0800 Subject: [PATCH 20/21] Feature/firebase lookup (#445) * remove os fetch for job_id * use dedup_hash instead of job id * proposal: get hash from recipe loader * renaming and add TODOs * format * rename param to hash * remove unused validate param and doc strings in pack * simplify get_dedup_hash * refactor job_status update * cleanup * fix upload_job_status to handle awshandler * pass dedup_hash to env for fetching across files * add tests * format1 * format test --- cellpack/autopack/DBRecipeHandler.py | 87 +++++++++---------- .../upy/simularium/simularium_helper.py | 12 ++- cellpack/autopack/writers/__init__.py | 5 +- cellpack/bin/pack.py | 44 +++++----- cellpack/tests/test_db_uploader.py | 53 +++++++++++ docker/server.py | 55 +++++++----- 6 files changed, 160 insertions(+), 96 deletions(-) diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 8b4e8578..00fc8bb7 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -529,7 +529,7 @@ def upload_config(self, config_data, source_path): self.db.update_doc("configs", id, config_data) return id - def upload_result_metadata(self, file_name, url, job_id=None): + def upload_result_metadata(self, file_name, url, dedup_hash=None): """ Upload the metadata of the result file to the database.
""" @@ -543,28 +543,40 @@ def upload_result_metadata(self, file_name, url, job_id=None): "user": username, "timestamp": timestamp, "url": url, - "batch_job_id": job_id, + "dedup_hash": dedup_hash, }, ) - if job_id: - self.upload_job_status(job_id, "DONE", result_path=url) + if dedup_hash: + self.upload_job_status(dedup_hash, "DONE", result_path=url) - def upload_job_status(self, job_id, status, result_path=None, error_message=None): + def upload_job_status( + self, + dedup_hash, + status, + result_path=None, + error_message=None, + outputs_directory=None, + ): """ - Update status for a given job ID + Update status for a given dedup_hash """ if self.db: - timestamp = self.db.create_timestamp() - self.db.update_or_create( - "job_status", - job_id, - { - "timestamp": timestamp, - "status": str(status), - "result_path": result_path, - "error_message": error_message, - }, - ) + db_handler = self.db + # If db is AWSHandler, switch to firebase handler for job status updates + if hasattr(self.db, "s3_client"): + handler = DATABASE_IDS.handlers().get(DATABASE_IDS.FIREBASE) + db_handler = handler(default_db="staging") + timestamp = db_handler.create_timestamp() + data = { + "timestamp": timestamp, + "status": str(status), + "error_message": error_message, + } + if result_path: + data["result_path"] = result_path + if outputs_directory: + data["outputs_directory"] = outputs_directory + db_handler.update_or_create("job_status", dedup_hash, data) def save_recipe_and_config_to_output(self, output_folder, config_data, recipe_data): output_path = Path(output_folder) @@ -583,7 +595,7 @@ def upload_packing_results_workflow( self, source_folder, recipe_name, - job_id, + dedup_hash, config_data, recipe_data, ): @@ -591,7 +603,7 @@ def upload_packing_results_workflow( Complete packing results upload workflow including folder preparation and s3 upload """ try: - if job_id: + if dedup_hash: source_path = Path(source_folder) if not source_path.exists(): @@ -601,7 +613,7 @@ def upload_packing_results_workflow( # prepare unique S3 upload folder parent_folder = source_path.parent - unique_folder_name = f"{source_path.name}_run_{job_id}" + unique_folder_name = f"{source_path.name}_run_{dedup_hash}" s3_upload_folder = parent_folder / unique_folder_name logging.debug(f"outputs will be copied to: {s3_upload_folder}") @@ -618,7 +630,7 @@ def upload_packing_results_workflow( upload_result = self.upload_outputs_to_s3( output_folder=s3_upload_folder, recipe_name=recipe_name, - job_id=job_id, + dedup_hash=dedup_hash, ) # clean up temporary folder after upload @@ -628,9 +640,11 @@ def upload_packing_results_workflow( f"Cleaned up temporary upload folder: {s3_upload_folder}" ) - # update outputs directory in firebase - self.update_outputs_directory( - job_id, upload_result.get("outputs_directory") + # update outputs directory in job status + self.upload_job_status( + dedup_hash, + "DONE", + outputs_directory=upload_result.get("outputs_directory"), ) return upload_result @@ -639,7 +653,7 @@ def upload_packing_results_workflow( logging.error(e) return {"success": False, "error": e} - def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): + def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): """ Upload packing outputs to S3 bucket """ @@ -647,7 +661,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): bucket_name = self.db.bucket_name region_name = self.db.region_name sub_folder_name = self.db.sub_folder_name - s3_prefix = f"{sub_folder_name}/{recipe_name}/{job_id}" + s3_prefix 
= f"{sub_folder_name}/{recipe_name}/{dedup_hash}" try: upload_result = self.db.upload_directory( @@ -671,7 +685,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): return { "success": True, - "run_id": job_id, + "dedup_hash": dedup_hash, "s3_bucket": bucket_name, "s3_prefix": s3_prefix, "public_url_base": f"{base_url}/{s3_prefix}/", @@ -685,25 +699,6 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, job_id): logging.error(e) return {"success": False, "error": e} - def update_outputs_directory(self, job_id, outputs_directory): - if not self.db or self.db.s3_client: - # switch to firebase handler to update job status - handler = DATABASE_IDS.handlers().get("firebase") - initialized_db = handler(default_db="staging") - if job_id: - timestamp = initialized_db.create_timestamp() - initialized_db.update_or_create( - "job_status", - job_id, - { - "timestamp": timestamp, - "outputs_directory": outputs_directory, - }, - ) - logging.debug( - f"Updated outputs s3 location {outputs_directory} for job ID: {job_id}" - ) - class DBRecipeLoader(object): """ diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 86af0874..4f934e0e 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -1385,16 +1385,14 @@ def raycast(self, **kw): def raycast_test(self, obj, start, end, length, **kw): return - def post_and_open_file(self, file_name, open_results_in_browser): + def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") - url = None - job_id = os.environ.get("AWS_BATCH_JOB_ID", None) file_name, url = simulariumHelper.store_result_file( - simularium_file, storage="aws", batch_job_id=job_id + simularium_file, storage="aws", batch_job_id=dedup_hash ) if file_name and url: simulariumHelper.store_metadata( - file_name, url, db="firebase", job_id=job_id + file_name, url, db="firebase", dedup_hash=dedup_hash ) if open_results_in_browser: simulariumHelper.open_in_simularium(url) @@ -1428,7 +1426,7 @@ def store_result_file( return file_name, url @staticmethod - def store_metadata(file_name, url, db=None, job_id=None): + def store_metadata(file_name, url, db=None, dedup_hash=None): if db == "firebase": handler = DATABASE_IDS.handlers().get(db) initialized_db = handler( @@ -1436,7 +1434,7 @@ def store_metadata(file_name, url, db=None, job_id=None): ) # default to staging for metadata uploads if initialized_db._initialized: db_uploader = DBUploader(initialized_db) - db_uploader.upload_result_metadata(file_name, url, job_id) + db_uploader.upload_result_metadata(file_name, url, dedup_hash) else: db_maintainer = DBMaintenance(initialized_db) logging.warning( diff --git a/cellpack/autopack/writers/__init__.py b/cellpack/autopack/writers/__init__.py index 6ca931af..0b09e03a 100644 --- a/cellpack/autopack/writers/__init__.py +++ b/cellpack/autopack/writers/__init__.py @@ -197,8 +197,11 @@ def save_as_simularium(self, env, seed_to_results_map): number_of_packings = env.config_data.get("number_of_packings", 1) open_results_in_browser = env.config_data.get("open_results_in_browser", False) upload_results = env.config_data.get("upload_results", False) + dedup_hash = getattr(env, "dedup_hash", None) if (number_of_packings == 1 or is_aggregate) and upload_results: - autopack.helper.post_and_open_file(file_name, open_results_in_browser) + autopack.helper.post_and_open_file( + 
file_name, open_results_in_browser, dedup_hash + ) def save_Mixed_asJson( self, diff --git a/cellpack/bin/pack.py b/cellpack/bin/pack.py index 83b22264..27c4d018 100644 --- a/cellpack/bin/pack.py +++ b/cellpack/bin/pack.py @@ -1,6 +1,5 @@ import logging import logging.config -import os import time from pathlib import Path @@ -25,7 +24,11 @@ def pack( - recipe, config_path=None, analysis_config_path=None, docker=False, validate=True + recipe, + config_path=None, + analysis_config_path=None, + docker=False, + hash=None, ): """ Initializes an autopack packing from the command line @@ -33,7 +36,7 @@ def pack( :param config_path: string argument, path to packing config file :param analysis_config_path: string argument, path to analysis config file :param docker: boolean argument, are we using docker - :param validate: boolean argument, validate recipe before packing + :param hash: string argument, dedup hash identifier for tracking/caching results :return: void """ @@ -57,6 +60,7 @@ def pack( autopack.helper = helper env = Environment(config=packing_config_data, recipe=recipe_data) env.helper = helper + env.dedup_hash = hash log.info("Packing recipe: %s", recipe_data["name"]) log.info("Outputs will be saved to %s", env.out_folder) @@ -83,24 +87,22 @@ def pack( env.buildGrid(rebuild=True) env.pack_grid(verbose=0, usePP=False) - if docker: - job_id = os.environ.get("AWS_BATCH_JOB_ID", None) - if job_id: - handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) - # temporarily using demo bucket before permissions are granted - initialized_handler = handler( - bucket_name="cellpack-demo", - sub_folder_name="runs", - region_name="us-west-2", - ) - uploader = DBUploader(db_handler=initialized_handler) - uploader.upload_packing_results_workflow( - source_folder=env.out_folder, - recipe_name=recipe_data["name"], - job_id=job_id, - config_data=packing_config_data, - recipe_data=recipe_loader.serializable_recipe_data, - ) + if docker and hash: + handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) + # temporarily using demo bucket before permissions are granted + initialized_handler = handler( + bucket_name="cellpack-demo", + sub_folder_name="runs", + region_name="us-west-2", + ) + uploader = DBUploader(db_handler=initialized_handler) + uploader.upload_packing_results_workflow( + source_folder=env.out_folder, + recipe_name=recipe_data["name"], + dedup_hash=hash, + config_data=packing_config_data, + recipe_data=recipe_loader.serializable_recipe_data, + ) def main(): diff --git a/cellpack/tests/test_db_uploader.py b/cellpack/tests/test_db_uploader.py index 0c91cbd5..414f6b9c 100644 --- a/cellpack/tests/test_db_uploader.py +++ b/cellpack/tests/test_db_uploader.py @@ -175,3 +175,56 @@ def test_upload_recipe(): "A": "firebase:composition/test_id", } assert recipe_doc.objects_to_path_map == {"sphere_25": "firebase:objects/test_id"} + + +def test_upload_job_status_with_firebase_handler(): + mock_firebase_db = MagicMock() + mock_firebase_db.create_timestamp.return_value = "test_timestamp" + # firebaseHandler does not have s3_client attribute + del mock_firebase_db.s3_client + + uploader = DBUploader(mock_firebase_db) + uploader.upload_job_status("test_hash", "RUNNING") + + mock_firebase_db.create_timestamp.assert_called_once() + mock_firebase_db.update_or_create.assert_called_once_with( + "job_status", + "test_hash", + { + "timestamp": "test_timestamp", + "status": "RUNNING", + "error_message": None, + }, + ) + + +def test_upload_job_status_with_aws_handler(): + mock_aws_db = MagicMock() + 
mock_aws_db.s3_client = MagicMock() # AWSHandler has s3_client + + mock_firebase_handler = MagicMock() + mock_firebase_handler.create_timestamp.return_value = "firebase_timestamp" + + with patch( + "cellpack.autopack.DBRecipeHandler.DATABASE_IDS.handlers" + ) as mock_handlers: + mock_handlers.return_value.get.return_value = ( + lambda default_db: mock_firebase_handler + ) + + uploader = DBUploader(mock_aws_db) + uploader.upload_job_status("test_hash", "DONE", result_path="test_path") + + mock_firebase_handler.create_timestamp.assert_called_once() + mock_firebase_handler.update_or_create.assert_called_once_with( + "job_status", + "test_hash", + { + "timestamp": "firebase_timestamp", + "status": "DONE", + "error_message": None, + "result_path": "test_path", + }, + ) + # AWS handler should not be called for timestamp + mock_aws_db.create_timestamp.assert_not_called() diff --git a/docker/server.py b/docker/server.py index 9b1ce105..74bb20f3 100644 --- a/docker/server.py +++ b/docker/server.py @@ -1,8 +1,6 @@ import asyncio from aiohttp import web -import os -import uuid -from cellpack.autopack.DBRecipeHandler import DBUploader +from cellpack.autopack.DBRecipeHandler import DataDoc, DBUploader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.bin.pack import pack @@ -12,29 +10,40 @@ class CellpackServer: def __init__(self): self.packing_tasks = set() - async def run_packing(self, job_id, recipe=None, config=None, body=None): - os.environ["AWS_BATCH_JOB_ID"] = job_id - self.update_job_status(job_id, "RUNNING") + def _get_firebase_handler(self, database_name="firebase"): + handler = DATABASE_IDS.handlers().get(database_name) + initialized_db = handler(default_db="staging") + if initialized_db._initialized: + return initialized_db + return None + + def job_exists(self, dedup_hash): + db = self._get_firebase_handler() + if not db: + return False + + job_status, _ = db.get_doc_by_id("job_status", dedup_hash) + return job_status is not None + + async def run_packing(self, dedup_hash, recipe=None, config=None, body=None): + self.update_job_status(dedup_hash, "RUNNING") try: # Pack JSON recipe in body if provided, otherwise use recipe path - pack(recipe=(body if body else recipe), config_path=config, docker=True) + pack(recipe=(body if body else recipe), config_path=config, docker=True, hash=dedup_hash) except Exception as e: - self.update_job_status(job_id, "FAILED", error_message=str(e)) + self.update_job_status(dedup_hash, "FAILED", error_message=str(e)) - def update_job_status(self, job_id, status, result_path=None, error_message=None): - handler = DATABASE_IDS.handlers().get("firebase") - initialized_db = handler( - default_db="staging" - ) - if initialized_db._initialized: - db_uploader = DBUploader(initialized_db) - db_uploader.upload_job_status(job_id, status, result_path, error_message) + def update_job_status(self, dedup_hash, status, result_path=None, error_message=None): + db = self._get_firebase_handler() + if db: + db_uploader = DBUploader(db) + db_uploader.upload_job_status(dedup_hash, status, result_path, error_message) async def hello_world(self, request: web.Request) -> web.Response: return web.Response(text="Hello from the cellPACK server") async def health_check(self, request: web.Request) -> web.Response: - # healthcheck endpoint needed for AWS load balancer + # health check endpoint needed for AWS load balancer return web.Response() async def pack_handler(self, request: web.Request) -> web.Response: @@ -48,10 +57,14 @@ async def 
pack_handler(self, request: web.Request) -> web.Response: "Pack requests must include recipe as a query param" ) config = request.rel_url.query.get("config") - job_id = str(uuid.uuid4()) + + dedup_hash = DataDoc.generate_hash(body) + + if self.job_exists(dedup_hash): + return web.json_response({"jobId": dedup_hash}) # Initiate packing task to run in background - packing_task = asyncio.create_task(self.run_packing(job_id, recipe, config, body)) + packing_task = asyncio.create_task(self.run_packing(dedup_hash, recipe, config, body)) # Keep track of task references to prevent them from being garbage # collected, then discard after task completion @@ -60,7 +73,7 @@ async def pack_handler(self, request: web.Request) -> web.Response: # return job id immediately, rather than wait for task to complete, # to avoid timeout issues with API gateway - return web.json_response({"jobId": job_id}) + return web.json_response({"jobId": dedup_hash}) async def init_app() -> web.Application: @@ -75,4 +88,4 @@ async def init_app() -> web.Application: ) return app -web.run_app(init_app(), host="0.0.0.0", port=SERVER_PORT) \ No newline at end of file +web.run_app(init_app(), host="0.0.0.0", port=SERVER_PORT) From 79e77e83c3749d3d8f51640a3497dac2e39e634b Mon Sep 17 00:00:00 2001 From: Alli <111383930+ascibisz@users.noreply.github.com> Date: Wed, 4 Feb 2026 08:47:47 -0800 Subject: [PATCH 21/21] Only upload simularium file once (#446) * proposal: get hash from recipe loader * simplify get_dedup_hash * only post simularium results file once for server job runs * update code for rebase * code cleanup --------- Co-authored-by: Ruge Li --- cellpack/autopack/DBRecipeHandler.py | 12 +++--- .../upy/simularium/simularium_helper.py | 42 ++++++++----------- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 00fc8bb7..3eb691d5 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -529,7 +529,7 @@ def upload_config(self, config_data, source_path): self.db.update_doc("configs", id, config_data) return id - def upload_result_metadata(self, file_name, url, dedup_hash=None): + def upload_result_metadata(self, file_name, url): """ Upload the metadata of the result file to the database.
""" @@ -543,11 +543,8 @@ def upload_result_metadata(self, file_name, url, dedup_hash=None): "user": username, "timestamp": timestamp, "url": url, - "dedup_hash": dedup_hash, }, ) - if dedup_hash: - self.upload_job_status(dedup_hash, "DONE", result_path=url) def upload_job_status( self, @@ -644,6 +641,7 @@ def upload_packing_results_workflow( self.upload_job_status( dedup_hash, "DONE", + result_path=upload_result.get("simularium_url"), outputs_directory=upload_result.get("outputs_directory"), ) @@ -675,8 +673,11 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): f"{base_url}/{file_info['s3_key']}" for file_info in upload_result["uploaded_files"] ] + simularium_url = None + for url in public_urls: + if url.endswith(".simularium"): + simularium_url = url outputs_directory = f"https://us-west-2.console.aws.amazon.com/s3/buckets/{bucket_name}/{s3_prefix}/" - logging.info( f"Successfully uploaded {upload_result['total_files']} files to {outputs_directory}" ) @@ -694,6 +695,7 @@ def upload_outputs_to_s3(self, output_folder, recipe_name, dedup_hash): "total_size": upload_result["total_size"], "urls": public_urls, "outputs_directory": outputs_directory, + "simularium_url": simularium_url, } except Exception as e: logging.error(e) diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 4f934e0e..08179d85 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -1387,36 +1387,28 @@ def raycast_test(self, obj, start, end, length, **kw): def post_and_open_file(self, file_name, open_results_in_browser, dedup_hash=None): simularium_file = Path(f"{file_name}.simularium") - file_name, url = simulariumHelper.store_result_file( - simularium_file, storage="aws", batch_job_id=dedup_hash - ) - if file_name and url: - simulariumHelper.store_metadata( - file_name, url, db="firebase", dedup_hash=dedup_hash + if dedup_hash is None: + file_name, url = simulariumHelper.store_result_file( + simularium_file, storage="aws" ) - if open_results_in_browser: - simulariumHelper.open_in_simularium(url) + if file_name and url: + simulariumHelper.store_metadata( + file_name, url, db="firebase" + ) + if open_results_in_browser: + simulariumHelper.open_in_simularium(url) @staticmethod def store_result_file( - file_path, storage=None, batch_job_id=None, sub_folder="simularium" + file_path, storage=None, sub_folder="simularium" ): if storage == "aws": handler = DATABASE_IDS.handlers().get(storage) - # if batch_job_id is not None, then we are in a batch job and should use the temp bucket - # TODO: use cellpack-results bucket for batch jobs once we have the correct permissions - if batch_job_id: - initialized_handler = handler( - bucket_name="cellpack-demo", - sub_folder_name=sub_folder, - region_name="us-west-2", - ) - else: - initialized_handler = handler( - bucket_name="cellpack-results", - sub_folder_name=sub_folder, - region_name="us-west-2", - ) + initialized_handler = handler( + bucket_name="cellpack-results", + sub_folder_name=sub_folder, + region_name="us-west-2", + ) file_name, url = initialized_handler.save_file_and_get_url(file_path) if not file_name or not url: db_maintainer = DBMaintenance(initialized_handler) @@ -1426,7 +1418,7 @@ def store_result_file( return file_name, url @staticmethod - def store_metadata(file_name, url, db=None, dedup_hash=None): + def store_metadata(file_name, url, db=None): if db == "firebase": handler = 
DATABASE_IDS.handlers().get(db) initialized_db = handler( @@ -1434,7 +1426,7 @@ def store_metadata(file_name, url, db=None, dedup_hash=None): ) # default to staging for metadata uploads if initialized_db._initialized: db_uploader = DBUploader(initialized_db) - db_uploader.upload_result_metadata(file_name, url, dedup_hash) + db_uploader.upload_result_metadata(file_name, url) else: db_maintainer = DBMaintenance(initialized_db) logging.warning(