From 0a09cbc2fe51b7e4770e4a47adae12022a32d156 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Mon, 29 Sep 2025 17:53:50 +0200 Subject: [PATCH 01/22] rebase --- CHANGELOG.md | 1 + docs/index.rst | 8 +- playground/local_cache.py | 88 +++ tests/test_web/test_simulation_cache.py | 337 +++++++++++ tidy3d/config.py | 36 ++ tidy3d/web/api/asynchronous.py | 6 + tidy3d/web/api/autograd/autograd.py | 9 + tidy3d/web/api/container.py | 87 ++- tidy3d/web/api/webapi.py | 106 +++- tidy3d/web/cache.py | 754 ++++++++++++++++++++++++ tidy3d/web/core/http_util.py | 2 +- 11 files changed, 1395 insertions(+), 39 deletions(-) create mode 100644 playground/local_cache.py create mode 100644 tests/test_web/test_simulation_cache.py create mode 100644 tidy3d/web/cache.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b86cb8520..0593f63d03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added support for `tidy3d-extras`, an optional plugin that enables more accurate local mode solving via subpixel averaging. +- Added configurable local simulation result caching with checksum validation, eviction limits, and per-call overrides across `web.run`, `web.load`, and job workflows. ### Changed - Improved performance of antenna metrics calculation by utilizing cached wave amplitude calculations instead of recomputing wave amplitudes for each port excitation in the `TerminalComponentModelerData`. diff --git a/docs/index.rst b/docs/index.rst index 5cada50d0e..d53ffef1af 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -168,6 +168,13 @@ This will produce the following plot, which visualizes the electromagnetic field You can now postprocess simulation data using the same python session, or view the results of this simulation on our web-based `graphical user interface (GUI) `_. +.. tip:: + + Repeated runs of the same simulation can reuse solver results by enabling the optional + local cache: ``td.config.simulation_cache.enabled = True``. The cache location and limits are + configurable (see ``~/.tidy3d/config``), entries are checksum-validated, and you can clear + all stored artifacts with ``tidy3d.web.cache.clear()``. + .. `TODO: open example in colab `_ @@ -262,4 +269,3 @@ Contents - diff --git a/playground/local_cache.py b/playground/local_cache.py new file mode 100644 index 0000000000..5b8d71ce6e --- /dev/null +++ b/playground/local_cache.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +from pathlib import Path + +from playground import make_sim_playground +from tidy3d.web.api import webapi as web +from tidy3d.web.cache import ( + SimulationCacheConfig, + configure_cache, + get_cache, +) + + +# ----------------------------------------------------------- +# Helpers +# ----------------------------------------------------------- +def reset_cache(): + # cache_dir = tmp_dir / "cache" + # if cache_dir.exists(): + # shutil.rmtree(cache_dir) + cfg = SimulationCacheConfig( + enabled=True, + # directory=cache_dir, + max_size_gb=1.0, + max_entries=10, + ) + configure_cache(cfg) + get_cache.cache_clear() # <--- important! 
+ get_cache().clear() + return + + +def disp_cache_entries(): + cache = get_cache() + print(f"Cache has {len(cache.list())}") # ,entries:\n{pprint.pformat(cache.list())}") + + +# ----------------------------------------------------------- +# Manual test driver +# ----------------------------------------------------------- +if __name__ == "__main__": + tmp_dir = Path("debug_cache_tmp").resolve() + # disp_cache_entries() + # reset_cache() + disp_cache_entries() + + sim = make_sim_playground() + out_path = tmp_dir / "result.hdf5" + out_path2 = tmp_dir / "result2.hdf5" + + # print("\n=== First run (should upload/download, then cache) ===") + # data1 = web.run(sim, task_name="demo", path=str(out_path), use_cache=True) + # print(f"Got data type: {type(data1)}") + # disp_cache_entries() + # exit() + print("\n=== Second run (should hit cache, skip pipeline) ===") + data2 = web.run(sim, task_name="demo", path=str(out_path), use_cache=True) + print(f"Got data type: {type(data2)}") + disp_cache_entries() + # exit() + print("\n=== Load by task id (should also hit cache) ===") + # task_id = "fdve-dc37255d-77a5-4101-8964-97954fa3bde3" # your monkeypatch or real task id + task_id = "fdve-f56d0ebf-ec88-439e-bcbf-1316ab0ed43d" # your monkeypatch or real task id + data3 = web.load(task_id, path=str(out_path), use_cache=True) + print(f"Got data type: {type(data3)}") + disp_cache_entries() + + print("\n=== Manually corrupting artifact to trigger re-download ===") + entries = get_cache().list() + # if entries: + # key = entries[0]["cache_key"] + # + # # get the actual cache directory from config and expand '~' + # cache_dir = get_cache_config().directory.expanduser() + # + # artifact_path = cache_dir / key / "artifact.hdf5" + # artifact_path.parent.mkdir(parents=True, exist_ok=True) + # + # # overwrite with junk bytes (binary-safe) + # artifact_path.write_bytes(b"corrupted") + # print( + # f"[debug] Corrupted artifact at {artifact_path} (exists={artifact_path.exists()}, size={artifact_path.stat().st_size} bytes)") + # + # data4 = web.load(task_id, path=str(out_path), use_cache=True) + # print(f"After corruption -> Got data type: {type(data4)}") + # disp_cache_entries() + # + # print("\nDone. 
Inspect log output above to debug pipeline vs cache paths.") diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py new file mode 100644 index 0000000000..6a7e8e0f2b --- /dev/null +++ b/tests/test_web/test_simulation_cache.py @@ -0,0 +1,337 @@ +from __future__ import annotations + +import uuid +from pathlib import Path + +import tidy3d as td +from tidy3d.web.api import webapi as web +from tidy3d.web.cache import ( + CACHE_ARTIFACT_NAME, + SimulationCache, + SimulationCacheConfig, + get_cache, +) +import pytest + + +import os +import toml +import tempfile +from pathlib import Path + +import pytest + +from tidy3d.web.cache import ( + SimulationCacheConfig, + configure_cache, + get_cache_config, + _apply_updates, + _load_env_overrides, + _load_cli_cache_settings, +) +from tidy3d.web import run_async + + +MOCK_TASK_ID = "task-xyz" + + + +class _FakeStubData: + def __init__(self, simulation: td.Simulation): + self.simulation = simulation + +@pytest.fixture(autouse=True) +def isolated_cache(tmp_path_factory): + """Force every test into its own unique simulation cache directory.""" + # make per-test unique dir with uuid + cache_dir = tmp_path_factory.mktemp(f"tidy3d_cache_{uuid.uuid4().hex}") + + # save original config (singleton) + original = get_cache()._config if get_cache() else None + + # point global cache at fresh unique directory + cfg = SimulationCacheConfig( + enabled=True, + directory=cache_dir, + max_size_gb=1.0, + max_entries=10, + ) + configure_cache(cfg) + get_cache().clear() + + yield cache_dir + + # restore previous config or clear + if original is not None: + configure_cache(original) + get_cache().clear() + +@pytest.fixture +def basic_simulation(): + pulse = td.GaussianPulse(freq0=200e12, fwidth=20e12) + pt_dipole = td.PointDipole(source_time=pulse, polarization="Ex") + return td.Simulation( + size=(1, 1, 1), + grid_spec=td.GridSpec.auto(wavelength=1.0), + run_time=1e-12, + sources=[pt_dipole], + ) + + +@pytest.fixture(autouse=True) +def fake_data(monkeypatch, basic_simulation): + """Patch postprocess to return predictable stub data and track invocations.""" + calls = {"postprocess": 0} + + def _fake_postprocess(path: str): + calls["postprocess"] += 1 + return _FakeStubData(basic_simulation) + + monkeypatch.setattr(web.Tidy3dStubData, "postprocess", staticmethod(_fake_postprocess)) + return calls + + +def _patch_run_pipeline(monkeypatch, tmp_path): + """Patch upload, start, monitor, and download to avoid network calls.""" + counters = {"upload": 0, "start": 0, "monitor": 0, "download": 0} + + def _fake_upload(**kwargs): + counters["upload"] += 1 + return MOCK_TASK_ID + + def _fake_start(task_id, **kwargs): + counters["start"] += 1 + + def _fake_monitor(task_id, verbose=True): + counters["monitor"] += 1 + + def _fake_download(*, task_id, path, **kwargs): + counters["download"] += 1 + Path(path).write_text(f"payload:{task_id}") + + monkeypatch.setattr(web, "upload", _fake_upload) + monkeypatch.setattr(web, "start", _fake_start) + monkeypatch.setattr(web, "monitor", _fake_monitor) + monkeypatch.setattr(web, "download", _fake_download) + monkeypatch.setattr( + web, + "get_info", + lambda task_id, verbose=True: type( + "_Info", (), {"solverVersion": "solver-1", "taskType": "FDTD"} + )(), + ) + return counters + + +def _reset_counters(counters: dict[str, int]) -> None: + for key in counters: + counters[key] = 0 + + +@pytest.mark.serial +def _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): + counters = 
_patch_run_pipeline(monkeypatch, tmp_path) + out_path = tmp_path / "result.hdf5" + get_cache().clear() + + data = web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) + assert isinstance(data, _FakeStubData) + assert counters == {"upload": 1, "start": 1, "monitor": 1, "download": 1} + + _reset_counters(counters) + data2 = web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) + assert isinstance(data2, _FakeStubData) + assert counters == {"upload": 0, "start": 0, "monitor": 0, "download": 0} + +@pytest.mark.serial +def test_run_cache_hit_async(monkeypatch, tmp_path, basic_simulation, fake_data): + counters = _patch_run_pipeline(monkeypatch, tmp_path) + get_cache().clear() + + data = run_async({"task1": basic_simulation}) + + +@pytest.mark.serial +@pytest.mark.xdist_group("serial") +def _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): + get_cache().clear() + counters = _patch_run_pipeline(monkeypatch, tmp_path) + out_path = tmp_path / "load.hdf5" + + web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) + assert counters["download"] == 1 + + _reset_counters(counters) + data = web.load(MOCK_TASK_ID, path=str(out_path), use_cache=True) + assert isinstance(data, _FakeStubData) + assert counters["download"] == 0 # served from cache + + +@pytest.mark.serial +@pytest.mark.xdist_group("serial") +def _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation): + counters = _patch_run_pipeline(monkeypatch, tmp_path) + out_path = tmp_path / "checksum.hdf5" + + web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) + + cache = get_cache() + metadata = cache.list()[0] + corrupted_path = cache.root / metadata["cache_key"] / CACHE_ARTIFACT_NAME + corrupted_path.write_text("corrupted") + + _reset_counters(counters) + web.load(MOCK_TASK_ID, path=str(out_path), use_cache=True) + assert counters["download"] == 1 + + +@pytest.mark.serial +@pytest.mark.xdist_group("serial") +def _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation): + cache = SimulationCache(SimulationCacheConfig(enabled=True, max_size_gb=10.0, max_entries=1)) + + file1 = tmp_path_factory.mktemp("art1") / CACHE_ARTIFACT_NAME + file1.write_text("a" * 10) + cache.store_result(_FakeStubData(basic_simulation), MOCK_TASK_ID, str(file1), "FDTD") + assert len(cache) == 1 + + sim2 = basic_simulation.updated_copy(normalize_index=0.1) + file2 = tmp_path_factory.mktemp("art2") / CACHE_ARTIFACT_NAME + file2.write_text("b" * 10) + cache.store_result(_FakeStubData(sim2), MOCK_TASK_ID, str(file2), "FDTD") + + entries = cache.list() + assert len(entries) == 1 + assert entries[0]["simulation_hash"] == sim2._hash_self() + + +@pytest.mark.serial +@pytest.mark.xdist_group("serial") +def _test_cache_eviction_by_size(tmp_path_factory, basic_simulation): + cache = SimulationCache(SimulationCacheConfig(enabled=True, max_size_gb=1e-5, max_entries=10)) + + file1 = tmp_path_factory.mktemp("art1") / CACHE_ARTIFACT_NAME + file1.write_text("a" * 12_000) + cache.store_result(_FakeStubData(basic_simulation), MOCK_TASK_ID, str(file1), "FDTD") + assert len(cache) == 1 + + sim2 = basic_simulation.updated_copy(normalize_index=0.2) + file2 = tmp_path_factory.mktemp("art2") / CACHE_ARTIFACT_NAME + file2.write_text("b" * 12_000) + cache.store_result(_FakeStubData(sim2), MOCK_TASK_ID, str(file2), "FDTD") + + entries = cache.list() + assert len(entries) == 1 + assert entries[0]["simulation_hash"] == sim2._hash_self() + + 
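+
+# A minimal sketch, not part of the original suite: exercises the documented
+# store/fetch round trip directly on ``SimulationCache``. The directory and the
+# "payload" artifact contents are arbitrary placeholder values.
+@pytest.mark.serial
+@pytest.mark.xdist_group("serial")
+def test_cache_store_and_fetch_roundtrip(tmp_path_factory, basic_simulation):
+    cache_root = tmp_path_factory.mktemp("roundtrip_cache")
+    cache = SimulationCache(
+        SimulationCacheConfig(enabled=True, directory=cache_root, max_size_gb=1.0, max_entries=5)
+    )
+
+    # store an artifact under the canonical key derived from the simulation hash
+    artifact = tmp_path_factory.mktemp("roundtrip_art") / CACHE_ARTIFACT_NAME
+    artifact.write_text("payload")
+    cache.store_result(_FakeStubData(basic_simulation), MOCK_TASK_ID, str(artifact), "FDTD")
+    assert len(cache) == 1
+
+    # fetch it back via the task-id index and materialize a checksum-verified copy
+    entry = cache.fetch_by_task(MOCK_TASK_ID)
+    assert entry is not None
+    restored = entry.materialize(tmp_path_factory.mktemp("restore") / "copy.hdf5")
+    assert restored.read_text() == "payload"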
+@pytest.mark.xdist_group("serial") +def test_cache_end_to_end(monkeypatch, tmp_path, tmp_path_factory, basic_simulation, fake_data): + """Run all critical cache tests in sequence to ensure end-to-end stability.""" + _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) + _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) + _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation) + _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation) + _test_cache_eviction_by_size(tmp_path_factory, basic_simulation) + +@pytest.mark.serial +def test_configure_cache_roundtrip(tmp_path): + new_cfg = SimulationCacheConfig(enabled=True, directory=tmp_path, max_size_gb=1.23, max_entries=5) + configure_cache(new_cfg) + cfg = get_cache_config() + assert cfg.enabled is True + assert cfg.directory == tmp_path + assert cfg.max_size_gb == 1.23 + assert cfg.max_entries == 5 + +@pytest.mark.serial +def test_env_var_overrides(monkeypatch, tmp_path): + monkeypatch.setenv("TIDY3D_CACHE_ENABLED", "true") + monkeypatch.setenv("TIDY3D_CACHE_DIR", str(tmp_path)) + monkeypatch.setenv("TIDY3D_CACHE_MAX_SIZE_GB", "0.5") + monkeypatch.setenv("TIDY3D_CACHE_MAX_ENTRIES", "7") + + overrides = _load_env_overrides() + assert overrides == { + "enabled": True, + "directory": str(tmp_path), + "max_size_gb": 0.5, + "max_entries": 7, + } + +@pytest.mark.serial +def test_cli_config_overrides(tmp_path, monkeypatch): + # Build fake toml config file + cli_config_file = tmp_path / "config.toml" + monkeypatch.setenv("TIDY3D_CLI_CONFIG", str(cli_config_file)) # if your code reads via constant adjust + content = { + "simulation_cache": { + "enabled": True, + "directory": str(tmp_path / "cli_dir"), + "max_size_gb": 2.5, + "max_entries": 99, + } + } + cli_config_file.write_text(toml.dumps(content)) + + # Patch constant so _load_cli_cache_settings sees our file + from tidy3d.web import cache as cache_mod + monkeypatch.setattr(cache_mod, "CLI_CONFIG_FILE", str(cli_config_file)) + + settings = _load_cli_cache_settings() + assert settings["enabled"] is True + assert Path(settings["directory"]).name == "cli_dir" + assert settings["max_size_gb"] == 2.5 + assert settings["max_entries"] == 99 + +@pytest.mark.serial +def test_apply_updates_invalid_values(tmp_path, caplog): + base = SimulationCacheConfig() + updates = { + "enabled": "notbool", + "directory": tmp_path, + "max_size_gb": "-5", # invalid + "max_entries": "-10", # invalid + "irrelevant": 123, + } + cfg = _apply_updates(base, updates) + # directory should be updated, invalid numbers ignored + assert cfg.directory == tmp_path + assert cfg.max_size_gb == base.max_size_gb + assert cfg.max_entries == base.max_entries + +@pytest.mark.serial +def test_effective_config_cli_then_env(monkeypatch, tmp_path): + """CLI settings should apply first, then environment overrides take precedence.""" + + # --- Step 1: fake CLI config --- + cli_config_file = tmp_path / "config.toml" + cli_settings = { + "simulation_cache": { + "enabled": False, # will be overridden by env + "directory": str(tmp_path / "cli_dir"), + "max_size_gb": 2.5, + "max_entries": 99, + } + } + cli_config_file.write_text(toml.dumps(cli_settings)) + from tidy3d.web import cache as cache_mod + monkeypatch.setattr(cache_mod, "CLI_CONFIG_FILE", str(cli_config_file)) + + # --- Step 2: env vars override CLI --- + env_dir = tmp_path / "env_dir" + monkeypatch.setenv("TIDY3D_CACHE_ENABLED", "true") + monkeypatch.setenv("TIDY3D_CACHE_DIR", str(env_dir)) + 
monkeypatch.setenv("TIDY3D_CACHE_MAX_SIZE_GB", "0.75") + monkeypatch.setenv("TIDY3D_CACHE_MAX_ENTRIES", "7") + + # --- Step 3: load effective config --- + from tidy3d.web.cache import _load_effective_config + cfg = _load_effective_config() + + # --- Step 4: assertions --- + # Env overrides should win over CLI + assert cfg.enabled is True # env overrides False + assert cfg.directory == env_dir # env overrides cli_dir + assert cfg.max_size_gb == 0.75 # env overrides 2.5 + assert cfg.max_entries == 7 # env overrides 99 \ No newline at end of file diff --git a/tidy3d/config.py b/tidy3d/config.py index 9dfc7b702c..4730a9ddff 100644 --- a/tidy3d/config.py +++ b/tidy3d/config.py @@ -2,12 +2,42 @@ from __future__ import annotations +from pathlib import Path from typing import Optional import pydantic.v1 as pd from .log import DEFAULT_LEVEL, LogLevel, set_log_suppression, set_logging_level +_DEFAULT_CACHE_DIR = Path.home() / ".tidy3d" / "cache" / "simulations" + + +class SimulationCacheSettings(pd.BaseModel): + """Settings controlling the optional local simulation cache.""" + + enabled: bool = pd.Field( + False, + description="Enable or disable the local simulation cache.", + ) + directory: Path = pd.Field( + _DEFAULT_CACHE_DIR, + description="Directory where cached simulation artifacts are stored.", + ) + max_size_gb: float = pd.Field( + 10.0, + description="Maximum cache size in gigabytes. Set to 0 for no size limit.", + ge=0.0, + ) + max_entries: int = pd.Field( + 25, + description="Maximum number of cache entries. Set to 0 for no limit.", + ge=0, + ) + + @pd.validator("directory", pre=True, always=True) + def _validate_directory(cls, value): + return Path(value).expanduser() + class Tidy3dConfig(pd.BaseModel): """configuration of tidy3d""" @@ -43,6 +73,12 @@ class Config: "averaging will be used if 'tidy3d-extras' is installed and not used otherwise.", ) + simulation_cache: SimulationCacheSettings = pd.Field( + SimulationCacheSettings(), + title="Simulation Cache", + description="Configuration for the optional local simulation cache.", + ) + @pd.validator("logging_level", pre=True, always=True) def _set_logging_level(cls, val): """Set the logging level if logging_level is changed.""" diff --git a/tidy3d/web/api/asynchronous.py b/tidy3d/web/api/asynchronous.py index da628261f3..94b1683f99 100644 --- a/tidy3d/web/api/asynchronous.py +++ b/tidy3d/web/api/asynchronous.py @@ -24,6 +24,7 @@ def run_async( reduce_simulation: Literal["auto", True, False] = "auto", pay_type: Union[PayType, str] = PayType.AUTO, priority: Optional[int] = None, + use_cache: Optional[bool] = None, ) -> BatchData: """Submits a set of Union[:class:`.Simulation`, :class:`.HeatSimulation`, :class:`.EMESimulation`] objects to server, starts running, monitors progress, downloads, and loads results as a :class:`.BatchData` object. @@ -56,6 +57,10 @@ def run_async( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. + use_cache: bool = None + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or# + environment variables will be used. 
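+        For example, ``run_async(simulations, use_cache=True)`` forces cache lookups for this call regardless of the global setting.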
+
     Returns
     ------
     :class:`BatchData`
diff --git a/tidy3d/web/api/autograd/autograd.py b/tidy3d/web/api/autograd/autograd.py
index 7958dc57c9..06bbfb7521 100644
--- a/tidy3d/web/api/autograd/autograd.py
+++ b/tidy3d/web/api/autograd/autograd.py
@@ -117,6 +117,7 @@ def run(
     reduce_simulation: typing.Literal["auto", True, False] = "auto",
     pay_type: typing.Union[PayType, str] = PayType.AUTO,
     priority: typing.Optional[int] = None,
+    use_cache: typing.Optional[bool] = None,
 ) -> WorkflowDataType:
     """
     Submits a :class:`.Simulation` to server, starts running, monitors progress, downloads,
@@ -248,6 +249,7 @@ def run(
         max_num_adjoint_per_fwd=max_num_adjoint_per_fwd,
         pay_type=pay_type,
         priority=priority,
+        use_cache=use_cache,
     )
 
     return run_webapi(
@@ -266,6 +268,7 @@ def run(
         reduce_simulation=reduce_simulation,
         pay_type=pay_type,
         priority=priority,
+        use_cache=use_cache,
     )
 
 
@@ -284,6 +287,7 @@ def run_async(
     reduce_simulation: typing.Literal["auto", True, False] = "auto",
     pay_type: typing.Union[PayType, str] = PayType.AUTO,
     priority: typing.Optional[int] = None,
+    use_cache: typing.Optional[bool] = None,
 ) -> BatchData:
     """Submits a set of Union[:class:`.Simulation`, :class:`.HeatSimulation`, :class:`.EMESimulation`] objects to server, starts
     running, monitors progress, downloads, and loads results as a :class:`.BatchData` object.
@@ -318,6 +322,9 @@ def run_async(
         Whether to reduce structures in the simulation to the simulation domain only. Note: currently only implemented for the mode solver.
     pay_type: typing.Union[PayType, str] = PayType.AUTO
         Specify the payment method.
+    use_cache: bool = None
+        Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or
+        environment variables will be used.
Returns ------ @@ -375,6 +383,7 @@ def run_async( reduce_simulation=reduce_simulation, pay_type=pay_type, priority=priority, + use_cache=use_cache, ) diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index dc6a96b5a3..ce6ae43f48 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -8,19 +8,23 @@ from abc import ABC from collections.abc import Mapping from concurrent.futures import ThreadPoolExecutor -from typing import Literal, Optional, Union +from typing import Literal, Optional import pydantic.v1 as pd from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeElapsedColumn from tidy3d.components.base import Tidy3dBaseModel, cached_property from tidy3d.components.mode.mode_solver import ModeSolver +from tidy3d.components.mode.simulation import ModeSimulation from tidy3d.components.types import annotate_type from tidy3d.components.types.workflow import WorkflowDataType, WorkflowType from tidy3d.exceptions import DataError from tidy3d.log import get_logging_console, log from tidy3d.web.api import webapi as web from tidy3d.web.api.tidy3d_stub import Tidy3dStub +from tidy3d.web.api.tidy3d_stub import Tidy3dStub, Tidy3dStubData +from tidy3d.web.api.webapi import get_reduced_simulation, _get_simulation_data_from_cache_entry +from tidy3d.web.cache import build_cache_key, _resolve_cache, CacheEntry from tidy3d.web.core.constants import TaskId, TaskName from tidy3d.web.core.task_core import Folder from tidy3d.web.core.task_info import RunInfo, TaskInfo @@ -224,6 +228,9 @@ class Job(WebContainer): "reduce_simulation", ) + use_cache: Optional[bool] = None + cache_hit: Optional[CacheEntry] = None + def to_file(self, fname: str) -> None: """Exports :class:`Tidy3dBaseModel` instance to .yaml, .json, or .hdf5 file @@ -241,7 +248,10 @@ def to_file(self, fname: str) -> None: super(Job, self).to_file(fname=fname) # noqa: UP008 def run( - self, path: str = DEFAULT_DATA_PATH, priority: Optional[int] = None + self, + path: str = DEFAULT_DATA_PATH, + priority: Optional[int] = None, + use_cache: Optional[bool] = None, ) -> WorkflowDataType: """Run :class:`Job` all the way through and return data. @@ -252,18 +262,40 @@ def run( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. + use_cache: bool = None + Override cache usage behaviour for this call. ``True`` forces cache usage when available, + ``False`` bypasses it, and ``None`` defers to configuration and environment settings. Returns ------- :class:`WorkflowDataType` Object containing simulation results. 
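+
+        Example
+        -------
+        A minimal sketch with placeholder names; ``sim`` is a valid :class:`.Simulation`:
+
+        >>> job = Job(simulation=sim, task_name="cached_job")  # doctest: +SKIP
+        >>> sim_data = job.run(path="out/sim.hdf5", use_cache=True)  # doctest: +SKIP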
""" - self.upload() - if priority is None: - self.start() - else: - self.start(priority=priority) - self.monitor() - return self.load(path=path) + self._check_path_dir(path=path) + + cache_instance = _resolve_cache(use_cache) + data = None + if cache_instance is not None: + sim_for_cache = self.simulation + if isinstance(self.simulation, (ModeSolver, ModeSimulation)) and self.reduce_simulation: + sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation) + entry = cache_instance.try_fetch( + simulation=sim_for_cache, + path=path, + ) + data = _get_simulation_data_from_cache_entry(entry, path) + if data is not None: + return data + + if data is None: # got no data from cache + self.upload() + if priority is None: + self.start() + else: + self.start(priority=priority) + self.monitor() + data = self.load(path=path, use_cache=use_cache) + + return data @cached_property def task_id(self) -> TaskId: @@ -273,10 +305,22 @@ def task_id(self) -> TaskId: self._check_folder(self.folder_name) return self._upload() - def _upload(self) -> TaskId: + def _upload(self) -> Optional[TaskId]: """Upload this job and return the task ID for handling.""" # upload kwargs with all fields except task_id upload_kwargs = {key: getattr(self, key) for key in self._upload_fields} + cache_instance = _resolve_cache(self.use_cache) + + if cache_instance is not None: + sim_for_cache = self.simulation + if isinstance(self.simulation, (ModeSolver, ModeSimulation)) and self.reduce_simulation: + sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation) + entry = cache_instance.try_fetch( + simulation=sim_for_cache + ) + if entry: + return entry.metadata["task_ids"][0] + task_id = web.upload(**upload_kwargs) return task_id @@ -347,15 +391,23 @@ def download(self, path: str = DEFAULT_DATA_PATH) -> None: ---------- path : str = "./simulation_data.hdf5" Path to download data as ``.hdf5`` file (including filename). + use_cache: bool = None + Override cache usage behaviour for this call. ``True`` forces cache usage when available, + ``False`` bypasses it, and ``None`` defers to configuration and environment settings. Note ---- To load the data after download, use :meth:`Job.load`. """ + if self.use_cache and self.cache_hit: + self.cache_hit.materialize(Path(path)) + return self._check_path_dir(path=path) web.download(task_id=self.task_id, path=path, verbose=self.verbose) - def load(self, path: str = DEFAULT_DATA_PATH) -> WorkflowDataType: + def load( + self, path: str = DEFAULT_DATA_PATH, use_cache: Optional[bool] = None + ) -> WorkflowDataType: """Download job results and load them into a data object. Parameters @@ -369,7 +421,12 @@ def load(self, path: str = DEFAULT_DATA_PATH) -> WorkflowDataType: Object containing simulation results. """ self._check_path_dir(path=path) - data = web.load(task_id=self.task_id, path=path, verbose=self.verbose) + data = web.load( + task_id=self.task_id, + path=path, + verbose=self.verbose, + use_cache=use_cache, + ) if isinstance(self.simulation, ModeSolver): self.simulation._patch_data(data=data) return data @@ -623,6 +680,8 @@ class Batch(WebContainer): "fields that were not used to create the task will cause errors.", ) + use_cache: Optional[bool] = None + _job_type = Job def run( @@ -639,6 +698,9 @@ def run( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. 
+ use_cache: bool = None + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or# + environment variables will be used. Returns ------ :class:`BatchData` @@ -708,6 +770,7 @@ def jobs(self) -> dict[TaskName, Job]: job_kwargs["solver_version"] = self.solver_version job_kwargs["pay_type"] = self.pay_type job_kwargs["reduce_simulation"] = self.reduce_simulation + job_kwargs["use_cache"] = self.use_cache if self.parent_tasks and task_name in self.parent_tasks: job_kwargs["parent_tasks"] = self.parent_tasks[task_name] job = JobType(**job_kwargs) diff --git a/tidy3d/web/api/webapi.py b/tidy3d/web/api/webapi.py index e165b83b05..ff79b56c27 100644 --- a/tidy3d/web/api/webapi.py +++ b/tidy3d/web/api/webapi.py @@ -6,6 +6,7 @@ import os import tempfile import time +from pathlib import Path from typing import Callable, Literal, Optional, Union from requests import HTTPError @@ -18,6 +19,7 @@ from tidy3d.exceptions import WebError from tidy3d.log import get_logging_console, log from tidy3d.plugins.smatrix.component_modelers.terminal import TerminalComponentModeler +from tidy3d.web.cache import _resolve_cache, SimulationCache, CacheEntry from tidy3d.web.core.account import Account from tidy3d.web.core.constants import ( CM_DATA_HDF5_GZ, @@ -121,6 +123,12 @@ def _task_dict_to_url_bullet_list(data_dict: dict) -> str: # and then join them together with newline characters. return "\n".join([f"- {key}: '{value}'" for key, value in data_dict.items()]) +def _get_simulation_data_from_cache_entry(entry: CacheEntry, path: str) -> Optional[WorkflowDataType]: + if entry is not None: + entry.materialize(Path(path)) + data = Tidy3dStubData.postprocess(path) + return data + return None @wait_for_connection def run( @@ -139,6 +147,7 @@ def run( reduce_simulation: Literal["auto", True, False] = "auto", pay_type: Union[PayType, str] = PayType.AUTO, priority: Optional[int] = None, + use_cache: Optional[bool] = None, ) -> WorkflowDataType: """ Submits a :class:`.Simulation` to server, starts running, monitors progress, downloads, @@ -176,6 +185,9 @@ def run( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. + use_cache: bool = None + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or# + environment variables will be used. Returns ------- Union[:class:`.SimulationData`, :class:`.HeatSimulationData`, :class:`.EMESimulationData`] @@ -220,30 +232,49 @@ def run( :meth:`tidy3d.web.api.container.Batch.monitor` Monitor progress of each of the running tasks. 
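+
+    Example
+    -------
+    A minimal sketch with placeholder names, assuming ``import tidy3d.web as web``
+    and a valid :class:`.Simulation` instance ``sim``:
+
+    >>> sim_data = web.run(sim, task_name="demo", path="out/sim.hdf5", use_cache=True)  # doctest: +SKIP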
""" - task_id = upload( - simulation=simulation, - task_name=task_name, - folder_name=folder_name, - callback_url=callback_url, - verbose=verbose, - progress_callback=progress_callback_upload, - simulation_type=simulation_type, - parent_tasks=parent_tasks, - solver_version=solver_version, - reduce_simulation=reduce_simulation, - ) - start( - task_id, - verbose=verbose, - solver_version=solver_version, - worker_group=worker_group, - pay_type=pay_type, - priority=priority, - ) - monitor(task_id, verbose=verbose) - data = load( - task_id=task_id, path=path, verbose=verbose, progress_callback=progress_callback_download - ) + cache_instance = _resolve_cache(use_cache) + data = None + if cache_instance is not None: + sim_for_cache = simulation + if isinstance(simulation, (ModeSolver, ModeSimulation)) and reduce_simulation: + sim_for_cache = get_reduced_simulation(simulation, reduce_simulation) + entry = cache_instance.try_fetch( + simulation=sim_for_cache + ) + data = _get_simulation_data_from_cache_entry(entry, path) + if data is not None: + return data + + if data is None: # got no data from cache + task_id = upload( + simulation=simulation, + task_name=task_name, + folder_name=folder_name, + callback_url=callback_url, + verbose=verbose, + progress_callback=progress_callback_upload, + simulation_type=simulation_type, + parent_tasks=parent_tasks, + solver_version=solver_version, + reduce_simulation=reduce_simulation, + ) + start( + task_id, + verbose=verbose, + solver_version=solver_version, + worker_group=worker_group, + pay_type=pay_type, + priority=priority, + ) + monitor(task_id, verbose=verbose) + data = load( + task_id=task_id, + path=path, + verbose=verbose, + progress_callback=progress_callback_download, + use_cache=use_cache, + ) + if isinstance(simulation, ModeSolver): simulation._patch_data(data=data) return data @@ -988,6 +1019,7 @@ def load( replace_existing: bool = True, verbose: bool = True, progress_callback: Optional[Callable[[float], None]] = None, + use_cache: Optional[bool] = None, lazy: bool = False, ) -> WorkflowDataType: """ @@ -1018,6 +1050,9 @@ def load( If ``True``, will print progressbars and status, otherwise, will run silently. progress_callback : Callable[[float], None] = None Optional callback function called when downloading file with ``bytes_in_chunk`` as argument. + use_cache: bool = None + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or# + environment variables will be used. lazy : bool = False Whether to load the actual data (``lazy=False``) or return a proxy that loads the data when accessed (``lazy=True``). @@ -1032,7 +1067,17 @@ def load( base_dir = os.path.dirname(path) or "." 
path = os.path.join(base_dir, "cm_data.hdf5") - if not os.path.exists(path) or replace_existing: + cache_instance = _resolve_cache(use_cache) + data = None + if cache_instance is not None: + entry = cache_instance.try_fetch_by_task( + task_id=task_id, verbose=verbose + ) + data = _get_simulation_data_from_cache_entry(entry, path) + if data is not None: + return data + + if not data and (not os.path.exists(path) or replace_existing): download(task_id=task_id, path=path, verbose=verbose, progress_callback=progress_callback) if verbose: @@ -1043,6 +1088,17 @@ def load( console.log(f"loading simulation from {path}") stub_data = Tidy3dStubData.postprocess(path, lazy=lazy) + + if cache_instance is not None: + info = get_info(task_id, verbose=False) + workflow_type = getattr(info, "taskType", None) or type(stub_data).__name__ + cache_instance.store_result( + stub_data=stub_data, + task_id=task_id, + path=path, + workflow_type=workflow_type, + ) + return stub_data diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py new file mode 100644 index 0000000000..15cde4d814 --- /dev/null +++ b/tidy3d/web/cache.py @@ -0,0 +1,754 @@ +"""Local simulation cache manager.""" + +from __future__ import annotations + +import hashlib +import json +import os +import shutil +import tempfile +import threading +import traceback +from collections.abc import Iterable +from dataclasses import dataclass, field, replace +from datetime import datetime, timezone +from enum import Enum +from functools import lru_cache +from pathlib import Path +from typing import Any, Optional, Union + +import toml + +from tidy3d.components.types.workflow import WorkflowDataType, WorkflowType +from tidy3d.log import log +from tidy3d.web.api.tidy3d_stub import Tidy3dStub, Tidy3dStubData +from tidy3d.web.cli.constants import CONFIG_FILE as CLI_CONFIG_FILE +from tidy3d.web.core.constants import TaskId +from tidy3d.web.core.environment import Env +from tidy3d.web.core.http_util import get_version as _get_protocol_version + +DEFAULT_CACHE_RELATIVE_DIR = Path(".tidy3d") / "cache" / "simulations" +CACHE_ARTIFACT_NAME = "simulation_data.hdf5" +CACHE_METADATA_NAME = "metadata.json" + +ENV_ENABLE = "TIDY3D_CACHE_ENABLED" +ENV_DIRECTORY = "TIDY3D_CACHE_DIR" +ENV_MAX_SIZE = "TIDY3D_CACHE_MAX_SIZE_GB" +ENV_MAX_ENTRIES = "TIDY3D_CACHE_MAX_ENTRIES" + +TMP_PREFIX = "tidy3d-cache-" + +def _environment_context() -> dict[str, Any]: + env = Env.current + return { + "name": env.name, + "web_api_endpoint": env.web_api_endpoint, + "website_endpoint": env.website_endpoint, + "s3_region": env.s3_region, + } + + + +def _resolve_cache(use_cache: Optional[bool]): + cache_config = get_cache_config() + + try: + from tidy3d import config as tidy3d_config + except Exception: + simulation_cache_settings = None + else: + simulation_cache_settings = getattr(tidy3d_config, "simulation_cache", None) + + if simulation_cache_settings is not None: + desired_config = SimulationCacheConfig( + enabled=simulation_cache_settings.enabled, + directory=simulation_cache_settings.directory, + max_size_gb=simulation_cache_settings.max_size_gb, + max_entries=simulation_cache_settings.max_entries, + ) + if desired_config != cache_config: + configure_cache(desired_config) + cache_config = desired_config + + enabled = cache_config.enabled + env_override = Env.current.enable_caching + if env_override is not None: + enabled = env_override + if use_cache is not None: + enabled = use_cache + if not enabled: + return None + try: + return get_cache() + except Exception as err: + log.debug("Simulation 
cache unavailable: %s", err) + return None + + +def _coerce_bool(value: str) -> Optional[bool]: + if value is None: + return None + normalized = value.strip().lower() + if normalized in {"1", "true", "yes", "on"}: + return True + if normalized in {"0", "false", "no", "off"}: + return False + return None + + +def _coerce_float(value: str) -> Optional[float]: + if value is None: + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + +def _coerce_int(value: str) -> Optional[int]: + if value is None: + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _load_cli_cache_settings() -> dict[str, Any]: + if not os.path.exists(CLI_CONFIG_FILE): + return {} + try: + with open(CLI_CONFIG_FILE, encoding="utf-8") as fh: + content = fh.read() + if not content.strip(): + return {} + config = toml.loads(content) + except Exception as err: + log.debug("Failed to parse CLI cache settings: %s", err) + return {} + + section = config.get("simulation_cache") + return section if isinstance(section, dict) else {} + + +def _load_env_overrides() -> dict[str, Any]: + overrides: dict[str, Any] = {} + + enabled_env = _coerce_bool(os.getenv(ENV_ENABLE)) + if enabled_env is not None: + overrides["enabled"] = enabled_env + + directory_env = os.getenv(ENV_DIRECTORY) + if directory_env: + overrides["directory"] = directory_env + + size_env = _coerce_float(os.getenv(ENV_MAX_SIZE)) + if size_env is not None: + overrides["max_size_gb"] = size_env + + entries_env = _coerce_int(os.getenv(ENV_MAX_ENTRIES)) + if entries_env is not None: + overrides["max_entries"] = entries_env + + return overrides + + +def _apply_updates(config: SimulationCacheConfig, updates: dict[str, Any]) -> SimulationCacheConfig: + if not updates: + return config + + kwargs: dict[str, Any] = {} + for key, value in updates.items(): + if key not in {"enabled", "directory", "max_size_gb", "max_entries"}: + continue + if key == "directory" and value is not None: + try: + value = Path(value).expanduser() + except Exception: + log.debug("Ignoring invalid cache directory override: %s", value) + continue + if key == "max_size_gb" and value is not None: + try: + value = float(value) + except (TypeError, ValueError): + log.debug("Ignoring invalid cache size override: %s", value) + continue + if value < 0: + log.debug("Ignoring negative cache size override: %s", value) + continue + if key == "max_entries" and value is not None: + try: + value = int(value) + except (TypeError, ValueError): + log.debug("Ignoring invalid cache entry override: %s", value) + continue + if value < 0: + log.debug("Ignoring negative cache entry override: %s", value) + continue + kwargs[key] = value + return replace(config, **kwargs) if kwargs else config + + +def _load_effective_config() -> SimulationCacheConfig: + config = SimulationCacheConfig() + config = _apply_updates(config, _load_cli_cache_settings()) + config = _apply_updates(config, _load_env_overrides()) + return config + + +@dataclass(frozen=True) +class SimulationCacheConfig: + """Configuration for the simulation cache.""" + + enabled: bool = False + directory: Path = field(default_factory=lambda: Path.home() / DEFAULT_CACHE_RELATIVE_DIR) + max_size_gb: float = 8.0 + max_entries: int = 32 + + +@dataclass +class CacheEntry: + """Internal representation of a cache entry.""" + + key: str + root: Path + metadata: dict[str, Any] + + @property + def path(self) -> Path: + return self.root / self.key + + @property + def artifact_path(self) -> Path: + return 
self.path / CACHE_ARTIFACT_NAME + + @property + def metadata_path(self) -> Path: + return self.path / CACHE_METADATA_NAME + + def exists(self) -> bool: + return self.path.exists() and self.artifact_path.exists() and self.metadata_path.exists() + + def verify(self) -> bool: + if not self.exists(): + return False + checksum = self.metadata.get("checksum") + if not checksum: + return False + try: + actual_checksum, file_size = _copy_and_hash(self.artifact_path, None) + except FileNotFoundError: + return False + if checksum != actual_checksum: + log.warning( + "Simulation cache checksum mismatch for key '%s'. Removing stale entry.", self.key + ) + return False + if int(self.metadata.get("file_size", file_size)) != file_size: + self.metadata["file_size"] = file_size + _write_metadata(self.metadata_path, self.metadata) + return True + + def materialize(self, target: Path) -> Path: + """Copy cached artifact to ``target`` and return the resulting path.""" + target = Path(target) + target.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(self.artifact_path, target) + return target + +class SimulationCache: + """Manages storing and retrieving cached simulation artifacts.""" + + def __init__(self, config: SimulationCacheConfig): + self._config = config + self._root = Path(config.directory).expanduser().resolve() + self._lock = threading.RLock() + if config.enabled: + self._root.mkdir(parents=True, exist_ok=True) + + @property + def config(self) -> SimulationCacheConfig: + return self._config + + @property + def root(self) -> Path: + return self._root + + def list(self) -> list[dict[str, Any]]: + """Return metadata for all cache entries.""" + with self._lock: + return [entry.metadata for entry in self._iter_entries()] + + def clear(self) -> None: + """Remove all cache contents.""" + with self._lock: + if self._root.exists(): + try: + shutil.rmtree(self._root) + self._root.mkdir(parents=True, exist_ok=True) + except (FileNotFoundError, OSError): + pass + + + + + def _fetch(self, key: str) -> Optional[CacheEntry]: + """Retrieve an entry by key, verifying checksum.""" + with self._lock: + entry = self._load_entry(key) + if not entry or not entry.exists(): + return None + if not entry.verify(): + self._remove_entry(entry) + return None + self._touch(entry) + return entry + + def fetch_by_task(self, task_id: str) -> Optional[CacheEntry]: + """Retrieve an entry by task id.""" + with self._lock: + for entry in self._iter_entries(): + metadata = entry.metadata + task_ids = metadata.get("task_ids", []) + if task_id in task_ids and entry.exists(): + if not entry.verify(): + self._remove_entry(entry) + return None + self._touch(entry) + return entry + return None + + def __len__(self) -> int: + """Return number of valid cache entries.""" + with self._lock: + return sum(1 for _ in self._iter_entries()) + + def _store( + self, key: str, task_id: Optional[str], source_path: Path, metadata: dict[str, Any] + ) -> Optional[CacheEntry]: + """Store a new cache entry from ``source_path``. + + Parameters + ---------- + key : str + Cache key computed from simulation hash and runtime context. + task_id : str, optional + Server task id associated with this artifact. + source_path : Path + Location of the artifact to cache. + metadata : dict[str, Any] + Additional metadata to persist alongside artifact. + + Returns + ------- + CacheEntry + Representation of the stored cache entry. 
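+
+        Example
+        -------
+        A minimal internal sketch with placeholder values; ``key`` would normally
+        come from :func:`build_cache_key`:
+
+        >>> entry = cache._store(key, task_id="fdve-123", source_path=Path("sim.hdf5"), metadata={})  # doctest: +SKIP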
+ """ + source_path = Path(source_path) + if not source_path.exists(): + raise FileNotFoundError(f"Cannot cache missing artifact: {source_path}") + os.makedirs(self._root, exist_ok=True) + tmp_dir = Path(tempfile.mkdtemp(prefix=TMP_PREFIX, dir=self._root)) + tmp_artifact = tmp_dir / CACHE_ARTIFACT_NAME + tmp_meta = tmp_dir / CACHE_METADATA_NAME + os.makedirs(tmp_dir, exist_ok=True) + + checksum, file_size = _copy_and_hash(source_path, tmp_artifact) + now_iso = _now() + metadata = dict(metadata) + metadata.setdefault("cache_key", key) + metadata.setdefault("created_at", now_iso) + metadata["last_used"] = now_iso + metadata["checksum"] = checksum + metadata["file_size"] = file_size + if task_id: + task_ids = list(metadata.get("task_ids", [])) + if task_id not in task_ids: + task_ids.append(task_id) + metadata["task_ids"] = task_ids + + _write_metadata(tmp_meta, metadata) + try: + with self._lock: + self._root.mkdir(parents=True, exist_ok=True) + self._ensure_limits(file_size) + final_dir = self._root / key + backup_dir: Optional[Path] = None + + try: + if final_dir.exists(): + backup_dir = final_dir.with_name( + f"{final_dir.name}.bak.{_timestamp_suffix()}" + ) + os.replace(final_dir, backup_dir) + # move tmp_dir into place + os.replace(tmp_dir, final_dir) + except Exception: + # restore backup if needed + if backup_dir and backup_dir.exists(): + os.replace(backup_dir, final_dir) + raise + else: + entry = CacheEntry(key=key, root=self._root, metadata=metadata) + if backup_dir and backup_dir.exists(): + shutil.rmtree(backup_dir, ignore_errors=True) + log.debug("Stored simulation cache entry '%s' (%d bytes).", key, file_size) + return entry + finally: + try: + if tmp_dir.exists(): + shutil.rmtree(tmp_dir, ignore_errors=True) + except FileNotFoundError: + pass + + def invalidate(self, key: str) -> None: + with self._lock: + entry = self._load_entry(key) + if entry: + self._remove_entry(entry) + + def _ensure_limits(self, incoming_size: int) -> None: + max_entries = max(self._config.max_entries, 0) + max_size_bytes = int(max(0.0, self._config.max_size_gb) * (1024**3)) + + entries = list(self._iter_entries()) + if max_entries and len(entries) >= max_entries: + self._evict(entries, keep=max_entries - 1) + entries = list(self._iter_entries()) + + if not max_size_bytes: + return + + existing_size = sum(int(e.metadata.get("file_size", 0)) for e in entries) + allowed_size = max(max_size_bytes - incoming_size, 0) + if existing_size > allowed_size: + self._evict_by_size(entries, existing_size, allowed_size) + + def _evict(self, entries: Iterable[CacheEntry], keep: int) -> None: + sorted_entries = sorted(entries, key=lambda e: e.metadata.get("last_used", "")) + to_remove = sorted_entries[: max(0, len(sorted_entries) - keep)] + for entry in to_remove: + self._remove_entry(entry) + + def _evict_by_size( + self, entries: Iterable[CacheEntry], current_size: int, allowed_size: float + ) -> None: + if allowed_size < 0: + allowed_size = 0 + sorted_entries = sorted(entries, key=lambda e: e.metadata.get("last_used", "")) + reclaimed = 0 + for entry in sorted_entries: + if current_size - reclaimed <= allowed_size: + break + size = int(entry.metadata.get("file_size", 0)) + self._remove_entry(entry) + reclaimed += size + log.info(f"Simulation cache evicted entry '{entry.key}' to reclaim {size} bytes.") + + def _iter_entries(self) -> Iterable[CacheEntry]: + if not self._root.exists(): + return [] + entries: list[CacheEntry] = [] + for child in self._root.iterdir(): + if child.name.startswith(TMP_PREFIX): + continue + 
meta_path = child / CACHE_METADATA_NAME + if not meta_path.exists(): + continue + try: + metadata = json.loads(meta_path.read_text(encoding="utf-8")) + except Exception: + metadata = {} + entries.append(CacheEntry(key=child.name, root=self._root, metadata=metadata)) + return entries + + def _load_entry(self, key: str) -> Optional[CacheEntry]: + entry = CacheEntry(key=key, root=self._root, metadata={}) + if not entry.metadata_path.exists() or not entry.artifact_path.exists(): + return None + try: + metadata = json.loads(entry.metadata_path.read_text(encoding="utf-8")) + except Exception: + metadata = {} + entry.metadata = metadata + return entry + + def _touch(self, entry: CacheEntry) -> None: + entry.metadata["last_used"] = _now() + _write_metadata(entry.metadata_path, entry.metadata) + + def _remove_entry(self, entry: CacheEntry) -> None: + if entry.path.exists(): + shutil.rmtree(entry.path, ignore_errors=True) + + def try_fetch( + self, + simulation: WorkflowType, + verbose: bool = False, + ) -> Optional[CacheEntry]: + """ + Attempt to resolve and fetch a cached result entry for the given simulation context. + On miss or any cache error, returns None (the caller should proceed with upload/run). + + Notes + ----- + - Mirrors the exact cache key/context computation from `run`. + - Safe to call regardless of `use_cache` value; will no-op if cache is disabled. + """ + try: + simulation_hash = simulation._hash_self() + workflow_type = Tidy3dStub(simulation=simulation).get_type() + + versions = _get_protocol_version() + environment = _environment_context() + cache_key = build_cache_key( + simulation_hash=simulation_hash, + workflow_type=workflow_type, + environment=environment, + version=versions, + ) + + entry = self._fetch(cache_key) + if not entry: + return None + if verbose: + log.info("Simulation cache hit for workflow '%s'; using local results.", workflow_type) + + return entry + except Exception: + log.error("Failed to fetch cache results.") + + def try_fetch_by_task( + self, + task_id: TaskId, + verbose: bool = False, + ) -> Optional[CacheEntry]: + """ + Try to satisfy `load()` from cache BEFORE downloading. + Since we don't have the simulation hash yet, we use the task-id index. + Returns None on miss or on any cache error. + """ + + try: + entry = self.fetch_by_task(task_id) + if not entry: + return None + if verbose: + log.info("Simulation cache hit for task '%s'; using local results.", task_id) + return entry + except Exception as err: + log.debug("Simulation cache unavailable for load: %s", err) + return None + + def store_result( + self, + stub_data: WorkflowDataType, + task_id: TaskId, + path: str, + workflow_type: str, + ) -> None: + """ + After we have the data (postprocess done), store it in the cache using the + canonical key (simulation hash + workflow type + environment + version). + Also records the task_id mapping for legacy lookups. 
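+
+        A minimal sketch of the expected call (placeholder values), mirroring how
+        ``load()`` invokes it after postprocessing:
+
+        >>> cache.store_result(stub_data, task_id="fdve-123", path="out/sim.hdf5", workflow_type="FDTD")  # doctest: +SKIP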
+ """ + try: + simulation_obj = getattr(stub_data, "simulation", None) + simulation_hash = simulation_obj._hash_self() if simulation_obj is not None else None + if not simulation_hash: + return + + version = _get_protocol_version() + environment = _environment_context() + + cache_key = build_cache_key( + simulation_hash=simulation_hash, + workflow_type=workflow_type, + environment=environment, + version=version, + ) + + metadata = build_entry_metadata( + simulation_hash=simulation_hash, + workflow_type=workflow_type, + runtime_context={ + "task_id": task_id, + }, + environment=environment, + version=version, + extras={"path": str(Path(path))}, + ) + + self._store( + key=cache_key, + task_id=task_id, # keeps a reverse link for legacy fetch_by_task + source_path=Path(path), + metadata=metadata, + ) + except Exception as e: + log.error("Could not store cache entry.") + print("ERROR", e, traceback.format_exc()) + + +def _copy_and_hash( + source: Path, dest: Optional[Path], existing_hash: Optional[str] = None +) -> tuple[str, int]: + """Copy ``source`` to ``dest`` while computing SHA256 checksum. + + Parameters + ---------- + source : Path + Source file path. + dest : Path or None + Destination file path. If ``None``, no copy is performed. + existing_hash : str, optional + If provided alongside ``dest`` and ``dest`` already exists, skip copying when hashes match. + + Returns + ------- + tuple[str, int] + The hexadecimal digest and file size in bytes. + """ + source = Path(source) + if dest is not None: + dest = Path(dest) + sha256 = _Hasher() + size = 0 + with source.open("rb") as src: + if dest is None: + while chunk := src.read(1024 * 1024): + sha256.update(chunk) + size += len(chunk) + else: + dest.parent.mkdir(parents=True, exist_ok=True) + with dest.open("wb") as dst: + while chunk := src.read(1024 * 1024): + dst.write(chunk) + sha256.update(chunk) + size += len(chunk) + return sha256.hexdigest(), size + + +def _write_metadata(path: Path, metadata: dict[str, Any]) -> None: + tmp_path = path.with_suffix(".tmp") + with tmp_path.open("w", encoding="utf-8") as fh: + json.dump(metadata, fh, indent=2, sort_keys=True) + os.replace(tmp_path, path) + + +def _now() -> str: + return datetime.now(timezone.utc).isoformat() + + +def _timestamp_suffix() -> str: + return datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f") + + +class _Hasher: + def __init__(self): + self._hasher = hashlib.sha256() + + def update(self, data: bytes) -> None: + self._hasher.update(data) + + def hexdigest(self) -> str: + return self._hasher.hexdigest() + + +_CONFIG_LOCK = threading.RLock() +_CACHE_CONFIG = _load_effective_config() + + +def get_cache_config() -> SimulationCacheConfig: + """Return a copy of the active cache configuration.""" + + with _CONFIG_LOCK: + return replace(_CACHE_CONFIG) + + +@lru_cache +def get_cache() -> SimulationCache: + """Get a singleton ``SimulationCache`` instance.""" + + return SimulationCache(get_cache_config()) + + +def configure_cache(config: SimulationCacheConfig) -> None: + """Override the global cache configuration.""" + + global _CACHE_CONFIG + with _CONFIG_LOCK: + _CACHE_CONFIG = config + get_cache.cache_clear() + + +def clear() -> None: + """Remove all cache entries.""" + + get_cache().clear() + + +def _canonicalize(value: Any) -> Any: + """Convert value into a JSON-serializable object for hashing/metadata.""" + + if isinstance(value, dict): + return { + str(k): _canonicalize(v) + for k, v in sorted(value.items(), key=lambda item: str(item[0])) + } + if isinstance(value, (list, 
tuple)): + return [_canonicalize(v) for v in value] + if isinstance(value, set): + return sorted(_canonicalize(v) for v in value) + if isinstance(value, Enum): + return value.value + if isinstance(value, Path): + return str(value) + if isinstance(value, datetime): + return value.isoformat() + if isinstance(value, bytes): + return value.decode("utf-8", errors="ignore") + return value + + +def build_cache_key( + *, + simulation_hash: str, + workflow_type: str, + environment: dict[str, Any], + version: str, +) -> str: + """Construct a deterministic cache key.""" + + payload = { + "simulation_hash": simulation_hash, + "workflow_type": workflow_type, + "environment": _canonicalize(environment), + "versions": _canonicalize(version), + } + encoded = json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") + return hashlib.sha256(encoded).hexdigest() + + +def build_entry_metadata( + *, + simulation_hash: str, + workflow_type: str, + runtime_context: dict[str, Any], + environment: dict[str, Any], + version: str, + extras: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + """Create metadata dictionary for a cache entry.""" + + metadata: dict[str, Any] = { + "simulation_hash": simulation_hash, + "workflow_type": workflow_type, + "runtime_context": _canonicalize(runtime_context), + "environment": _canonicalize(environment), + "versions": _canonicalize(version), + "task_ids": [], + } + if extras: + metadata.update(_canonicalize(extras)) + return metadata diff --git a/tidy3d/web/core/http_util.py b/tidy3d/web/core/http_util.py index f802752bb1..f2f24e576a 100644 --- a/tidy3d/web/core/http_util.py +++ b/tidy3d/web/core/http_util.py @@ -48,7 +48,7 @@ class ResponseCodes(Enum): NOT_FOUND = 404 -def get_version() -> None: +def get_version() -> str: """Get the version for the current environment.""" return core_config.get_version() From 1be817cf22e7640b1dc9e087ddf543101897307e Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Mon, 29 Sep 2025 18:37:41 +0200 Subject: [PATCH 02/22] use sim cache in batch --- playground/local_cache.py | 88 ------------------------- tests/test_web/test_simulation_cache.py | 14 +++- tidy3d/web/api/container.py | 38 +++++++---- 3 files changed, 36 insertions(+), 104 deletions(-) delete mode 100644 playground/local_cache.py diff --git a/playground/local_cache.py b/playground/local_cache.py deleted file mode 100644 index 5b8d71ce6e..0000000000 --- a/playground/local_cache.py +++ /dev/null @@ -1,88 +0,0 @@ -from __future__ import annotations - -from pathlib import Path - -from playground import make_sim_playground -from tidy3d.web.api import webapi as web -from tidy3d.web.cache import ( - SimulationCacheConfig, - configure_cache, - get_cache, -) - - -# ----------------------------------------------------------- -# Helpers -# ----------------------------------------------------------- -def reset_cache(): - # cache_dir = tmp_dir / "cache" - # if cache_dir.exists(): - # shutil.rmtree(cache_dir) - cfg = SimulationCacheConfig( - enabled=True, - # directory=cache_dir, - max_size_gb=1.0, - max_entries=10, - ) - configure_cache(cfg) - get_cache.cache_clear() # <--- important! 
- get_cache().clear() - return - - -def disp_cache_entries(): - cache = get_cache() - print(f"Cache has {len(cache.list())}") # ,entries:\n{pprint.pformat(cache.list())}") - - -# ----------------------------------------------------------- -# Manual test driver -# ----------------------------------------------------------- -if __name__ == "__main__": - tmp_dir = Path("debug_cache_tmp").resolve() - # disp_cache_entries() - # reset_cache() - disp_cache_entries() - - sim = make_sim_playground() - out_path = tmp_dir / "result.hdf5" - out_path2 = tmp_dir / "result2.hdf5" - - # print("\n=== First run (should upload/download, then cache) ===") - # data1 = web.run(sim, task_name="demo", path=str(out_path), use_cache=True) - # print(f"Got data type: {type(data1)}") - # disp_cache_entries() - # exit() - print("\n=== Second run (should hit cache, skip pipeline) ===") - data2 = web.run(sim, task_name="demo", path=str(out_path), use_cache=True) - print(f"Got data type: {type(data2)}") - disp_cache_entries() - # exit() - print("\n=== Load by task id (should also hit cache) ===") - # task_id = "fdve-dc37255d-77a5-4101-8964-97954fa3bde3" # your monkeypatch or real task id - task_id = "fdve-f56d0ebf-ec88-439e-bcbf-1316ab0ed43d" # your monkeypatch or real task id - data3 = web.load(task_id, path=str(out_path), use_cache=True) - print(f"Got data type: {type(data3)}") - disp_cache_entries() - - print("\n=== Manually corrupting artifact to trigger re-download ===") - entries = get_cache().list() - # if entries: - # key = entries[0]["cache_key"] - # - # # get the actual cache directory from config and expand '~' - # cache_dir = get_cache_config().directory.expanduser() - # - # artifact_path = cache_dir / key / "artifact.hdf5" - # artifact_path.parent.mkdir(parents=True, exist_ok=True) - # - # # overwrite with junk bytes (binary-safe) - # artifact_path.write_bytes(b"corrupted") - # print( - # f"[debug] Corrupted artifact at {artifact_path} (exists={artifact_path.exists()}, size={artifact_path.stat().st_size} bytes)") - # - # data4 = web.load(task_id, path=str(out_path), use_cache=True) - # print(f"After corruption -> Got data type: {type(data4)}") - # disp_cache_entries() - # - # print("\nDone. 
Inspect log output above to debug pipeline vs cache paths.") diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index 6a7e8e0f2b..181085d420 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -4,6 +4,8 @@ from pathlib import Path import tidy3d as td +from tests.test_plugins.test_adjoint import use_emulated_run +from tests.utils import run_emulated from tidy3d.web.api import webapi as web from tidy3d.web.cache import ( CACHE_ARTIFACT_NAME, @@ -143,12 +145,20 @@ def _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): assert isinstance(data2, _FakeStubData) assert counters == {"upload": 0, "start": 0, "monitor": 0, "download": 0} + + @pytest.mark.serial -def test_run_cache_hit_async(monkeypatch, tmp_path, basic_simulation, fake_data): +def test_run_cache_hit_async(use_emulated_run, monkeypatch, tmp_path, basic_simulation, fake_data): counters = _patch_run_pipeline(monkeypatch, tmp_path) + out_path = tmp_path / "result.hdf5" get_cache().clear() - data = run_async({"task1": basic_simulation}) + data = web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) + assert isinstance(data, _FakeStubData) + assert counters == {"upload": 1, "start": 1, "monitor": 1, "download": 1} + + _reset_counters(counters) + data = run_async({"task1": basic_simulation}, use_cache=True) @pytest.mark.serial diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index ce6ae43f48..4cca6cdf61 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -8,7 +8,8 @@ from abc import ABC from collections.abc import Mapping from concurrent.futures import ThreadPoolExecutor -from typing import Literal, Optional +from pathlib import Path +from typing import Literal, Optional, Union import pydantic.v1 as pd from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeElapsedColumn @@ -229,7 +230,6 @@ class Job(WebContainer): ) use_cache: Optional[bool] = None - cache_hit: Optional[CacheEntry] = None def to_file(self, fname: str) -> None: """Exports :class:`Tidy3dBaseModel` instance to .yaml, .json, or .hdf5 file @@ -247,6 +247,18 @@ def to_file(self, fname: str) -> None: self = self.updated_copy(task_id_cached=task_id_cached) super(Job, self).to_file(fname=fname) # noqa: UP008 + def get_cache_hit_entry(self) -> Optional[CacheEntry]: + cache_instance = _resolve_cache(self.use_cache) + if cache_instance is not None: + sim_for_cache = self.simulation + if isinstance(self.simulation, (ModeSolver, ModeSimulation)) and self.reduce_simulation: + sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation) + entry = cache_instance.try_fetch( + simulation=sim_for_cache, + ) + return entry + return None + def run( self, path: str = DEFAULT_DATA_PATH, @@ -274,14 +286,8 @@ def run( cache_instance = _resolve_cache(use_cache) data = None - if cache_instance is not None: - sim_for_cache = self.simulation - if isinstance(self.simulation, (ModeSolver, ModeSimulation)) and self.reduce_simulation: - sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation) - entry = cache_instance.try_fetch( - simulation=sim_for_cache, - path=path, - ) + entry = self.get_cache_hit_entry() + if entry is not None: data = _get_simulation_data_from_cache_entry(entry, path) if data is not None: return data @@ -399,9 +405,13 @@ def download(self, path: str = DEFAULT_DATA_PATH) -> None: ---- To load the data after download, use 
:meth:`Job.load`.
        """
-        if self.use_cache and self.cache_hit:
-            self.cache_hit.materialize(Path(path))
-            return
+        cache_instance = _resolve_cache(self.use_cache)
+        if cache_instance is not None:
+            entry = self.get_cache_hit_entry()
+            if entry is not None:
+                entry.materialize(Path(path))
+                return
         self._check_path_dir(path=path)
         web.download(task_id=self.task_id, path=path, verbose=self.verbose)
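
The per-job cache override wired up above can be exercised end to end. Below is a minimal
sketch, assuming an existing `td.Simulation` instance named `sim`; the task name and output
path are placeholders:

    from tidy3d.web import Job

    # use_cache=True forces the local cache, False bypasses it,
    # and None defers to configuration / environment settings
    job = Job(simulation=sim, task_name="demo", use_cache=True)
    data = job.run(path="data/simulation_data.hdf5")  # identical reruns resolve locally
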
From 594b1fbf0944d9eae271963d672449658c94f3aa Mon Sep 17 00:00:00 2001
From: marcorudolphflex
Date: Tue, 30 Sep 2025 13:05:08 +0200
Subject: [PATCH 03/22] renaming

---
 tidy3d/web/api/container.py | 42 ++++++++++++++++++++++---------------
 tidy3d/web/api/webapi.py    | 16 +++++++-------
 2 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py
index 4cca6cdf61..4980c9184c 100644
--- a/tidy3d/web/api/container.py
+++ b/tidy3d/web/api/container.py
@@ -248,12 +248,12 @@ def to_file(self, fname: str) -> None:
         super(Job, self).to_file(fname=fname)  # noqa: UP008

     def get_cache_hit_entry(self) -> Optional[CacheEntry]:
-        cache_instance = _resolve_cache(self.use_cache)
-        if cache_instance is not None:
+        simulation_cache = _resolve_cache(self.use_cache)
+        if simulation_cache is not None:
             sim_for_cache = self.simulation
             if isinstance(self.simulation, (ModeSolver, ModeSimulation)) and self.reduce_simulation:
                 sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation)
-            entry = cache_instance.try_fetch(
+            entry = simulation_cache.try_fetch(
                 simulation=sim_for_cache,
             )
             return entry
@@ -284,7 +284,6 @@ def run(
         """
         self._check_path_dir(path=path)

-        cache_instance = _resolve_cache(use_cache)
         data = None
         entry = self.get_cache_hit_entry()
         if entry is not None:
@@ -315,13 +314,13 @@ def _upload(self) -> Optional[TaskId]:
         """Upload this job and return the task ID for handling."""
         # upload kwargs with all fields except task_id
         upload_kwargs = {key: getattr(self, key) for key in self._upload_fields}
-        cache_instance = _resolve_cache(self.use_cache)
+        simulation_cache = _resolve_cache(self.use_cache)

-        if cache_instance is not None:
+        if simulation_cache is not None:
             sim_for_cache = self.simulation
             if isinstance(self.simulation, (ModeSolver, ModeSimulation)) and self.reduce_simulation:
                 sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation)
-            entry = cache_instance.try_fetch(
+            entry = simulation_cache.try_fetch(
                 simulation=sim_for_cache
             )
             if entry:
@@ -363,12 +362,14 @@ def start(self, priority: Optional[int] = None) -> None:
         ----
         To monitor progress of the :class:`Job`, call :meth:`Job.monitor` after started.
         """
-        web.start(
-            self.task_id,
-            solver_version=self.solver_version,
-            pay_type=self.pay_type,
-            priority=priority,
-        )
+        entry = self.get_cache_hit_entry()
+        if entry is None:
+            web.start(
+                self.task_id,
+                solver_version=self.solver_version,
+                pay_type=self.pay_type,
+                priority=priority,
+            )

     def get_run_info(self) -> RunInfo:
         """Return information about the running :class:`Job`.
@@ -405,12 +406,12 @@ def download(self, path: str = DEFAULT_DATA_PATH) -> None:
         ----
         To load the data after download, use :meth:`Job.load`.
         """
-        cache_instance = _resolve_cache(self.use_cache)
-        if cache_instance is not None:
+        simulation_cache = _resolve_cache(self.use_cache)
+        if simulation_cache is not None:
             entry = self.get_cache_hit_entry()
             if entry is not None:
                 entry.materialize(Path(path))
                 return
         self._check_path_dir(path=path)
         web.download(task_id=self.task_id, path=path, verbose=self.verbose)
diff --git a/tidy3d/web/api/webapi.py b/tidy3d/web/api/webapi.py
index ff79b56c27..6dca500921 100644
--- a/tidy3d/web/api/webapi.py
+++ b/tidy3d/web/api/webapi.py
@@ -232,13 +232,13 @@ def run(
     :meth:`tidy3d.web.api.container.Batch.monitor`
         Monitor progress of each of the running tasks.
     """
-    cache_instance = _resolve_cache(use_cache)
+    simulation_cache = _resolve_cache(use_cache)
     data = None
-    if cache_instance is not None:
+    if simulation_cache is not None:
         sim_for_cache = simulation
         if isinstance(simulation, (ModeSolver, ModeSimulation)) and reduce_simulation:
             sim_for_cache = get_reduced_simulation(simulation, reduce_simulation)
-        entry = cache_instance.try_fetch(
+        entry = simulation_cache.try_fetch(
             simulation=sim_for_cache
         )
         data = _get_simulation_data_from_cache_entry(entry, path)
@@ -1067,10 +1067,10 @@ def load(
        base_dir = os.path.dirname(path) or "."
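+        # this workflow stores its artifact under the fixed name "cm_data.hdf5",
+        # so the caller-supplied path only determines the target directory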
path = os.path.join(base_dir, "cm_data.hdf5") - cache_instance = _resolve_cache(use_cache) + simulation_cache = _resolve_cache(use_cache) data = None - if cache_instance is not None: - entry = cache_instance.try_fetch_by_task( + if simulation_cache is not None: + entry = simulation_cache.try_fetch_by_task( task_id=task_id, verbose=verbose ) data = _get_simulation_data_from_cache_entry(entry, path) @@ -1089,10 +1089,10 @@ def load( stub_data = Tidy3dStubData.postprocess(path, lazy=lazy) - if cache_instance is not None: + if simulation_cache is not None: info = get_info(task_id, verbose=False) workflow_type = getattr(info, "taskType", None) or type(stub_data).__name__ - cache_instance.store_result( + simulation_cache.store_result( stub_data=stub_data, task_id=task_id, path=path, From 6c1a8c40d2bf929998dbf1625d59c063b4d3b999 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Mon, 6 Oct 2025 19:41:59 +0200 Subject: [PATCH 04/22] rebase2 --- tests/test_web/test_simulation_cache.py | 113 ++++++++---------- tidy3d/web/api/asynchronous.py | 2 +- tidy3d/web/api/autograd/autograd.py | 6 +- tidy3d/web/api/container.py | 149 ++++++++++++++++++------ tidy3d/web/api/webapi.py | 5 +- 5 files changed, 173 insertions(+), 102 deletions(-) diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index 181085d420..c34144669d 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -31,42 +31,14 @@ _load_env_overrides, _load_cli_cache_settings, ) -from tidy3d.web import run_async - +from tidy3d.web import run_async, Job MOCK_TASK_ID = "task-xyz" - - class _FakeStubData: def __init__(self, simulation: td.Simulation): self.simulation = simulation -@pytest.fixture(autouse=True) -def isolated_cache(tmp_path_factory): - """Force every test into its own unique simulation cache directory.""" - # make per-test unique dir with uuid - cache_dir = tmp_path_factory.mktemp(f"tidy3d_cache_{uuid.uuid4().hex}") - - # save original config (singleton) - original = get_cache()._config if get_cache() else None - - # point global cache at fresh unique directory - cfg = SimulationCacheConfig( - enabled=True, - directory=cache_dir, - max_size_gb=1.0, - max_entries=10, - ) - configure_cache(cfg) - get_cache().clear() - - yield cache_dir - - # restore previous config or clear - if original is not None: - configure_cache(original) - get_cache().clear() @pytest.fixture def basic_simulation(): @@ -93,7 +65,7 @@ def _fake_postprocess(path: str): return calls -def _patch_run_pipeline(monkeypatch, tmp_path): +def _patch_run_pipeline(monkeypatch): """Patch upload, start, monitor, and download to avoid network calls.""" counters = {"upload": 0, "start": 0, "monitor": 0, "download": 0} @@ -111,10 +83,15 @@ def _fake_download(*, task_id, path, **kwargs): counters["download"] += 1 Path(path).write_text(f"payload:{task_id}") + def _fake_status(self): + return "success" + monkeypatch.setattr(web, "upload", _fake_upload) monkeypatch.setattr(web, "start", _fake_start) monkeypatch.setattr(web, "monitor", _fake_monitor) monkeypatch.setattr(web, "download", _fake_download) + monkeypatch.setattr(web, "estimate_cost", lambda *args, **kwargs: 0.0) + monkeypatch.setattr(Job, "status", property(_fake_status)) monkeypatch.setattr( web, "get_info", @@ -130,9 +107,8 @@ def _reset_counters(counters: dict[str, int]) -> None: counters[key] = 0 -@pytest.mark.serial def _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): - counters = 
_patch_run_pipeline(monkeypatch, tmp_path) + counters = _patch_run_pipeline(monkeypatch) out_path = tmp_path / "result.hdf5" get_cache().clear() @@ -146,26 +122,44 @@ def _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): assert counters == {"upload": 0, "start": 0, "monitor": 0, "download": 0} - -@pytest.mark.serial -def test_run_cache_hit_async(use_emulated_run, monkeypatch, tmp_path, basic_simulation, fake_data): - counters = _patch_run_pipeline(monkeypatch, tmp_path) - out_path = tmp_path / "result.hdf5" +def _test_run_cache_hit_async(monkeypatch, basic_simulation): + counters = _patch_run_pipeline(monkeypatch) get_cache().clear() + _reset_counters(counters) + sim2 = basic_simulation.updated_copy(shutoff=1e-4) + sim3 = basic_simulation.updated_copy(shutoff=1e-3) + + data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) + print(counters) + assert counters["download"] == 2 + data_task1 = data["task1"] # access to store in cache + data_task2 = data["task2"] # access to store in cache + assert isinstance(data_task1, _FakeStubData) + assert isinstance(data_task2, _FakeStubData) + cache = get_cache() + print("cache size", len(cache)) - data = web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) - assert isinstance(data, _FakeStubData) - assert counters == {"upload": 1, "start": 1, "monitor": 1, "download": 1} + _reset_counters(counters) + data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) + print(counters) + assert counters["download"] == 0 + data_task1 = data["task1"] + assert isinstance(data_task1, _FakeStubData) _reset_counters(counters) - data = run_async({"task1": basic_simulation}, use_cache=True) + data = run_async({"task1": basic_simulation, "task3": sim3}, use_cache=True) + print(counters) + assert counters["download"] == 1 + + data_task1 = data["task1"] + data_task2 = data["task2"] + assert isinstance(data_task1, _FakeStubData) + assert isinstance(data_task2, _FakeStubData) -@pytest.mark.serial -@pytest.mark.xdist_group("serial") def _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): get_cache().clear() - counters = _patch_run_pipeline(monkeypatch, tmp_path) + counters = _patch_run_pipeline(monkeypatch) out_path = tmp_path / "load.hdf5" web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) @@ -177,10 +171,8 @@ def _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): assert counters["download"] == 0 # served from cache -@pytest.mark.serial -@pytest.mark.xdist_group("serial") def _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation): - counters = _patch_run_pipeline(monkeypatch, tmp_path) + counters = _patch_run_pipeline(monkeypatch) out_path = tmp_path / "checksum.hdf5" web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) @@ -195,8 +187,6 @@ def _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simula assert counters["download"] == 1 -@pytest.mark.serial -@pytest.mark.xdist_group("serial") def _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation): cache = SimulationCache(SimulationCacheConfig(enabled=True, max_size_gb=10.0, max_entries=1)) @@ -215,8 +205,6 @@ def _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation): assert entries[0]["simulation_hash"] == sim2._hash_self() -@pytest.mark.serial -@pytest.mark.xdist_group("serial") def _test_cache_eviction_by_size(tmp_path_factory, basic_simulation): cache = 
SimulationCache(SimulationCacheConfig(enabled=True, max_size_gb=1e-5, max_entries=10)) @@ -235,16 +223,17 @@ def _test_cache_eviction_by_size(tmp_path_factory, basic_simulation): assert entries[0]["simulation_hash"] == sim2._hash_self() -@pytest.mark.xdist_group("serial") + def test_cache_end_to_end(monkeypatch, tmp_path, tmp_path_factory, basic_simulation, fake_data): """Run all critical cache tests in sequence to ensure end-to-end stability.""" - _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) - _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) - _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation) - _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation) - _test_cache_eviction_by_size(tmp_path_factory, basic_simulation) + # _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) + # _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) + # _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation) + # _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation) + # _test_cache_eviction_by_size(tmp_path_factory, basic_simulation) + _test_run_cache_hit_async(monkeypatch, basic_simulation) + -@pytest.mark.serial def test_configure_cache_roundtrip(tmp_path): new_cfg = SimulationCacheConfig(enabled=True, directory=tmp_path, max_size_gb=1.23, max_entries=5) configure_cache(new_cfg) @@ -254,7 +243,7 @@ def test_configure_cache_roundtrip(tmp_path): assert cfg.max_size_gb == 1.23 assert cfg.max_entries == 5 -@pytest.mark.serial + def test_env_var_overrides(monkeypatch, tmp_path): monkeypatch.setenv("TIDY3D_CACHE_ENABLED", "true") monkeypatch.setenv("TIDY3D_CACHE_DIR", str(tmp_path)) @@ -269,7 +258,7 @@ def test_env_var_overrides(monkeypatch, tmp_path): "max_entries": 7, } -@pytest.mark.serial + def test_cli_config_overrides(tmp_path, monkeypatch): # Build fake toml config file cli_config_file = tmp_path / "config.toml" @@ -294,7 +283,7 @@ def test_cli_config_overrides(tmp_path, monkeypatch): assert settings["max_size_gb"] == 2.5 assert settings["max_entries"] == 99 -@pytest.mark.serial + def test_apply_updates_invalid_values(tmp_path, caplog): base = SimulationCacheConfig() updates = { @@ -310,7 +299,7 @@ def test_apply_updates_invalid_values(tmp_path, caplog): assert cfg.max_size_gb == base.max_size_gb assert cfg.max_entries == base.max_entries -@pytest.mark.serial + def test_effective_config_cli_then_env(monkeypatch, tmp_path): """CLI settings should apply first, then environment overrides take precedence.""" diff --git a/tidy3d/web/api/asynchronous.py b/tidy3d/web/api/asynchronous.py index 94b1683f99..ab8fad38ab 100644 --- a/tidy3d/web/api/asynchronous.py +++ b/tidy3d/web/api/asynchronous.py @@ -58,7 +58,7 @@ def run_async( Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. use_cache: bool = None - Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or# + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or environment variables will be used. 
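+        Cached entries are validated against their stored checksum before reuse; artifacts
+        that fail validation are discarded and fetched again.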
Returns diff --git a/tidy3d/web/api/autograd/autograd.py b/tidy3d/web/api/autograd/autograd.py index 06bbfb7521..e7a08e9d42 100644 --- a/tidy3d/web/api/autograd/autograd.py +++ b/tidy3d/web/api/autograd/autograd.py @@ -159,6 +159,9 @@ def run( Which method to pay for the simulation. priority: int = None Task priority for vGPU queue (1=lowest, 10=highest). + use_cache: bool = None + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or + environment variables will be used. Returns ------- Union[:class:`.SimulationData`, :class:`.HeatSimulationData`, :class:`.EMESimulationData`, :class:`.ModalComponentModelerData`, :class:`.TerminalComponentModelerData`] @@ -324,7 +327,7 @@ def run_async( Specify the payment method. Whether to reduce structures in the simulation to the simulation domain only. Note: currently only implemented for the mode solver. use_cache: bool = None - Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or# + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or environment variables will be used. Returns @@ -368,6 +371,7 @@ def run_async( max_num_adjoint_per_fwd=max_num_adjoint_per_fwd, pay_type=pay_type, priority=priority, + use_cache=use_cache, ) return run_async_webapi( diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index 4980c9184c..47daa547df 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -4,6 +4,8 @@ import concurrent import os +import random +import shutil import time from abc import ABC from collections.abc import Mapping @@ -217,6 +219,12 @@ class Job(WebContainer): description="Specify the payment method.", ) + data_cache_path: str | None = pd.Field( + None, + title="Data Cache Path", + description="File where cache is copied to.", + ) + _upload_fields = ( "simulation", "task_name", @@ -229,7 +237,13 @@ class Job(WebContainer): "reduce_simulation", ) - use_cache: Optional[bool] = None + _cache_file_moved = False + + use_cache: Optional[bool] = pd.Field( + None, + title="Use Cache", + description="Whether to use local cache for retrieving Simulation results.", + ) def to_file(self, fname: str) -> None: """Exports :class:`Tidy3dBaseModel` instance to .yaml, .json, or .hdf5 file @@ -302,9 +316,52 @@ def run( return data + @cached_property + def data_cache_path(self) -> str: + cache = _resolve_cache(self.use_cache) + path = os.path.join(cache._root, "tmp", f"{self.task_name}.hdf5") + return path + + @cached_property + def load_if_cached( + self + ) -> Optional[str]: + """Run :class:`Job` all the way through and return data. + + Parameters + ---------- + path : str = "./simulation_data.hdf5" + Path to download results file (.hdf5), including filename. + priority: int = None + Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). + It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. + use_cache: bool = None + Override cache usage behaviour for this call. ``True`` forces cache usage when available, + ``False`` bypasses it, and ``None`` defers to configuration and environment settings. + Returns + ------- + :class:`WorkflowDataType` + Object containing simulation results. 
+ """ + path = self.data_cache_path + self._check_path_dir(path=path) + entry = self.get_cache_hit_entry() + if entry is not None: + data = _get_simulation_data_from_cache_entry(entry, path) + if data is not None: + entry = self.get_cache_hit_entry() + if entry is not None: + entry.materialize(Path(path)) + print(f"{self.task_name} found in cache") + return path + print(f"{self.task_name} not found in cache") + return None + @cached_property def task_id(self) -> TaskId: """The task ID for this ``Job``. Uploads the ``Job`` if it hasn't already been uploaded.""" + if self.load_if_cached: + return "cached_" + self.task_name if self.task_id_cached: return self.task_id_cached self._check_folder(self.folder_name) @@ -314,23 +371,13 @@ def _upload(self) -> Optional[TaskId]: """Upload this job and return the task ID for handling.""" # upload kwargs with all fields except task_id upload_kwargs = {key: getattr(self, key) for key in self._upload_fields} - simulation_cache = _resolve_cache(self.use_cache) - - if simulation_cache is not None: - sim_for_cache = self.simulation - if isinstance(self.simulation, (ModeSolver, ModeSimulation)) and self.reduce_simulation: - sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation) - entry = simulation_cache.try_fetch( - simulation=sim_for_cache - ) - if entry: - return entry.metadata["task_ids"][0] - task_id = web.upload(**upload_kwargs) return task_id def upload(self) -> None: - """Upload this ``Job``.""" + """Upload this ``Job`` if not already got cached results.""" + if self.load_if_cached: + return _ = self.task_id def get_info(self) -> TaskInfo: @@ -347,6 +394,8 @@ def get_info(self) -> TaskInfo: @property def status(self): """Return current status of :class:`Job`.""" + if self.load_if_cached: + return "success" return self.get_info().status def start(self, priority: Optional[int] = None) -> None: @@ -361,9 +410,10 @@ def start(self, priority: Optional[int] = None) -> None: Note ---- To monitor progress of the :class:`Job`, call :meth:`Job.monitor` after started. + Function has no effect if cache is enabled and data was found in cache. """ - entry = self.get_cache_hit_entry() - if entry is None: + loaded = self.load_if_cached + if loaded is None: web.start( self.task_id, solver_version=self.solver_version, @@ -389,6 +439,8 @@ def monitor(self) -> None: To load the output of completed simulation into :class:`.SimulationData` objects, call :meth:`Job.load`. """ + if self.load_if_cached: + return web.monitor(self.task_id, verbose=self.verbose) def download(self, path: str = DEFAULT_DATA_PATH) -> None: @@ -406,25 +458,24 @@ def download(self, path: str = DEFAULT_DATA_PATH) -> None: ---- To load the data after download, use :meth:`Job.load`. 
""" - simulation_cache = _resolve_cache(self.use_cache) - if simulation_cache is not None: - entry = self.get_cache_hit_entry() - if entry is not None: - entry.materialize(Path(path)) - print("GMATERIALIZED") - # workflow_type = self.simulation_type - # simulation_cache.store_result( - # stub_data=data, - # task_id=self.task_id, - # path=path, - # workflow_type=workflow_type, - # ) - return + if self.load_if_cached: + self.move_cache_file(path=path) + return self._check_path_dir(path=path) web.download(task_id=self.task_id, path=path, verbose=self.verbose) + + def move_cache_file(self, path: str) -> None: + if self._cache_file_moved: + return + if os.path.exists(self.data_cache_path): + shutil.move(self.data_cache_path, path) + self._cache_file_moved = True + else: + raise FileNotFoundError(f"Cached file does not longer exist in {path}.") + def load( - self, path: str = DEFAULT_DATA_PATH, use_cache: Optional[bool] = None + self, path: str = DEFAULT_DATA_PATH ) -> WorkflowDataType: """Download job results and load them into a data object. @@ -432,18 +483,25 @@ def load( ---------- path : str = "./simulation_data.hdf5" Path to download data as ``.hdf5`` file (including filename). + use_cache: bool = None + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or + environment variables will be used. Returns ------- Union[:class:`.SimulationData`, :class:`.HeatSimulationData`, :class:`.EMESimulationData`] Object containing simulation results. """ + if self.load_if_cached: + self.move_cache_file(path=path) + data = Tidy3dStubData.postprocess(path) + return data self._check_path_dir(path=path) data = web.load( task_id=self.task_id, path=path, verbose=self.verbose, - use_cache=use_cache, + use_cache=self.use_cache, ) if isinstance(self.simulation, ModeSolver): self.simulation._patch_data(data=data) @@ -486,6 +544,8 @@ def estimate_cost(self, verbose: bool = True) -> float: Cost is calculated assuming the simulation runs for the full ``run_time``. If early shut-off is triggered, the cost is adjusted proportionately. """ + if self.load_if_cached: + return 0.0 return web.estimate_cost(self.task_id, verbose=verbose, solver_version=self.solver_version) @staticmethod @@ -551,6 +611,11 @@ class BatchData(Tidy3dBaseModel, Mapping): verbose: bool = pd.Field( True, title="Verbose", description="Whether to print info messages and progressbars." 
) + use_cache: Optional[bool] = pd.Field( + None, + title="Use Cache", + description="Whether to use local cache for retrieving Simulation results.", + ) def load_sim_data(self, task_name: str) -> WorkflowDataType: """Load a simulation data object from file by task name.""" @@ -558,7 +623,7 @@ def load_sim_data(self, task_name: str) -> WorkflowDataType: task_id = self.task_ids[task_name] web.get_info(task_id) - return web.load(task_id=task_id, path=task_data_path, verbose=False) + return web.load(task_id=task_id, path=task_data_path, verbose=False, use_cache=self.use_cache) def __getitem__(self, task_name: TaskName) -> WorkflowDataType: """Get the simulation data object for a given ``task_name``.""" @@ -698,7 +763,11 @@ class Batch(WebContainer): "fields that were not used to create the task will cause errors.", ) - use_cache: Optional[bool] = None + use_cache: Optional[bool] = pd.Field( + None, + title="Use Cache", + description="Whether to use local cache for retrieving Simulation results.", + ) _job_type = Job @@ -717,7 +786,7 @@ def run( Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. use_cache: bool = None - Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or# + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or environment variables will be used. Returns ------ @@ -739,6 +808,14 @@ def run( rather it iterates over the task names and loads the corresponding data from file one by one. If no file exists for that task, it downloads it. """ + jobs = self.jobs + loaded = list() + for task_name, job in jobs.items(): + loaded.append(job.load_if_cached) + if all([l is not None for l in loaded]): # if all results were found in cache + print("ALL CACHED") + return self.load(path_dir=path_dir) + self._check_path_dir(path_dir) self.upload() self.to_file(self._batch_path(path_dir=path_dir)) @@ -1168,7 +1245,7 @@ def load(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) task_paths[task_name] = self._job_data_path(task_id=job.task_id, path_dir=path_dir) task_ids[task_name] = self.jobs[task_name].task_id - data = BatchData(task_paths=task_paths, task_ids=task_ids, verbose=self.verbose) + data = BatchData(task_paths=task_paths, task_ids=task_ids, verbose=self.verbose, use_cache=self.use_cache) for task_name, job in self.jobs.items(): if isinstance(job.simulation, ModeSolver): diff --git a/tidy3d/web/api/webapi.py b/tidy3d/web/api/webapi.py index 6dca500921..35ae448f4a 100644 --- a/tidy3d/web/api/webapi.py +++ b/tidy3d/web/api/webapi.py @@ -186,7 +186,7 @@ def run( Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. use_cache: bool = None - Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or# + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or environment variables will be used. Returns ------- @@ -1051,7 +1051,7 @@ def load( progress_callback : Callable[[float], None] = None Optional callback function called when downloading file with ``bytes_in_chunk`` as argument. use_cache: bool = None - Whether to use local cache if identical simulation is rerun. 
If not provided, cache settings from config or# + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or environment variables will be used. lazy : bool = False Whether to load the actual data (``lazy=False``) or return a proxy that loads @@ -1098,6 +1098,7 @@ def load( path=path, workflow_type=workflow_type, ) + print("STORED", task_id) return stub_data From f6a843cb4dea72c6656e015727e229bd2f017fe5 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Mon, 6 Oct 2025 20:37:50 +0200 Subject: [PATCH 05/22] fix batchdata load wip --- tests/test_web/test_simulation_cache.py | 75 +++++++++++++++++++++---- tidy3d/web/api/container.py | 30 +++------- tidy3d/web/cache.py | 6 ++ 3 files changed, 79 insertions(+), 32 deletions(-) diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index c34144669d..de5c2d7ccb 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -34,6 +34,15 @@ from tidy3d.web import run_async, Job MOCK_TASK_ID = "task-xyz" +# --- Fake pipeline global maps / queue --- +TASK_TO_SIM: dict[str, td.Simulation] = {} # task_id -> Simulation +PATH_TO_SIM: dict[str, td.Simulation] = {} # artifact path -> Simulation +SIM_ORDER: list[td.Simulation] = [] # fallback queue when upload isn't called + +def _reset_fake_maps(): + TASK_TO_SIM.clear() + PATH_TO_SIM.clear() + SIM_ORDER.clear() class _FakeStubData: def __init__(self, simulation: td.Simulation): @@ -54,24 +63,61 @@ def basic_simulation(): @pytest.fixture(autouse=True) def fake_data(monkeypatch, basic_simulation): - """Patch postprocess to return predictable stub data and track invocations.""" + """Patch postprocess to return stub data bound to the correct simulation.""" calls = {"postprocess": 0} def _fake_postprocess(path: str): calls["postprocess"] += 1 - return _FakeStubData(basic_simulation) + p = Path(path) + sim = PATH_TO_SIM.get(str(p)) + if sim is None: + # Try to recover task_id from file payload written by _fake_download + try: + txt = p.read_text() + if "payload:" in txt: + task_id = txt.split("payload:", 1)[1].strip() + sim = TASK_TO_SIM.get(task_id) + except Exception: + pass + if sim is None: + # Last-resort fallback (keeps tests from crashing even if mapping failed) + sim = basic_simulation + return _FakeStubData(sim) monkeypatch.setattr(web.Tidy3dStubData, "postprocess", staticmethod(_fake_postprocess)) return calls - def _patch_run_pipeline(monkeypatch): - """Patch upload, start, monitor, and download to avoid network calls.""" + """Patch upload, start, monitor, and download to avoid network calls and map sims.""" counters = {"upload": 0, "start": 0, "monitor": 0, "download": 0} + _reset_fake_maps() # isolate between tests + + def _extract_simulation(kwargs): + """Extract the first td.Simulation object from upload kwargs.""" + if "simulation" in kwargs and isinstance(kwargs["simulation"], td.Simulation): + return kwargs["simulation"] + if "simulations" in kwargs: + sims = kwargs["simulations"] + if isinstance(sims, dict): + for sim in sims.values(): + if isinstance(sim, td.Simulation): + return sim + elif isinstance(sims, (list, tuple)): + for sim in sims: + if isinstance(sim, td.Simulation): + return sim + return None def _fake_upload(**kwargs): counters["upload"] += 1 - return MOCK_TASK_ID + task_id = f"{MOCK_TASK_ID}{counters['upload']}" + sim = _extract_simulation(kwargs) + if sim is None and SIM_ORDER: + # Upload wasn't given the sim (or async path differs) -> fallback + 
sim = SIM_ORDER.pop(0) + if sim is not None: + TASK_TO_SIM[task_id] = sim + return task_id def _fake_start(task_id, **kwargs): counters["start"] += 1 @@ -81,7 +127,14 @@ def _fake_monitor(task_id, verbose=True): def _fake_download(*, task_id, path, **kwargs): counters["download"] += 1 + # Ensure we have a simulation for this task id (even if upload wasn't called) + sim = TASK_TO_SIM.get(task_id) + if sim is None and SIM_ORDER: + sim = SIM_ORDER.pop(0) + TASK_TO_SIM[task_id] = sim Path(path).write_text(f"payload:{task_id}") + if sim is not None: + PATH_TO_SIM[str(Path(path))] = sim def _fake_status(self): return "success" @@ -101,7 +154,6 @@ def _fake_status(self): ) return counters - def _reset_counters(counters: dict[str, int]) -> None: for key in counters: counters[key] = 0 @@ -128,6 +180,7 @@ def _test_run_cache_hit_async(monkeypatch, basic_simulation): _reset_counters(counters) sim2 = basic_simulation.updated_copy(shutoff=1e-4) sim3 = basic_simulation.updated_copy(shutoff=1e-3) + SIM_ORDER[:] = [basic_simulation, sim2, sim3] data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) print(counters) @@ -147,7 +200,7 @@ def _test_run_cache_hit_async(monkeypatch, basic_simulation): assert isinstance(data_task1, _FakeStubData) _reset_counters(counters) - data = run_async({"task1": basic_simulation, "task3": sim3}, use_cache=True) + data = run_async({"task1": basic_simulation, "task2": sim3}, use_cache=True) print(counters) assert counters["download"] == 1 @@ -166,7 +219,7 @@ def _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): assert counters["download"] == 1 _reset_counters(counters) - data = web.load(MOCK_TASK_ID, path=str(out_path), use_cache=True) + data = web.load(MOCK_TASK_ID + "1", path=str(out_path), use_cache=True) assert isinstance(data, _FakeStubData) assert counters["download"] == 0 # served from cache @@ -183,7 +236,7 @@ def _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simula corrupted_path.write_text("corrupted") _reset_counters(counters) - web.load(MOCK_TASK_ID, path=str(out_path), use_cache=True) + web.load(MOCK_TASK_ID + "1", path=str(out_path), use_cache=True) assert counters["download"] == 1 @@ -195,7 +248,7 @@ def _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation): cache.store_result(_FakeStubData(basic_simulation), MOCK_TASK_ID, str(file1), "FDTD") assert len(cache) == 1 - sim2 = basic_simulation.updated_copy(normalize_index=0.1) + sim2 = basic_simulation.updated_copy(shutoff=1e-4) file2 = tmp_path_factory.mktemp("art2") / CACHE_ARTIFACT_NAME file2.write_text("b" * 10) cache.store_result(_FakeStubData(sim2), MOCK_TASK_ID, str(file2), "FDTD") @@ -213,7 +266,7 @@ def _test_cache_eviction_by_size(tmp_path_factory, basic_simulation): cache.store_result(_FakeStubData(basic_simulation), MOCK_TASK_ID, str(file1), "FDTD") assert len(cache) == 1 - sim2 = basic_simulation.updated_copy(normalize_index=0.2) + sim2 = basic_simulation.updated_copy(shutoff=1e-4) file2 = tmp_path_factory.mktemp("art2") / CACHE_ARTIFACT_NAME file2.write_text("b" * 12_000) cache.store_result(_FakeStubData(sim2), MOCK_TASK_ID, str(file2), "FDTD") diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index 47daa547df..3185f29d4f 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -7,6 +7,7 @@ import random import shutil import time +import uuid from abc import ABC from collections.abc import Mapping from concurrent.futures import ThreadPoolExecutor @@ -268,7 +269,7 @@ def 
get_cache_hit_entry(self) -> Optional[CacheEntry]: if isinstance(self.simulation, (ModeSolver, ModeSimulation)) and self.reduce_simulation: sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation) entry = simulation_cache.try_fetch( - simulation=sim_for_cache, + simulation=sim_for_cache, register_if_found=True ) return entry return None @@ -325,23 +326,13 @@ def data_cache_path(self) -> str: @cached_property def load_if_cached( self - ) -> Optional[str]: - """Run :class:`Job` all the way through and return data. + ) -> bool: + """Checks if data is already cached. - Parameters - ---------- - path : str = "./simulation_data.hdf5" - Path to download results file (.hdf5), including filename. - priority: int = None - Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). - It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. - use_cache: bool = None - Override cache usage behaviour for this call. ``True`` forces cache usage when available, - ``False`` bypasses it, and ``None`` defers to configuration and environment settings. Returns ------- - :class:`WorkflowDataType` - Object containing simulation results. + bool + Whether item was found in cache. """ path = self.data_cache_path self._check_path_dir(path=path) @@ -352,16 +343,14 @@ def load_if_cached( entry = self.get_cache_hit_entry() if entry is not None: entry.materialize(Path(path)) - print(f"{self.task_name} found in cache") - return path - print(f"{self.task_name} not found in cache") - return None + return True + return False @cached_property def task_id(self) -> TaskId: """The task ID for this ``Job``. Uploads the ``Job`` if it hasn't already been uploaded.""" if self.load_if_cached: - return "cached_" + self.task_name + return "cached_" + self.task_name + "_" + str(uuid.uuid4()) if self.task_id_cached: return self.task_id_cached self._check_folder(self.folder_name) @@ -813,7 +802,6 @@ def run( for task_name, job in jobs.items(): loaded.append(job.load_if_cached) if all([l is not None for l in loaded]): # if all results were found in cache - print("ALL CACHED") return self.load(path_dir=path_dir) self._check_path_dir(path_dir) diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py index 15cde4d814..78e674a262 100644 --- a/tidy3d/web/cache.py +++ b/tidy3d/web/cache.py @@ -484,6 +484,8 @@ def _remove_entry(self, entry: CacheEntry) -> None: def try_fetch( self, simulation: WorkflowType, + register_if_found: bool = False, + task_id: Optional[str] = None, verbose: bool = False, ) -> Optional[CacheEntry]: """ @@ -511,6 +513,10 @@ def try_fetch( entry = self._fetch(cache_key) if not entry: return None + if register_if_found: + if task_id is None: + raise ValueError("provide task_id if item should be registered in cache") + # self._store(key=cache_key, task_id=task_id, source_path=path, metadata={}) if verbose: log.info("Simulation cache hit for workflow '%s'; using local results.", workflow_type) From a5ffd213a63e17f5bfe54ef0a565bd2746c2dd6b Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 7 Oct 2025 09:57:42 +0200 Subject: [PATCH 06/22] rebase3 --- tests/test_web/test_simulation_cache.py | 13 +++++- tidy3d/web/api/container.py | 56 ++++++++++++------------- tidy3d/web/api/webapi.py | 27 +++++------- tidy3d/web/cache.py | 9 +--- 4 files changed, 50 insertions(+), 55 deletions(-) diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index de5c2d7ccb..fbc13cbbee 100644 --- 
a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -177,6 +177,7 @@ def _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): def _test_run_cache_hit_async(monkeypatch, basic_simulation): counters = _patch_run_pipeline(monkeypatch) get_cache().clear() + cache = get_cache() _reset_counters(counters) sim2 = basic_simulation.updated_copy(shutoff=1e-4) sim3 = basic_simulation.updated_copy(shutoff=1e-3) @@ -189,8 +190,10 @@ def _test_run_cache_hit_async(monkeypatch, basic_simulation): data_task2 = data["task2"] # access to store in cache assert isinstance(data_task1, _FakeStubData) assert isinstance(data_task2, _FakeStubData) - cache = get_cache() print("cache size", len(cache)) + assert len(cache) == 2 + + print("-------------------") _reset_counters(counters) data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) @@ -198,9 +201,13 @@ def _test_run_cache_hit_async(monkeypatch, basic_simulation): assert counters["download"] == 0 data_task1 = data["task1"] assert isinstance(data_task1, _FakeStubData) + print("cache size", len(cache)) + assert len(cache) == 2 + + print("-------------------") _reset_counters(counters) - data = run_async({"task1": basic_simulation, "task2": sim3}, use_cache=True) + data = run_async({"task1": basic_simulation, "task3": sim3}, use_cache=True) print(counters) assert counters["download"] == 1 @@ -208,6 +215,8 @@ def _test_run_cache_hit_async(monkeypatch, basic_simulation): data_task2 = data["task2"] assert isinstance(data_task1, _FakeStubData) assert isinstance(data_task2, _FakeStubData) + print("cache size", len(cache)) + assert len(cache) == 3 def _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index 3185f29d4f..e7c9e34029 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -268,9 +268,7 @@ def get_cache_hit_entry(self) -> Optional[CacheEntry]: sim_for_cache = self.simulation if isinstance(self.simulation, (ModeSolver, ModeSimulation)) and self.reduce_simulation: sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation) - entry = simulation_cache.try_fetch( - simulation=sim_for_cache, register_if_found=True - ) + entry = simulation_cache.try_fetch(simulation=sim_for_cache) return entry return None @@ -299,21 +297,15 @@ def run( """ self._check_path_dir(path=path) - data = None - entry = self.get_cache_hit_entry() - if entry is not None: - data = _get_simulation_data_from_cache_entry(entry, path) - if data is not None: - return data - - if data is None: # got no data from cache + loaded_from_cache = self.load_if_cached + if not loaded_from_cache: self.upload() if priority is None: self.start() else: self.start(priority=priority) self.monitor() - data = self.load(path=path, use_cache=use_cache) + data = self.load(path=path) return data @@ -343,7 +335,10 @@ def load_if_cached( entry = self.get_cache_hit_entry() if entry is not None: entry.materialize(Path(path)) + print(f"+ {self.task_name} found") return True + + print(f"X {self.task_name} NOT found") return False @cached_property @@ -600,6 +595,12 @@ class BatchData(Tidy3dBaseModel, Mapping): verbose: bool = pd.Field( True, title="Verbose", description="Whether to print info messages and progressbars." 
) + cached_tasks: Optional[dict[TaskName, bool]] = pd.Field( + None, + title="Cached Tasks", + description="Whether the data of a task came from the cache.", + ) + use_cache: Optional[bool] = pd.Field( None, title="Use Cache", @@ -610,9 +611,10 @@ def load_sim_data(self, task_name: str) -> WorkflowDataType: """Load a simulation data object from file by task name.""" task_data_path = self.task_paths[task_name] task_id = self.task_ids[task_name] + from_cache = self.cached_tasks[task_name] if self.cached_tasks else False web.get_info(task_id) - return web.load(task_id=task_id, path=task_data_path, verbose=False, use_cache=self.use_cache) + return web.load(task_id=task_id, path=task_data_path, verbose=False, from_cache=from_cache, use_cache=self.use_cache, replace_existing=False) def __getitem__(self, task_name: TaskName) -> WorkflowDataType: """Get the simulation data object for a given ``task_name``.""" @@ -797,21 +799,16 @@ def run( rather it iterates over the task names and loads the corresponding data from file one by one. If no file exists for that task, it downloads it. """ - jobs = self.jobs - loaded = list() - for task_name, job in jobs.items(): - loaded.append(job.load_if_cached) - if all([l is not None for l in loaded]): # if all results were found in cache - return self.load(path_dir=path_dir) - - self._check_path_dir(path_dir) - self.upload() - self.to_file(self._batch_path(path_dir=path_dir)) - if priority is None: - self.start() - else: - self.start(priority=priority) - self.monitor() + loaded = [job.load_if_cached for job in self.jobs.values()] + if not all(loaded): + self._check_path_dir(path_dir) + self.upload() + self.to_file(self._batch_path(path_dir=path_dir)) + if priority is None: + self.start() + else: + self.start(priority=priority) + self.monitor() return self.load(path_dir=path_dir) @cached_property @@ -1233,7 +1230,8 @@ def load(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) task_paths[task_name] = self._job_data_path(task_id=job.task_id, path_dir=path_dir) task_ids[task_name] = self.jobs[task_name].task_id - data = BatchData(task_paths=task_paths, task_ids=task_ids, verbose=self.verbose, use_cache=self.use_cache) + loaded = {task_name: job.load_if_cached for task_name, job in self.jobs.items()} + data = BatchData(task_paths=task_paths, task_ids=task_ids, verbose=self.verbose, cached_tasks=loaded, use_cache=self.use_cache) for task_name, job in self.jobs.items(): if isinstance(job.simulation, ModeSolver): diff --git a/tidy3d/web/api/webapi.py b/tidy3d/web/api/webapi.py index 35ae448f4a..80b44683b0 100644 --- a/tidy3d/web/api/webapi.py +++ b/tidy3d/web/api/webapi.py @@ -242,8 +242,6 @@ def run( simulation=sim_for_cache ) data = _get_simulation_data_from_cache_entry(entry, path) - if data is not None: - return data if data is None: # got no data from cache task_id = upload( @@ -1019,8 +1017,8 @@ def load( replace_existing: bool = True, verbose: bool = True, progress_callback: Optional[Callable[[float], None]] = None, - use_cache: Optional[bool] = None, - lazy: bool = False, + use_cache: bool = False, + from_cache: bool = False, ) -> WorkflowDataType: """ Download and Load simulation results into :class:`.SimulationData` object. @@ -1067,18 +1065,11 @@ def load( base_dir = os.path.dirname(path) or "." 
path = os.path.join(base_dir, "cm_data.hdf5") - simulation_cache = _resolve_cache(use_cache) - data = None - if simulation_cache is not None: - entry = simulation_cache.try_fetch_by_task( - task_id=task_id, verbose=verbose - ) - data = _get_simulation_data_from_cache_entry(entry, path) - if data is not None: - return data - - if not data and (not os.path.exists(path) or replace_existing): - download(task_id=task_id, path=path, verbose=verbose, progress_callback=progress_callback) + if from_cache: + if not os.path.exists(path): + raise FileNotFoundError("Cached file not found.") + elif not os.path.exists(path) or replace_existing: + download(task_id=task_id, path=path, verbose=verbose, progress_callback=progress_callback) if verbose: console = get_logging_console() @@ -1089,7 +1080,9 @@ def load( stub_data = Tidy3dStubData.postprocess(path, lazy=lazy) - if simulation_cache is not None: + + simulation_cache = _resolve_cache(use_cache) + if simulation_cache is not None and not from_cache: info = get_info(task_id, verbose=False) workflow_type = getattr(info, "taskType", None) or type(stub_data).__name__ simulation_cache.store_result( diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py index 78e674a262..94768532c4 100644 --- a/tidy3d/web/cache.py +++ b/tidy3d/web/cache.py @@ -484,8 +484,6 @@ def _remove_entry(self, entry: CacheEntry) -> None: def try_fetch( self, simulation: WorkflowType, - register_if_found: bool = False, - task_id: Optional[str] = None, verbose: bool = False, ) -> Optional[CacheEntry]: """ @@ -513,16 +511,13 @@ def try_fetch( entry = self._fetch(cache_key) if not entry: return None - if register_if_found: - if task_id is None: - raise ValueError("provide task_id if item should be registered in cache") # self._store(key=cache_key, task_id=task_id, source_path=path, metadata={}) if verbose: log.info("Simulation cache hit for workflow '%s'; using local results.", workflow_type) return entry - except Exception: - log.error("Failed to fetch cache results.") + except Exception as e: + log.error("Failed to fetch cache results." 
+ str(e)) def try_fetch_by_task( self, From 389d36cffdfccaf0220abfb642317fe99c1eda8b Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 7 Oct 2025 12:23:36 +0200 Subject: [PATCH 07/22] fixed cache config --- playground/cache_conf_test.py | 17 ++ tests/test_web/test_simulation_cache.py | 181 ++++----------- tidy3d/config.py | 4 +- tidy3d/web/api/container.py | 28 +-- tidy3d/web/api/webapi.py | 52 +++-- tidy3d/web/cache.py | 293 ++++++++++-------------- 6 files changed, 231 insertions(+), 344 deletions(-) create mode 100644 playground/cache_conf_test.py diff --git a/playground/cache_conf_test.py b/playground/cache_conf_test.py new file mode 100644 index 0000000000..e0df5eccca --- /dev/null +++ b/playground/cache_conf_test.py @@ -0,0 +1,17 @@ +from tidy3d import config +from tidy3d.web.cache import resolve_simulation_cache, get_cache + +# config.simulation_cache.max_size_gb = float(10_000 * 1e-9) +# cache = resolve_simulation_cache(use_cache=True) +# print(cache.config) +tmp_path = "dfsd" + +config.simulation_cache.enabled = True +config.simulation_cache.directory = tmp_path +config.simulation_cache.max_size_gb = 1.23 +config.simulation_cache.max_entries = 5 +cfg = resolve_simulation_cache().config +assert cfg.enabled is True +assert cfg.directory == tmp_path +assert cfg.max_size_gb == 1.23 +assert cfg.max_entries == 5 \ No newline at end of file diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index fbc13cbbee..a76a90bae3 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -6,31 +6,16 @@ import tidy3d as td from tests.test_plugins.test_adjoint import use_emulated_run from tests.utils import run_emulated +from tidy3d import config from tidy3d.web.api import webapi as web from tidy3d.web.cache import ( CACHE_ARTIFACT_NAME, - SimulationCache, - SimulationCacheConfig, - get_cache, + get_cache, resolve_simulation_cache, ) -import pytest - - -import os -import toml -import tempfile from pathlib import Path import pytest -from tidy3d.web.cache import ( - SimulationCacheConfig, - configure_cache, - get_cache_config, - _apply_updates, - _load_env_overrides, - _load_cli_cache_settings, -) from tidy3d.web import run_async, Job MOCK_TASK_ID = "task-xyz" @@ -176,46 +161,37 @@ def _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): def _test_run_cache_hit_async(monkeypatch, basic_simulation): counters = _patch_run_pipeline(monkeypatch) - get_cache().clear() - cache = get_cache() + cache = resolve_simulation_cache(use_cache=True) + cache.clear() _reset_counters(counters) sim2 = basic_simulation.updated_copy(shutoff=1e-4) sim3 = basic_simulation.updated_copy(shutoff=1e-3) SIM_ORDER[:] = [basic_simulation, sim2, sim3] data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) - print(counters) - assert counters["download"] == 2 data_task1 = data["task1"] # access to store in cache data_task2 = data["task2"] # access to store in cache + assert counters["download"] == 2 assert isinstance(data_task1, _FakeStubData) assert isinstance(data_task2, _FakeStubData) - print("cache size", len(cache)) assert len(cache) == 2 - print("-------------------") _reset_counters(counters) - data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) - print(counters) + run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) assert counters["download"] == 0 - data_task1 = data["task1"] assert isinstance(data_task1, _FakeStubData) - print("cache size", 
len(cache)) assert len(cache) == 2 - print("-------------------") _reset_counters(counters) data = run_async({"task1": basic_simulation, "task3": sim3}, use_cache=True) - print(counters) - assert counters["download"] == 1 data_task1 = data["task1"] - data_task2 = data["task2"] + data_task2 = data["task3"] # access to store in cache + assert counters["download"] == 1 # sim3 is new assert isinstance(data_task1, _FakeStubData) assert isinstance(data_task2, _FakeStubData) - print("cache size", len(cache)) assert len(cache) == 3 @@ -224,18 +200,22 @@ def _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): counters = _patch_run_pipeline(monkeypatch) out_path = tmp_path / "load.hdf5" + cache = get_cache() + web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) assert counters["download"] == 1 + assert len(cache) == 1 _reset_counters(counters) - data = web.load(MOCK_TASK_ID + "1", path=str(out_path), use_cache=True) + data = web.load(None, path=str(out_path), from_cache=True) assert isinstance(data, _FakeStubData) assert counters["download"] == 0 # served from cache + assert len(cache) == 1 # still 1 item in cache def _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation): - counters = _patch_run_pipeline(monkeypatch) out_path = tmp_path / "checksum.hdf5" + get_cache().clear() web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) @@ -244,22 +224,23 @@ def _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simula corrupted_path = cache.root / metadata["cache_key"] / CACHE_ARTIFACT_NAME corrupted_path.write_text("corrupted") - _reset_counters(counters) - web.load(MOCK_TASK_ID + "1", path=str(out_path), use_cache=True) - assert counters["download"] == 1 + cache._fetch(metadata["cache_key"]) + assert len(cache) == 0 def _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation): - cache = SimulationCache(SimulationCacheConfig(enabled=True, max_size_gb=10.0, max_entries=1)) + config.simulation_cache.max_entries = 1 + cache = resolve_simulation_cache(use_cache=True) + cache.clear() file1 = tmp_path_factory.mktemp("art1") / CACHE_ARTIFACT_NAME - file1.write_text("a" * 10) + file1.write_text("a") cache.store_result(_FakeStubData(basic_simulation), MOCK_TASK_ID, str(file1), "FDTD") assert len(cache) == 1 sim2 = basic_simulation.updated_copy(shutoff=1e-4) file2 = tmp_path_factory.mktemp("art2") / CACHE_ARTIFACT_NAME - file2.write_text("b" * 10) + file2.write_text("b") cache.store_result(_FakeStubData(sim2), MOCK_TASK_ID, str(file2), "FDTD") entries = cache.list() @@ -268,38 +249,44 @@ def _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation): def _test_cache_eviction_by_size(tmp_path_factory, basic_simulation): - cache = SimulationCache(SimulationCacheConfig(enabled=True, max_size_gb=1e-5, max_entries=10)) + config.simulation_cache.max_size_gb = float(10_000 * 1e-9) + cache = resolve_simulation_cache(use_cache=True) + cache.clear() file1 = tmp_path_factory.mktemp("art1") / CACHE_ARTIFACT_NAME - file1.write_text("a" * 12_000) + file1.write_text("a" * 8_000) cache.store_result(_FakeStubData(basic_simulation), MOCK_TASK_ID, str(file1), "FDTD") assert len(cache) == 1 sim2 = basic_simulation.updated_copy(shutoff=1e-4) file2 = tmp_path_factory.mktemp("art2") / CACHE_ARTIFACT_NAME - file2.write_text("b" * 12_000) + file2.write_text("b" * 8_000) cache.store_result(_FakeStubData(sim2), MOCK_TASK_ID, str(file2), "FDTD") entries = cache.list() - assert len(entries) == 1 + 
print("len(entries)", len(entries)) + assert len(cache) == 1 assert entries[0]["simulation_hash"] == sim2._hash_self() def test_cache_end_to_end(monkeypatch, tmp_path, tmp_path_factory, basic_simulation, fake_data): """Run all critical cache tests in sequence to ensure end-to-end stability.""" - # _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) - # _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) - # _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation) - # _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation) - # _test_cache_eviction_by_size(tmp_path_factory, basic_simulation) + _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) + _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) + _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation) + _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation) + _test_cache_eviction_by_size(tmp_path_factory, basic_simulation) _test_run_cache_hit_async(monkeypatch, basic_simulation) -def test_configure_cache_roundtrip(tmp_path): - new_cfg = SimulationCacheConfig(enabled=True, directory=tmp_path, max_size_gb=1.23, max_entries=5) - configure_cache(new_cfg) - cfg = get_cache_config() +def test_configure_cache_roundtrip(monkeypatch, tmp_path): + monkeypatch.setattr(config.simulation_cache, "enabled", True) + monkeypatch.setattr(config.simulation_cache, "directory", tmp_path) + monkeypatch.setattr(config.simulation_cache, "max_size_gb", 1.23) + monkeypatch.setattr(config.simulation_cache, "max_entries", 5) + + cfg = resolve_simulation_cache().config assert cfg.enabled is True assert cfg.directory == tmp_path assert cfg.max_size_gb == 1.23 @@ -310,89 +297,13 @@ def test_env_var_overrides(monkeypatch, tmp_path): monkeypatch.setenv("TIDY3D_CACHE_ENABLED", "true") monkeypatch.setenv("TIDY3D_CACHE_DIR", str(tmp_path)) monkeypatch.setenv("TIDY3D_CACHE_MAX_SIZE_GB", "0.5") - monkeypatch.setenv("TIDY3D_CACHE_MAX_ENTRIES", "7") - overrides = _load_env_overrides() - assert overrides == { - "enabled": True, - "directory": str(tmp_path), - "max_size_gb": 0.5, - "max_entries": 7, - } - - -def test_cli_config_overrides(tmp_path, monkeypatch): - # Build fake toml config file - cli_config_file = tmp_path / "config.toml" - monkeypatch.setenv("TIDY3D_CLI_CONFIG", str(cli_config_file)) # if your code reads via constant adjust - content = { - "simulation_cache": { - "enabled": True, - "directory": str(tmp_path / "cli_dir"), - "max_size_gb": 2.5, - "max_entries": 99, - } - } - cli_config_file.write_text(toml.dumps(content)) - - # Patch constant so _load_cli_cache_settings sees our file - from tidy3d.web import cache as cache_mod - monkeypatch.setattr(cache_mod, "CLI_CONFIG_FILE", str(cli_config_file)) - - settings = _load_cli_cache_settings() - assert settings["enabled"] is True - assert Path(settings["directory"]).name == "cli_dir" - assert settings["max_size_gb"] == 2.5 - assert settings["max_entries"] == 99 - - -def test_apply_updates_invalid_values(tmp_path, caplog): - base = SimulationCacheConfig() - updates = { - "enabled": "notbool", - "directory": tmp_path, - "max_size_gb": "-5", # invalid - "max_entries": "-10", # invalid - "irrelevant": 123, - } - cfg = _apply_updates(base, updates) - # directory should be updated, invalid numbers ignored - assert cfg.directory == tmp_path - assert cfg.max_size_gb == base.max_size_gb - assert cfg.max_entries == base.max_entries - - -def 
test_effective_config_cli_then_env(monkeypatch, tmp_path): - """CLI settings should apply first, then environment overrides take precedence.""" - - # --- Step 1: fake CLI config --- - cli_config_file = tmp_path / "config.toml" - cli_settings = { - "simulation_cache": { - "enabled": False, # will be overridden by env - "directory": str(tmp_path / "cli_dir"), - "max_size_gb": 2.5, - "max_entries": 99, - } - } - cli_config_file.write_text(toml.dumps(cli_settings)) - from tidy3d.web import cache as cache_mod - monkeypatch.setattr(cache_mod, "CLI_CONFIG_FILE", str(cli_config_file)) - - # --- Step 2: env vars override CLI --- - env_dir = tmp_path / "env_dir" - monkeypatch.setenv("TIDY3D_CACHE_ENABLED", "true") - monkeypatch.setenv("TIDY3D_CACHE_DIR", str(env_dir)) - monkeypatch.setenv("TIDY3D_CACHE_MAX_SIZE_GB", "0.75") + config.simulation_cache.max_entries = 5 monkeypatch.setenv("TIDY3D_CACHE_MAX_ENTRIES", "7") - # --- Step 3: load effective config --- - from tidy3d.web.cache import _load_effective_config - cfg = _load_effective_config() + cfg = resolve_simulation_cache().config + assert cfg.enabled is True + assert cfg.directory == tmp_path + assert cfg.max_size_gb == 0.5 + assert cfg.max_entries == 7 - # --- Step 4: assertions --- - # Env overrides should win over CLI - assert cfg.enabled is True # env overrides False - assert cfg.directory == env_dir # env overrides cli_dir - assert cfg.max_size_gb == 0.75 # env overrides 2.5 - assert cfg.max_entries == 7 # env overrides 99 \ No newline at end of file diff --git a/tidy3d/config.py b/tidy3d/config.py index 4730a9ddff..7fa0c6c169 100644 --- a/tidy3d/config.py +++ b/tidy3d/config.py @@ -29,7 +29,7 @@ class SimulationCacheSettings(pd.BaseModel): ge=0.0, ) max_entries: int = pd.Field( - 25, + 128, description="Maximum number of cache entries. 
Set to 0 for no limit.", ge=0, ) @@ -74,7 +74,7 @@ class Config: ) simulation_cache: SimulationCacheSettings = pd.Field( - SimulationCacheSettings(), + default_factory=SimulationCacheSettings, title="Simulation Cache", description="Configuration for the optional local simulation cache.", ) diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index e7c9e34029..fc3108745e 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -28,7 +28,7 @@ from tidy3d.web.api.tidy3d_stub import Tidy3dStub from tidy3d.web.api.tidy3d_stub import Tidy3dStub, Tidy3dStubData from tidy3d.web.api.webapi import get_reduced_simulation, _get_simulation_data_from_cache_entry -from tidy3d.web.cache import build_cache_key, _resolve_cache, CacheEntry +from tidy3d.web.cache import build_cache_key, resolve_simulation_cache, CacheEntry, TMP_BATCH_PREFIX from tidy3d.web.core.constants import TaskId, TaskName from tidy3d.web.core.task_core import Folder from tidy3d.web.core.task_info import RunInfo, TaskInfo @@ -263,10 +263,10 @@ def to_file(self, fname: str) -> None: super(Job, self).to_file(fname=fname) # noqa: UP008 def get_cache_hit_entry(self) -> Optional[CacheEntry]: - simulation_cache = _resolve_cache(self.use_cache) + simulation_cache = resolve_simulation_cache(self.use_cache) if simulation_cache is not None: sim_for_cache = self.simulation - if isinstance(self.simulation, (ModeSolver, ModeSimulation)) and self.reduce_simulation: + if isinstance(self.simulation, (ModeSolver, ModeSimulation)): sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation) entry = simulation_cache.try_fetch(simulation=sim_for_cache) return entry @@ -276,7 +276,6 @@ def run( self, path: str = DEFAULT_DATA_PATH, priority: Optional[int] = None, - use_cache: Optional[bool] = None, ) -> WorkflowDataType: """Run :class:`Job` all the way through and return data. @@ -311,8 +310,8 @@ def run( @cached_property def data_cache_path(self) -> str: - cache = _resolve_cache(self.use_cache) - path = os.path.join(cache._root, "tmp", f"{self.task_name}.hdf5") + cache = resolve_simulation_cache(self.use_cache) + path = os.path.join(cache._root, TMP_BATCH_PREFIX, f"{self.task_name}.hdf5") return path @cached_property @@ -330,13 +329,10 @@ def load_if_cached( self._check_path_dir(path=path) entry = self.get_cache_hit_entry() if entry is not None: - data = _get_simulation_data_from_cache_entry(entry, path) - if data is not None: - entry = self.get_cache_hit_entry() - if entry is not None: - entry.materialize(Path(path)) - print(f"+ {self.task_name} found") - return True + loaded_from_cache = _get_simulation_data_from_cache_entry(entry, path) + if loaded_from_cache: + print(f"+ {self.task_name} found") + return True print(f"X {self.task_name} NOT found") return False @@ -351,7 +347,7 @@ def task_id(self) -> TaskId: self._check_folder(self.folder_name) return self._upload() - def _upload(self) -> Optional[TaskId]: + def _upload(self) -> TaskId: """Upload this job and return the task ID for handling.""" # upload kwargs with all fields except task_id upload_kwargs = {key: getattr(self, key) for key in self._upload_fields} @@ -776,9 +772,6 @@ def run( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. - use_cache: bool = None - Whether to use local cache if identical simulation is rerun. 
If not provided, cache settings from config or - environment variables will be used. Returns ------ :class:`BatchData` @@ -1168,7 +1161,6 @@ def download(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = Fa log.info(f"File '{job_path_str}' already exists. Overwriting.") else: log.info(f"File '{job_path_str}' already exists. Skipping.") - # continue # TODO remove if "error" in job.status: log.warning(f"Not downloading '{task_name}' as the task errored.") continue diff --git a/tidy3d/web/api/webapi.py b/tidy3d/web/api/webapi.py index 80b44683b0..fd553970b7 100644 --- a/tidy3d/web/api/webapi.py +++ b/tidy3d/web/api/webapi.py @@ -19,7 +19,7 @@ from tidy3d.exceptions import WebError from tidy3d.log import get_logging_console, log from tidy3d.plugins.smatrix.component_modelers.terminal import TerminalComponentModeler -from tidy3d.web.cache import _resolve_cache, SimulationCache, CacheEntry +from tidy3d.web.cache import resolve_simulation_cache, SimulationCache, CacheEntry from tidy3d.web.core.account import Account from tidy3d.web.core.constants import ( CM_DATA_HDF5_GZ, @@ -123,12 +123,14 @@ def _task_dict_to_url_bullet_list(data_dict: dict) -> str: # and then join them together with newline characters. return "\n".join([f"- {key}: '{value}'" for key, value in data_dict.items()]) -def _get_simulation_data_from_cache_entry(entry: CacheEntry, path: str) -> Optional[WorkflowDataType]: +def _get_simulation_data_from_cache_entry(entry: CacheEntry, path: str) -> bool: if entry is not None: - entry.materialize(Path(path)) - data = Tidy3dStubData.postprocess(path) - return data - return None + try: + entry.materialize(Path(path)) + return True + except Exception: + return False + return False @wait_for_connection def run( @@ -232,18 +234,18 @@ def run( :meth:`tidy3d.web.api.container.Batch.monitor` Monitor progress of each of the running tasks. """ - simulation_cache = _resolve_cache(use_cache) - data = None + simulation_cache = resolve_simulation_cache(use_cache) + loaded_from_cache = False if simulation_cache is not None: sim_for_cache = simulation - if isinstance(simulation, (ModeSolver, ModeSimulation)) and reduce_simulation: + if isinstance(simulation, (ModeSolver, ModeSimulation)): sim_for_cache = get_reduced_simulation(simulation, reduce_simulation) entry = simulation_cache.try_fetch( simulation=sim_for_cache ) - data = _get_simulation_data_from_cache_entry(entry, path) + loaded_from_cache = _get_simulation_data_from_cache_entry(entry, path) - if data is None: # got no data from cache + if not loaded_from_cache: task_id = upload( simulation=simulation, task_name=task_name, @@ -265,13 +267,17 @@ def run( priority=priority, ) monitor(task_id, verbose=verbose) - data = load( - task_id=task_id, - path=path, - verbose=verbose, - progress_callback=progress_callback_download, - use_cache=use_cache, - ) + else: + task_id = None + + data = load( + task_id=task_id, + path=path, + verbose=verbose, + progress_callback=progress_callback_download, + use_cache=use_cache, + from_cache=loaded_from_cache, + ) if isinstance(simulation, ModeSolver): simulation._patch_data(data=data) @@ -1012,7 +1018,7 @@ def download_log( @wait_for_connection def load( - task_id: TaskId, + task_id: Optional[TaskId], path: str = "simulation_data.hdf5", replace_existing: bool = True, verbose: bool = True, @@ -1060,8 +1066,10 @@ def load( Union[:class:`.SimulationData`, :class:`.HeatSimulationData`, :class:`.EMESimulationData`] Object containing simulation data. 
""" + assert from_cache or task_id, "Either task_id or from_cache must be provided." + # For component modeler batches, default to a clearer filename if the default was used. - if _is_modeler_batch(task_id) and os.path.basename(path) == "simulation_data.hdf5": + if not from_cache and _is_modeler_batch(task_id) and os.path.basename(path) == "simulation_data.hdf5": base_dir = os.path.dirname(path) or "." path = os.path.join(base_dir, "cm_data.hdf5") @@ -1073,7 +1081,7 @@ def load( if verbose: console = get_logging_console() - if _is_modeler_batch(task_id): + if not from_cache and _is_modeler_batch(task_id): # TODO inspect console.log(f"loading component modeler data from {path}") else: console.log(f"loading simulation from {path}") @@ -1081,7 +1089,7 @@ def load( stub_data = Tidy3dStubData.postprocess(path, lazy=lazy) - simulation_cache = _resolve_cache(use_cache) + simulation_cache = resolve_simulation_cache(use_cache) if simulation_cache is not None and not from_cache: info = get_info(task_id, verbose=False) workflow_type = getattr(info, "taskType", None) or type(stub_data).__name__ diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py index 94768532c4..f8da995150 100644 --- a/tidy3d/web/cache.py +++ b/tidy3d/web/cache.py @@ -15,14 +15,11 @@ from enum import Enum from functools import lru_cache from pathlib import Path -from typing import Any, Optional, Union - -import toml - +from typing import Any, Optional +from tidy3d import config from tidy3d.components.types.workflow import WorkflowDataType, WorkflowType from tidy3d.log import log -from tidy3d.web.api.tidy3d_stub import Tidy3dStub, Tidy3dStubData -from tidy3d.web.cli.constants import CONFIG_FILE as CLI_CONFIG_FILE +from tidy3d.web.api.tidy3d_stub import Tidy3dStub from tidy3d.web.core.constants import TaskId from tidy3d.web.core.environment import Env from tidy3d.web.core.http_util import get_version as _get_protocol_version @@ -37,52 +34,21 @@ ENV_MAX_ENTRIES = "TIDY3D_CACHE_MAX_ENTRIES" TMP_PREFIX = "tidy3d-cache-" +TMP_BATCH_PREFIX = "tmp_batch" -def _environment_context() -> dict[str, Any]: - env = Env.current - return { - "name": env.name, - "web_api_endpoint": env.web_api_endpoint, - "website_endpoint": env.website_endpoint, - "s3_region": env.s3_region, - } +_CONFIG_LOCK = threading.RLock() -def _resolve_cache(use_cache: Optional[bool]): - cache_config = get_cache_config() - try: - from tidy3d import config as tidy3d_config - except Exception: - simulation_cache_settings = None - else: - simulation_cache_settings = getattr(tidy3d_config, "simulation_cache", None) - - if simulation_cache_settings is not None: - desired_config = SimulationCacheConfig( - enabled=simulation_cache_settings.enabled, - directory=simulation_cache_settings.directory, - max_size_gb=simulation_cache_settings.max_size_gb, - max_entries=simulation_cache_settings.max_entries, - ) - if desired_config != cache_config: - configure_cache(desired_config) - cache_config = desired_config - - enabled = cache_config.enabled - env_override = Env.current.enable_caching - if env_override is not None: - enabled = env_override - if use_cache is not None: - enabled = use_cache - if not enabled: - return None - try: - return get_cache() - except Exception as err: - log.debug("Simulation cache unavailable: %s", err) - return None +@dataclass(frozen=True) +class SimulationCacheConfig: + """Configuration for the simulation cache.""" + + enabled: bool = False + directory: Path = field(default_factory=lambda: Path.home() / DEFAULT_CACHE_RELATIVE_DIR) + max_size_gb: float = 8.0 
+ max_entries: int = 32 def _coerce_bool(value: str) -> Optional[bool]: @@ -114,22 +80,6 @@ def _coerce_int(value: str) -> Optional[int]: return None -def _load_cli_cache_settings() -> dict[str, Any]: - if not os.path.exists(CLI_CONFIG_FILE): - return {} - try: - with open(CLI_CONFIG_FILE, encoding="utf-8") as fh: - content = fh.read() - if not content.strip(): - return {} - config = toml.loads(content) - except Exception as err: - log.debug("Failed to parse CLI cache settings: %s", err) - return {} - - section = config.get("simulation_cache") - return section if isinstance(section, dict) else {} - def _load_env_overrides() -> dict[str, Any]: overrides: dict[str, Any] = {} @@ -152,58 +102,122 @@ def _load_env_overrides() -> dict[str, Any]: return overrides +def _load_effective_config() -> SimulationCacheConfig: + """ + Build the initial, global cache config at import-time. -def _apply_updates(config: SimulationCacheConfig, updates: dict[str, Any]) -> SimulationCacheConfig: - if not updates: - return config + Precedence for fields (lowest → highest): + 1) library defaults (disabled, ~/.tidy3d/cache/simulations, limits) + 2) persisted app config (config.simulation_cache_settings), if present + 3) environment overrides (TIDY3D_CACHE_*) - kwargs: dict[str, Any] = {} - for key, value in updates.items(): - if key not in {"enabled", "directory", "max_size_gb", "max_entries"}: - continue - if key == "directory" and value is not None: - try: - value = Path(value).expanduser() - except Exception: - log.debug("Ignoring invalid cache directory override: %s", value) - continue - if key == "max_size_gb" and value is not None: - try: - value = float(value) - except (TypeError, ValueError): - log.debug("Ignoring invalid cache size override: %s", value) - continue - if value < 0: - log.debug("Ignoring negative cache size override: %s", value) - continue - if key == "max_entries" and value is not None: - try: - value = int(value) - except (TypeError, ValueError): - log.debug("Ignoring invalid cache entry override: %s", value) - continue - if value < 0: - log.debug("Ignoring negative cache entry override: %s", value) - continue - kwargs[key] = value - return replace(config, **kwargs) if kwargs else config + Note: per-call `use_cache` is *not* applied here; that’s handled in + resolve_simulation_cache(...), which can reconfigure the singleton later. 
+ """ + sim_cache_settings = config.simulation_cache + cfg = SimulationCacheConfig( + enabled=sim_cache_settings.enabled, + directory=sim_cache_settings.directory, + max_size_gb=sim_cache_settings.max_size_gb, + max_entries=sim_cache_settings.max_entries, + ) -def _load_effective_config() -> SimulationCacheConfig: - config = SimulationCacheConfig() - config = _apply_updates(config, _load_cli_cache_settings()) - config = _apply_updates(config, _load_env_overrides()) - return config + env_overrides = _load_env_overrides() + if env_overrides: + allowed = {k: v for k, v in env_overrides.items() if v is not None} + if allowed: + cfg = replace(cfg, **allowed) + + if cfg.directory: + cfg = replace(cfg, directory=Path(cfg.directory).expanduser().resolve()) + + return cfg + + +_CACHE_CONFIG: SimulationCacheConfig = _load_effective_config() + + +def get_cache_config() -> SimulationCacheConfig: + """Thread-safe snapshot copy of the active global cache configuration.""" + with _CONFIG_LOCK: + return replace(_CACHE_CONFIG) + + +def configure_cache(new_config: SimulationCacheConfig) -> None: + """Swap the active global config and reset the cache singleton.""" + global _CACHE_CONFIG + with _CONFIG_LOCK: + _CACHE_CONFIG = new_config + get_cache.cache_clear() + + +@lru_cache +def get_cache() -> SimulationCache: + """ + Return the singleton SimulationCache built from the *current* global config. + + This is automatically refreshed whenever `configure_cache(...)` is called, + because that function clears this LRU entry. + """ + cfg = get_cache_config() + return SimulationCache(cfg) + + + +def _merge_from_tidy3d_config() -> SimulationCacheConfig: + """Overlay app-level persisted settings (if any) onto the current global config snapshot.""" + simulation_cache_settings = config.simulation_cache + return SimulationCacheConfig( + enabled=simulation_cache_settings.enabled, + directory=simulation_cache_settings.directory, + max_size_gb=simulation_cache_settings.max_size_gb, + max_entries=simulation_cache_settings.max_entries, + ) + + +def _apply_overrides(cfg: SimulationCacheConfig, overrides: dict[str, Any]) -> SimulationCacheConfig: + """Apply dict-based overrides (enabled/directory/max_size_gb/max_entries).""" + if not overrides: + return cfg + # Filter to fields that exist on the dataclass and are not None + allowed = { + k: v for k, v in overrides.items() + if v is not None and hasattr(cfg, k) + } + return replace(cfg, **allowed) if allowed else cfg + + +def resolve_simulation_cache(use_cache: Optional[bool] = None) -> Optional[SimulationCache]: + """ + Return a SimulationCache configured from: + 1) persisted config (directory/limits + default enabled), + 2) environment overrides (enabled + directory/limits), + 3) per-call 'use_cache' (enabled only, highest precedence). + + If effective config differs from the active global config, reconfigure the singleton. + Returns None if final 'enabled' is False. 
+ """ + current = get_cache_config() + desired = _load_effective_config() + + if use_cache is not None: + desired = replace(desired, enabled=use_cache) + + if desired != current: + configure_cache(desired) + + if not desired.enabled: + return None + + try: + return get_cache() + except Exception as err: + log.debug("Simulation cache unavailable: %s", err) + return None -@dataclass(frozen=True) -class SimulationCacheConfig: - """Configuration for the simulation cache.""" - enabled: bool = False - directory: Path = field(default_factory=lambda: Path.home() / DEFAULT_CACHE_RELATIVE_DIR) - max_size_gb: float = 8.0 - max_entries: int = 32 @dataclass @@ -290,8 +304,6 @@ def clear(self) -> None: pass - - def _fetch(self, key: str) -> Optional[CacheEntry]: """Retrieve an entry by key, verifying checksum.""" with self._lock: @@ -450,7 +462,7 @@ def _iter_entries(self) -> Iterable[CacheEntry]: return [] entries: list[CacheEntry] = [] for child in self._root.iterdir(): - if child.name.startswith(TMP_PREFIX): + if child.name.startswith(TMP_PREFIX) or child.name.startswith(TMP_BATCH_PREFIX): continue meta_path = child / CACHE_METADATA_NAME if not meta_path.exists(): @@ -500,11 +512,10 @@ def try_fetch( workflow_type = Tidy3dStub(simulation=simulation).get_type() versions = _get_protocol_version() - environment = _environment_context() + cache_key = build_cache_key( simulation_hash=simulation_hash, workflow_type=workflow_type, - environment=environment, version=versions, ) @@ -519,27 +530,6 @@ def try_fetch( except Exception as e: log.error("Failed to fetch cache results." + str(e)) - def try_fetch_by_task( - self, - task_id: TaskId, - verbose: bool = False, - ) -> Optional[CacheEntry]: - """ - Try to satisfy `load()` from cache BEFORE downloading. - Since we don't have the simulation hash yet, we use the task-id index. - Returns None on miss or on any cache error. 
- """ - - try: - entry = self.fetch_by_task(task_id) - if not entry: - return None - if verbose: - log.info("Simulation cache hit for task '%s'; using local results.", task_id) - return entry - except Exception as err: - log.debug("Simulation cache unavailable for load: %s", err) - return None def store_result( self, @@ -560,12 +550,10 @@ def store_result( return version = _get_protocol_version() - environment = _environment_context() cache_key = build_cache_key( simulation_hash=simulation_hash, workflow_type=workflow_type, - environment=environment, version=version, ) @@ -575,7 +563,6 @@ def store_result( runtime_context={ "task_id": task_id, }, - environment=environment, version=version, extras={"path": str(Path(path))}, ) @@ -656,32 +643,6 @@ def hexdigest(self) -> str: return self._hasher.hexdigest() -_CONFIG_LOCK = threading.RLock() -_CACHE_CONFIG = _load_effective_config() - - -def get_cache_config() -> SimulationCacheConfig: - """Return a copy of the active cache configuration.""" - - with _CONFIG_LOCK: - return replace(_CACHE_CONFIG) - - -@lru_cache -def get_cache() -> SimulationCache: - """Get a singleton ``SimulationCache`` instance.""" - - return SimulationCache(get_cache_config()) - - -def configure_cache(config: SimulationCacheConfig) -> None: - """Override the global cache configuration.""" - - global _CACHE_CONFIG - with _CONFIG_LOCK: - _CACHE_CONFIG = config - get_cache.cache_clear() - def clear() -> None: """Remove all cache entries.""" @@ -716,16 +677,16 @@ def build_cache_key( *, simulation_hash: str, workflow_type: str, - environment: dict[str, Any], version: str, + solver_version: str, ) -> str: """Construct a deterministic cache key.""" payload = { "simulation_hash": simulation_hash, "workflow_type": workflow_type, - "environment": _canonicalize(environment), "versions": _canonicalize(version), + "solver_version": _canonicalize(solver_version), } encoded = json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") return hashlib.sha256(encoded).hexdigest() @@ -736,7 +697,6 @@ def build_entry_metadata( simulation_hash: str, workflow_type: str, runtime_context: dict[str, Any], - environment: dict[str, Any], version: str, extras: Optional[dict[str, Any]] = None, ) -> dict[str, Any]: @@ -746,7 +706,6 @@ def build_entry_metadata( "simulation_hash": simulation_hash, "workflow_type": workflow_type, "runtime_context": _canonicalize(runtime_context), - "environment": _canonicalize(environment), "versions": _canonicalize(version), "task_ids": [], } From e762ad762663645ace934e42ae1dc925aa014768 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 7 Oct 2025 12:29:20 +0200 Subject: [PATCH 08/22] bugfix --- tidy3d/web/api/container.py | 12 ++++++++---- tidy3d/web/cache.py | 2 -- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index fc3108745e..fad7674875 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -309,10 +309,12 @@ def run( return data @cached_property - def data_cache_path(self) -> str: + def data_cache_path(self) -> Optional[str]: cache = resolve_simulation_cache(self.use_cache) - path = os.path.join(cache._root, TMP_BATCH_PREFIX, f"{self.task_name}.hdf5") - return path + if cache is not None: + path = os.path.join(cache._root, TMP_BATCH_PREFIX, f"{self.task_name}.hdf5") + return path + return None @cached_property def load_if_cached( @@ -326,6 +328,8 @@ def load_if_cached( Whether item was found in cache. 
""" path = self.data_cache_path + if path is None: + return False self._check_path_dir(path=path) entry = self.get_cache_hit_entry() if entry is not None: @@ -393,7 +397,7 @@ def start(self, priority: Optional[int] = None) -> None: Function has no effect if cache is enabled and data was found in cache. """ loaded = self.load_if_cached - if loaded is None: + if not loaded: web.start( self.task_id, solver_version=self.solver_version, diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py index f8da995150..d319757fed 100644 --- a/tidy3d/web/cache.py +++ b/tidy3d/web/cache.py @@ -678,7 +678,6 @@ def build_cache_key( simulation_hash: str, workflow_type: str, version: str, - solver_version: str, ) -> str: """Construct a deterministic cache key.""" @@ -686,7 +685,6 @@ def build_cache_key( "simulation_hash": simulation_hash, "workflow_type": workflow_type, "versions": _canonicalize(version), - "solver_version": _canonicalize(solver_version), } encoded = json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") return hashlib.sha256(encoded).hexdigest() From 55782b19b3c8f83b8fd12b0ff5c7aa9da5b4d336 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 7 Oct 2025 13:08:11 +0200 Subject: [PATCH 09/22] after rebase fixes --- playground/cache_conf_test.py | 6 ++- tests/test_web/test_simulation_cache.py | 71 ++++++++++++------------- tidy3d/web/api/asynchronous.py | 2 +- tidy3d/web/api/autograd/autograd.py | 4 +- tidy3d/web/api/autograd/engine.py | 2 +- tidy3d/web/api/container.py | 49 ++++++++--------- tidy3d/web/api/webapi.py | 33 +++++++----- tidy3d/web/cache.py | 38 ++++++------- 8 files changed, 103 insertions(+), 102 deletions(-) diff --git a/playground/cache_conf_test.py b/playground/cache_conf_test.py index e0df5eccca..83e652299e 100644 --- a/playground/cache_conf_test.py +++ b/playground/cache_conf_test.py @@ -1,5 +1,7 @@ +from __future__ import annotations + from tidy3d import config -from tidy3d.web.cache import resolve_simulation_cache, get_cache +from tidy3d.web.cache import resolve_simulation_cache # config.simulation_cache.max_size_gb = float(10_000 * 1e-9) # cache = resolve_simulation_cache(use_cache=True) @@ -14,4 +16,4 @@ assert cfg.enabled is True assert cfg.directory == tmp_path assert cfg.max_size_gb == 1.23 -assert cfg.max_entries == 5 \ No newline at end of file +assert cfg.max_entries == 5 diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index a76a90bae3..baca20ac5a 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -1,34 +1,32 @@ from __future__ import annotations -import uuid from pathlib import Path +import pytest + import tidy3d as td -from tests.test_plugins.test_adjoint import use_emulated_run -from tests.utils import run_emulated from tidy3d import config +from tidy3d.web import Job, run_async from tidy3d.web.api import webapi as web from tidy3d.web.cache import ( CACHE_ARTIFACT_NAME, - get_cache, resolve_simulation_cache, + get_cache, + resolve_simulation_cache, ) -from pathlib import Path - -import pytest - -from tidy3d.web import run_async, Job MOCK_TASK_ID = "task-xyz" # --- Fake pipeline global maps / queue --- -TASK_TO_SIM: dict[str, td.Simulation] = {} # task_id -> Simulation -PATH_TO_SIM: dict[str, td.Simulation] = {} # artifact path -> Simulation -SIM_ORDER: list[td.Simulation] = [] # fallback queue when upload isn't called +TASK_TO_SIM: dict[str, td.Simulation] = {} # task_id -> Simulation +PATH_TO_SIM: dict[str, td.Simulation] = {} # artifact 
path -> Simulation +SIM_ORDER: list[td.Simulation] = [] # fallback queue when upload isn't called + def _reset_fake_maps(): TASK_TO_SIM.clear() PATH_TO_SIM.clear() SIM_ORDER.clear() + class _FakeStubData: def __init__(self, simulation: td.Simulation): self.simulation = simulation @@ -51,7 +49,7 @@ def fake_data(monkeypatch, basic_simulation): """Patch postprocess to return stub data bound to the correct simulation.""" calls = {"postprocess": 0} - def _fake_postprocess(path: str): + def _fake_postprocess(path: str, lazy: bool = False): calls["postprocess"] += 1 p = Path(path) sim = PATH_TO_SIM.get(str(p)) @@ -72,6 +70,7 @@ def _fake_postprocess(path: str): monkeypatch.setattr(web.Tidy3dStubData, "postprocess", staticmethod(_fake_postprocess)) return calls + def _patch_run_pipeline(monkeypatch): """Patch upload, start, monitor, and download to avoid network calls and map sims.""" counters = {"upload": 0, "start": 0, "monitor": 0, "download": 0} @@ -139,6 +138,7 @@ def _fake_status(self): ) return counters + def _reset_counters(counters: dict[str, int]) -> None: for key in counters: counters[key] = 0 @@ -161,35 +161,36 @@ def _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): def _test_run_cache_hit_async(monkeypatch, basic_simulation): counters = _patch_run_pipeline(monkeypatch) + monkeypatch.setattr(config.simulation_cache, "max_entries", 128) + monkeypatch.setattr(config.simulation_cache, "max_size_gb", 10) cache = resolve_simulation_cache(use_cache=True) cache.clear() + print("cfg", cache.config) _reset_counters(counters) sim2 = basic_simulation.updated_copy(shutoff=1e-4) sim3 = basic_simulation.updated_copy(shutoff=1e-3) SIM_ORDER[:] = [basic_simulation, sim2, sim3] data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) - data_task1 = data["task1"] # access to store in cache - data_task2 = data["task2"] # access to store in cache + data_task1 = data["task1"] # access to store in cache + data_task2 = data["task2"] # access to store in cache assert counters["download"] == 2 assert isinstance(data_task1, _FakeStubData) assert isinstance(data_task2, _FakeStubData) assert len(cache) == 2 - _reset_counters(counters) run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) assert counters["download"] == 0 assert isinstance(data_task1, _FakeStubData) assert len(cache) == 2 - _reset_counters(counters) data = run_async({"task1": basic_simulation, "task3": sim3}, use_cache=True) data_task1 = data["task1"] - data_task2 = data["task3"] # access to store in cache - assert counters["download"] == 1 # sim3 is new + data_task2 = data["task3"] # access to store in cache + assert counters["download"] == 1 # sim3 is new assert isinstance(data_task1, _FakeStubData) assert isinstance(data_task2, _FakeStubData) assert len(cache) == 3 @@ -210,7 +211,7 @@ def _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): data = web.load(None, path=str(out_path), from_cache=True) assert isinstance(data, _FakeStubData) assert counters["download"] == 0 # served from cache - assert len(cache) == 1 # still 1 item in cache + assert len(cache) == 1 # still 1 item in cache def _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation): @@ -228,8 +229,8 @@ def _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simula assert len(cache) == 0 -def _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation): - config.simulation_cache.max_entries = 1 +def _test_cache_eviction_by_entries(monkeypatch, 
tmp_path_factory, basic_simulation): + monkeypatch.setattr(config.simulation_cache, "max_entries", 1) cache = resolve_simulation_cache(use_cache=True) cache.clear() @@ -248,8 +249,8 @@ def _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation): assert entries[0]["simulation_hash"] == sim2._hash_self() -def _test_cache_eviction_by_size(tmp_path_factory, basic_simulation): - config.simulation_cache.max_size_gb = float(10_000 * 1e-9) +def _test_cache_eviction_by_size(monkeypatch, tmp_path_factory, basic_simulation): + monkeypatch.setattr(config.simulation_cache, "max_size_gb", float(10_000 * 1e-9)) cache = resolve_simulation_cache(use_cache=True) cache.clear() @@ -269,17 +270,6 @@ def _test_cache_eviction_by_size(tmp_path_factory, basic_simulation): assert entries[0]["simulation_hash"] == sim2._hash_self() - -def test_cache_end_to_end(monkeypatch, tmp_path, tmp_path_factory, basic_simulation, fake_data): - """Run all critical cache tests in sequence to ensure end-to-end stability.""" - _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) - _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) - _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation) - _test_cache_eviction_by_entries(tmp_path_factory, basic_simulation) - _test_cache_eviction_by_size(tmp_path_factory, basic_simulation) - _test_run_cache_hit_async(monkeypatch, basic_simulation) - - def test_configure_cache_roundtrip(monkeypatch, tmp_path): monkeypatch.setattr(config.simulation_cache, "enabled", True) monkeypatch.setattr(config.simulation_cache, "directory", tmp_path) @@ -298,7 +288,7 @@ def test_env_var_overrides(monkeypatch, tmp_path): monkeypatch.setenv("TIDY3D_CACHE_DIR", str(tmp_path)) monkeypatch.setenv("TIDY3D_CACHE_MAX_SIZE_GB", "0.5") - config.simulation_cache.max_entries = 5 + monkeypatch.setattr(config.simulation_cache, "max_entries", 5) monkeypatch.setenv("TIDY3D_CACHE_MAX_ENTRIES", "7") cfg = resolve_simulation_cache().config @@ -307,3 +297,12 @@ def test_env_var_overrides(monkeypatch, tmp_path): assert cfg.max_size_gb == 0.5 assert cfg.max_entries == 7 + +def test_cache_end_to_end(monkeypatch, tmp_path, tmp_path_factory, basic_simulation, fake_data): + """Run all critical cache tests in sequence to ensure end-to-end stability.""" + _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) + _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) + _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation) + _test_cache_eviction_by_entries(monkeypatch, tmp_path_factory, basic_simulation) + _test_cache_eviction_by_size(monkeypatch, tmp_path_factory, basic_simulation) + _test_run_cache_hit_async(monkeypatch, basic_simulation) diff --git a/tidy3d/web/api/asynchronous.py b/tidy3d/web/api/asynchronous.py index ab8fad38ab..34c03569c1 100644 --- a/tidy3d/web/api/asynchronous.py +++ b/tidy3d/web/api/asynchronous.py @@ -57,7 +57,7 @@ def run_async( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. - use_cache: bool = None + use_cache: Optional[bool] = None Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or environment variables will be used. 
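Taken together, the `use_cache` plumbing above follows one rule across `web.run`, `run_async`, `Job`, and `Batch`: an explicit per-call value wins, otherwise the `TIDY3D_CACHE_*` environment variables apply, otherwise `config.simulation_cache`. A minimal sketch of the intended behavior (illustrative only; `simulation` stands for any `td.Simulation`, and no `TIDY3D_CACHE_*` variables are assumed to be set):

    from tidy3d import config
    from tidy3d.web import run

    config.simulation_cache.enabled = True  # opt in globally

    # First run goes through upload/monitor/download, then stores the artifact locally.
    data = run(simulation, task_name="demo", path="out/demo.hdf5")

    # Re-running the identical simulation is served from the local cache.
    data_again = run(simulation, task_name="demo", path="out/demo.hdf5")

    # The per-call override has the last word and bypasses the cache entirely.
    data_fresh = run(simulation, task_name="demo", path="out/demo.hdf5", use_cache=False)
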
diff --git a/tidy3d/web/api/autograd/autograd.py b/tidy3d/web/api/autograd/autograd.py index e7a08e9d42..928b953c02 100644 --- a/tidy3d/web/api/autograd/autograd.py +++ b/tidy3d/web/api/autograd/autograd.py @@ -159,7 +159,7 @@ def run( Which method to pay for the simulation. priority: int = None Task priority for vGPU queue (1=lowest, 10=highest). - use_cache: bool = None + use_cache: Optional[bool] = None Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or environment variables will be used. Returns @@ -326,7 +326,7 @@ def run_async( pay_type: typing.Union[PayType, str] = PayType.AUTO Specify the payment method. Whether to reduce structures in the simulation to the simulation domain only. Note: currently only implemented for the mode solver. - use_cache: bool = None + use_cache: Optional[bool] = None Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or environment variables will be used. diff --git a/tidy3d/web/api/autograd/engine.py b/tidy3d/web/api/autograd/engine.py index 2cd0abe451..bf7713f534 100644 --- a/tidy3d/web/api/autograd/engine.py +++ b/tidy3d/web/api/autograd/engine.py @@ -10,7 +10,7 @@ def parse_run_kwargs(**run_kwargs): """Parse the ``run_kwargs`` to extract what should be passed to the ``Job``/``Batch`` init.""" - job_fields = [*list(Job._upload_fields), "solver_version", "pay_type"] + job_fields = [*list(Job._upload_fields), "solver_version", "pay_type", "use_cache"] job_init_kwargs = {k: v for k, v in run_kwargs.items() if k in job_fields} return job_init_kwargs diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index fad7674875..6af0e3136c 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -4,14 +4,12 @@ import concurrent import os -import random import shutil import time import uuid from abc import ABC from collections.abc import Mapping from concurrent.futures import ThreadPoolExecutor -from pathlib import Path from typing import Literal, Optional, Union import pydantic.v1 as pd @@ -25,10 +23,9 @@ from tidy3d.exceptions import DataError from tidy3d.log import get_logging_console, log from tidy3d.web.api import webapi as web -from tidy3d.web.api.tidy3d_stub import Tidy3dStub from tidy3d.web.api.tidy3d_stub import Tidy3dStub, Tidy3dStubData -from tidy3d.web.api.webapi import get_reduced_simulation, _get_simulation_data_from_cache_entry -from tidy3d.web.cache import build_cache_key, resolve_simulation_cache, CacheEntry, TMP_BATCH_PREFIX +from tidy3d.web.api.webapi import _get_simulation_data_from_cache_entry, get_reduced_simulation +from tidy3d.web.cache import TMP_BATCH_PREFIX, CacheEntry, resolve_simulation_cache from tidy3d.web.core.constants import TaskId, TaskName from tidy3d.web.core.task_core import Folder from tidy3d.web.core.task_info import RunInfo, TaskInfo @@ -220,12 +217,6 @@ class Job(WebContainer): description="Specify the payment method.", ) - data_cache_path: str | None = pd.Field( - None, - title="Data Cache Path", - description="File where cache is copied to.", - ) - _upload_fields = ( "simulation", "task_name", @@ -286,7 +277,7 @@ def run( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. - use_cache: bool = None + use_cache: Optional[bool] = None Override cache usage behaviour for this call. 
``True`` forces cache usage when available, ``False`` bypasses it, and ``None`` defers to configuration and environment settings. Returns @@ -310,6 +301,7 @@ def run( @cached_property def data_cache_path(self) -> Optional[str]: + "Temporary path where cached results are stored." cache = resolve_simulation_cache(self.use_cache) if cache is not None: path = os.path.join(cache._root, TMP_BATCH_PREFIX, f"{self.task_name}.hdf5") @@ -317,9 +309,7 @@ def data_cache_path(self) -> Optional[str]: return None @cached_property - def load_if_cached( - self - ) -> bool: + def load_if_cached(self) -> bool: """Checks if data is already cached. Returns @@ -335,10 +325,7 @@ def load_if_cached( if entry is not None: loaded_from_cache = _get_simulation_data_from_cache_entry(entry, path) if loaded_from_cache: - print(f"+ {self.task_name} found") return True - - print(f"X {self.task_name} NOT found") return False @cached_property @@ -434,7 +421,7 @@ def download(self, path: str = DEFAULT_DATA_PATH) -> None: ---------- path : str = "./simulation_data.hdf5" Path to download data as ``.hdf5`` file (including filename). - use_cache: bool = None + use_cache: Optional[bool] = None Override cache usage behaviour for this call. ``True`` forces cache usage when available, ``False`` bypasses it, and ``None`` defers to configuration and environment settings. @@ -448,7 +435,6 @@ def download(self, path: str = DEFAULT_DATA_PATH) -> None: self._check_path_dir(path=path) web.download(task_id=self.task_id, path=path, verbose=self.verbose) - def move_cache_file(self, path: str) -> None: if self._cache_file_moved: return @@ -458,16 +444,14 @@ def move_cache_file(self, path: str) -> None: else: raise FileNotFoundError(f"Cached file does not longer exist in {path}.") - def load( - self, path: str = DEFAULT_DATA_PATH - ) -> WorkflowDataType: + def load(self, path: str = DEFAULT_DATA_PATH) -> WorkflowDataType: """Download job results and load them into a data object. Parameters ---------- path : str = "./simulation_data.hdf5" Path to download data as ``.hdf5`` file (including filename). - use_cache: bool = None + use_cache: Optional[bool] = None Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or environment variables will be used. 
@@ -614,7 +598,14 @@ def load_sim_data(self, task_name: str) -> WorkflowDataType: from_cache = self.cached_tasks[task_name] if self.cached_tasks else False web.get_info(task_id) - return web.load(task_id=task_id, path=task_data_path, verbose=False, from_cache=from_cache, use_cache=self.use_cache, replace_existing=False) + return web.load( + task_id=task_id, + path=task_data_path, + verbose=False, + from_cache=from_cache, + use_cache=self.use_cache, + replace_existing=False, + ) def __getitem__(self, task_name: TaskName) -> WorkflowDataType: """Get the simulation data object for a given ``task_name``.""" @@ -1227,7 +1218,13 @@ def load(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) task_ids[task_name] = self.jobs[task_name].task_id loaded = {task_name: job.load_if_cached for task_name, job in self.jobs.items()} - data = BatchData(task_paths=task_paths, task_ids=task_ids, verbose=self.verbose, cached_tasks=loaded, use_cache=self.use_cache) + data = BatchData( + task_paths=task_paths, + task_ids=task_ids, + verbose=self.verbose, + cached_tasks=loaded, + use_cache=self.use_cache, + ) for task_name, job in self.jobs.items(): if isinstance(job.simulation, ModeSolver): diff --git a/tidy3d/web/api/webapi.py b/tidy3d/web/api/webapi.py index fd553970b7..1fe1e56812 100644 --- a/tidy3d/web/api/webapi.py +++ b/tidy3d/web/api/webapi.py @@ -19,7 +19,7 @@ from tidy3d.exceptions import WebError from tidy3d.log import get_logging_console, log from tidy3d.plugins.smatrix.component_modelers.terminal import TerminalComponentModeler -from tidy3d.web.cache import resolve_simulation_cache, SimulationCache, CacheEntry +from tidy3d.web.cache import CacheEntry, resolve_simulation_cache from tidy3d.web.core.account import Account from tidy3d.web.core.constants import ( CM_DATA_HDF5_GZ, @@ -123,6 +123,7 @@ def _task_dict_to_url_bullet_list(data_dict: dict) -> str: # and then join them together with newline characters. return "\n".join([f"- {key}: '{value}'" for key, value in data_dict.items()]) + def _get_simulation_data_from_cache_entry(entry: CacheEntry, path: str) -> bool: if entry is not None: try: @@ -132,6 +133,7 @@ def _get_simulation_data_from_cache_entry(entry: CacheEntry, path: str) -> bool: return False return False + @wait_for_connection def run( simulation: WorkflowType, @@ -150,6 +152,7 @@ def run( pay_type: Union[PayType, str] = PayType.AUTO, priority: Optional[int] = None, use_cache: Optional[bool] = None, + lazy: bool = False, ) -> WorkflowDataType: """ Submits a :class:`.Simulation` to server, starts running, monitors progress, downloads, @@ -187,9 +190,11 @@ def run( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. - use_cache: bool = None + use_cache: Optional[bool] = None Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or environment variables will be used. + lazy: bool = False + Whether to load the simulation data lazily (not until data access). 
     Returns
     -------
     Union[:class:`.SimulationData`, :class:`.HeatSimulationData`, :class:`.EMESimulationData`]
         Object containing simulation data.
@@ -240,9 +245,7 @@ def run(
         sim_for_cache = simulation
         if isinstance(simulation, (ModeSolver, ModeSimulation)):
             sim_for_cache = get_reduced_simulation(simulation, reduce_simulation)
-        entry = simulation_cache.try_fetch(
-            simulation=sim_for_cache
-        )
+        entry = simulation_cache.try_fetch(simulation=sim_for_cache)
         loaded_from_cache = _get_simulation_data_from_cache_entry(entry, path)

     if not loaded_from_cache:
@@ -277,6 +280,7 @@ def run(
             progress_callback=progress_callback_download,
             use_cache=use_cache,
             from_cache=loaded_from_cache,
+            lazy=lazy,
         )

     if isinstance(simulation, ModeSolver):
@@ -1023,8 +1027,9 @@ def load(
     replace_existing: bool = True,
     verbose: bool = True,
     progress_callback: Optional[Callable[[float], None]] = None,
-    use_cache: bool = False,
+    use_cache: Optional[bool] = None,
     from_cache: bool = False,
+    lazy: bool = False,
 ) -> WorkflowDataType:
     """
     Download and Load simulation results into :class:`.SimulationData` object.
@@ -1054,9 +1059,11 @@ def load(
         If ``True``, will print progressbars and status, otherwise, will run silently.
     progress_callback : Callable[[float], None] = None
         Optional callback function called when downloading file with ``bytes_in_chunk`` as argument.
-    use_cache: bool = None
+    use_cache: Optional[bool] = None
         Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or
         environment variables will be used.
+    from_cache: bool = False
+        Whether the result is served from the local simulation cache instead of being downloaded.
     lazy : bool = False
         Whether to load the actual data (``lazy=False``) or return a proxy that loads the data when accessed
         (``lazy=True``).
@@ -1069,7 +1076,11 @@ def load(
     assert from_cache or task_id, "Either task_id or from_cache must be provided."

     # For component modeler batches, default to a clearer filename if the default was used.
-    if not from_cache and _is_modeler_batch(task_id) and os.path.basename(path) == "simulation_data.hdf5":
+    if (
+        not from_cache
+        and _is_modeler_batch(task_id)
+        and os.path.basename(path) == "simulation_data.hdf5"
+    ):
         base_dir = os.path.dirname(path) or "."
path = os.path.join(base_dir, "cm_data.hdf5") @@ -1077,18 +1088,17 @@ def load( if not os.path.exists(path): raise FileNotFoundError("Cached file not found.") elif not os.path.exists(path) or replace_existing: - download(task_id=task_id, path=path, verbose=verbose, progress_callback=progress_callback) + download(task_id=task_id, path=path, verbose=verbose, progress_callback=progress_callback) if verbose: console = get_logging_console() - if not from_cache and _is_modeler_batch(task_id): # TODO inspect + if not from_cache and _is_modeler_batch(task_id): # TODO inspect console.log(f"loading component modeler data from {path}") else: console.log(f"loading simulation from {path}") stub_data = Tidy3dStubData.postprocess(path, lazy=lazy) - simulation_cache = resolve_simulation_cache(use_cache) if simulation_cache is not None and not from_cache: info = get_info(task_id, verbose=False) @@ -1099,7 +1109,6 @@ def load( path=path, workflow_type=workflow_type, ) - print("STORED", task_id) return stub_data diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py index d319757fed..9eff7db204 100644 --- a/tidy3d/web/cache.py +++ b/tidy3d/web/cache.py @@ -8,7 +8,6 @@ import shutil import tempfile import threading -import traceback from collections.abc import Iterable from dataclasses import dataclass, field, replace from datetime import datetime, timezone @@ -16,12 +15,12 @@ from functools import lru_cache from pathlib import Path from typing import Any, Optional + from tidy3d import config from tidy3d.components.types.workflow import WorkflowDataType, WorkflowType from tidy3d.log import log from tidy3d.web.api.tidy3d_stub import Tidy3dStub from tidy3d.web.core.constants import TaskId -from tidy3d.web.core.environment import Env from tidy3d.web.core.http_util import get_version as _get_protocol_version DEFAULT_CACHE_RELATIVE_DIR = Path(".tidy3d") / "cache" / "simulations" @@ -40,7 +39,6 @@ _CONFIG_LOCK = threading.RLock() - @dataclass(frozen=True) class SimulationCacheConfig: """Configuration for the simulation cache.""" @@ -80,7 +78,6 @@ def _coerce_int(value: str) -> Optional[int]: return None - def _load_env_overrides() -> dict[str, Any]: overrides: dict[str, Any] = {} @@ -102,6 +99,7 @@ def _load_env_overrides() -> dict[str, Any]: return overrides + def _load_effective_config() -> SimulationCacheConfig: """ Build the initial, global cache config at import-time. 
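The layering that `_load_effective_config` performs can be exercised directly. A short sketch under the assumption that the environment variable names match the `ENV_*` constants defined earlier in this module:

    import os

    from tidy3d import config
    from tidy3d.web.cache import resolve_simulation_cache

    config.simulation_cache.max_size_gb = 2.0       # persisted-config layer
    os.environ["TIDY3D_CACHE_MAX_SIZE_GB"] = "0.5"  # env layer takes precedence

    cache = resolve_simulation_cache(use_cache=True)  # per-call flag forces 'enabled'
    assert cache is not None
    assert cache.config.max_size_gb == 0.5
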
@@ -164,7 +162,6 @@ def get_cache() -> SimulationCache: return SimulationCache(cfg) - def _merge_from_tidy3d_config() -> SimulationCacheConfig: """Overlay app-level persisted settings (if any) onto the current global config snapshot.""" simulation_cache_settings = config.simulation_cache @@ -176,15 +173,14 @@ def _merge_from_tidy3d_config() -> SimulationCacheConfig: ) -def _apply_overrides(cfg: SimulationCacheConfig, overrides: dict[str, Any]) -> SimulationCacheConfig: +def _apply_overrides( + cfg: SimulationCacheConfig, overrides: dict[str, Any] +) -> SimulationCacheConfig: """Apply dict-based overrides (enabled/directory/max_size_gb/max_entries).""" if not overrides: return cfg # Filter to fields that exist on the dataclass and are not None - allowed = { - k: v for k, v in overrides.items() - if v is not None and hasattr(cfg, k) - } + allowed = {k: v for k, v in overrides.items() if v is not None and hasattr(cfg, k)} return replace(cfg, **allowed) if allowed else cfg @@ -202,6 +198,8 @@ def resolve_simulation_cache(use_cache: Optional[bool] = None) -> Optional[Simul desired = _load_effective_config() if use_cache is not None: + if desired.directory != current.directory: + get_cache().clear(hard=True) desired = replace(desired, enabled=use_cache) if desired != current: @@ -217,9 +215,6 @@ def resolve_simulation_cache(use_cache: Optional[bool] = None) -> Optional[Simul return None - - - @dataclass class CacheEntry: """Internal representation of a cache entry.""" @@ -270,6 +265,7 @@ def materialize(self, target: Path) -> Path: shutil.copy2(self.artifact_path, target) return target + class SimulationCache: """Manages storing and retrieving cached simulation artifacts.""" @@ -293,17 +289,17 @@ def list(self) -> list[dict[str, Any]]: with self._lock: return [entry.metadata for entry in self._iter_entries()] - def clear(self) -> None: + def clear(self, hard=False) -> None: """Remove all cache contents.""" with self._lock: if self._root.exists(): try: shutil.rmtree(self._root) - self._root.mkdir(parents=True, exist_ok=True) + if not hard: + self._root.mkdir(parents=True, exist_ok=True) except (FileNotFoundError, OSError): pass - def _fetch(self, key: str) -> Optional[CacheEntry]: """Retrieve an entry by key, verifying checksum.""" with self._lock: @@ -524,13 +520,14 @@ def try_fetch( return None # self._store(key=cache_key, task_id=task_id, source_path=path, metadata={}) if verbose: - log.info("Simulation cache hit for workflow '%s'; using local results.", workflow_type) + log.info( + "Simulation cache hit for workflow '%s'; using local results.", workflow_type + ) return entry except Exception as e: log.error("Failed to fetch cache results." 
+ str(e)) - def store_result( self, stub_data: WorkflowDataType, @@ -573,9 +570,8 @@ def store_result( source_path=Path(path), metadata=metadata, ) - except Exception as e: + except Exception: log.error("Could not store cache entry.") - print("ERROR", e, traceback.format_exc()) def _copy_and_hash( @@ -643,10 +639,8 @@ def hexdigest(self) -> str: return self._hasher.hexdigest() - def clear() -> None: """Remove all cache entries.""" - get_cache().clear() From 589212394179bec666173194c1f5b865b32c976a Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 7 Oct 2025 13:31:02 +0200 Subject: [PATCH 10/22] FXC-3294-add-opt-in-local-cache-for-simulation-results-hashed-by-simulation-runtime-context --- tests/test_web/test_simulation_cache.py | 308 +++++++++++ tidy3d/config.py | 34 ++ tidy3d/web/api/asynchronous.py | 6 + tidy3d/web/api/autograd/autograd.py | 13 + tidy3d/web/api/autograd/engine.py | 2 +- tidy3d/web/api/container.py | 199 ++++++- tidy3d/web/api/webapi.py | 119 +++- tidy3d/web/cache.py | 706 ++++++++++++++++++++++++ 8 files changed, 1332 insertions(+), 55 deletions(-) create mode 100644 tests/test_web/test_simulation_cache.py create mode 100644 tidy3d/web/cache.py diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py new file mode 100644 index 0000000000..3742aa1eb2 --- /dev/null +++ b/tests/test_web/test_simulation_cache.py @@ -0,0 +1,308 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +import tidy3d as td +from tidy3d import config +from tidy3d.web import Job, run_async +from tidy3d.web.api import webapi as web +from tidy3d.web.cache import ( + CACHE_ARTIFACT_NAME, + get_cache, + resolve_simulation_cache, +) + +MOCK_TASK_ID = "task-xyz" +# --- Fake pipeline global maps / queue --- +TASK_TO_SIM: dict[str, td.Simulation] = {} # task_id -> Simulation +PATH_TO_SIM: dict[str, td.Simulation] = {} # artifact path -> Simulation +SIM_ORDER: list[td.Simulation] = [] # fallback queue when upload isn't called + + +def _reset_fake_maps(): + TASK_TO_SIM.clear() + PATH_TO_SIM.clear() + SIM_ORDER.clear() + + +class _FakeStubData: + def __init__(self, simulation: td.Simulation): + self.simulation = simulation + + +@pytest.fixture +def basic_simulation(): + pulse = td.GaussianPulse(freq0=200e12, fwidth=20e12) + pt_dipole = td.PointDipole(source_time=pulse, polarization="Ex") + return td.Simulation( + size=(1, 1, 1), + grid_spec=td.GridSpec.auto(wavelength=1.0), + run_time=1e-12, + sources=[pt_dipole], + ) + + +@pytest.fixture(autouse=True) +def fake_data(monkeypatch, basic_simulation): + """Patch postprocess to return stub data bound to the correct simulation.""" + calls = {"postprocess": 0} + + def _fake_postprocess(path: str, lazy: bool = False): + calls["postprocess"] += 1 + p = Path(path) + sim = PATH_TO_SIM.get(str(p)) + if sim is None: + # Try to recover task_id from file payload written by _fake_download + try: + txt = p.read_text() + if "payload:" in txt: + task_id = txt.split("payload:", 1)[1].strip() + sim = TASK_TO_SIM.get(task_id) + except Exception: + pass + if sim is None: + # Last-resort fallback (keeps tests from crashing even if mapping failed) + sim = basic_simulation + return _FakeStubData(sim) + + monkeypatch.setattr(web.Tidy3dStubData, "postprocess", staticmethod(_fake_postprocess)) + return calls + + +def _patch_run_pipeline(monkeypatch): + """Patch upload, start, monitor, and download to avoid network calls and map sims.""" + counters = {"upload": 0, "start": 0, "monitor": 0, "download": 0} + 
_reset_fake_maps() # isolate between tests + + def _extract_simulation(kwargs): + """Extract the first td.Simulation object from upload kwargs.""" + if "simulation" in kwargs and isinstance(kwargs["simulation"], td.Simulation): + return kwargs["simulation"] + if "simulations" in kwargs: + sims = kwargs["simulations"] + if isinstance(sims, dict): + for sim in sims.values(): + if isinstance(sim, td.Simulation): + return sim + elif isinstance(sims, (list, tuple)): + for sim in sims: + if isinstance(sim, td.Simulation): + return sim + return None + + def _fake_upload(**kwargs): + counters["upload"] += 1 + task_id = f"{MOCK_TASK_ID}{counters['upload']}" + sim = _extract_simulation(kwargs) + if sim is None and SIM_ORDER: + # Upload wasn't given the sim (or async path differs) -> fallback + sim = SIM_ORDER.pop(0) + if sim is not None: + TASK_TO_SIM[task_id] = sim + return task_id + + def _fake_start(task_id, **kwargs): + counters["start"] += 1 + + def _fake_monitor(task_id, verbose=True): + counters["monitor"] += 1 + + def _fake_download(*, task_id, path, **kwargs): + counters["download"] += 1 + # Ensure we have a simulation for this task id (even if upload wasn't called) + sim = TASK_TO_SIM.get(task_id) + if sim is None and SIM_ORDER: + sim = SIM_ORDER.pop(0) + TASK_TO_SIM[task_id] = sim + Path(path).write_text(f"payload:{task_id}") + if sim is not None: + PATH_TO_SIM[str(Path(path))] = sim + + def _fake_status(self): + return "success" + + monkeypatch.setattr(web, "upload", _fake_upload) + monkeypatch.setattr(web, "start", _fake_start) + monkeypatch.setattr(web, "monitor", _fake_monitor) + monkeypatch.setattr(web, "download", _fake_download) + monkeypatch.setattr(web, "estimate_cost", lambda *args, **kwargs: 0.0) + monkeypatch.setattr(Job, "status", property(_fake_status)) + monkeypatch.setattr( + web, + "get_info", + lambda task_id, verbose=True: type( + "_Info", (), {"solverVersion": "solver-1", "taskType": "FDTD"} + )(), + ) + return counters + + +def _reset_counters(counters: dict[str, int]) -> None: + for key in counters: + counters[key] = 0 + + +def _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): + counters = _patch_run_pipeline(monkeypatch) + out_path = tmp_path / "result.hdf5" + get_cache().clear() + + data = web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) + assert isinstance(data, _FakeStubData) + assert counters == {"upload": 1, "start": 1, "monitor": 1, "download": 1} + + _reset_counters(counters) + data2 = web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) + assert isinstance(data2, _FakeStubData) + assert counters == {"upload": 0, "start": 0, "monitor": 0, "download": 0} + + +def _test_run_cache_hit_async(monkeypatch, basic_simulation): + counters = _patch_run_pipeline(monkeypatch) + monkeypatch.setattr(config.simulation_cache, "max_entries", 128) + monkeypatch.setattr(config.simulation_cache, "max_size_gb", 10) + cache = resolve_simulation_cache(use_cache=True) + cache.clear() + + _reset_counters(counters) + sim2 = basic_simulation.updated_copy(shutoff=1e-4) + sim3 = basic_simulation.updated_copy(shutoff=1e-3) + SIM_ORDER[:] = [basic_simulation, sim2, sim3] + + data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) + data_task1 = data["task1"] # access to store in cache + data_task2 = data["task2"] # access to store in cache + assert counters["download"] == 2 + assert isinstance(data_task1, _FakeStubData) + assert isinstance(data_task2, _FakeStubData) + assert len(cache) == 
2 + + _reset_counters(counters) + run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) + assert counters["download"] == 0 + assert isinstance(data_task1, _FakeStubData) + assert len(cache) == 2 + + _reset_counters(counters) + data = run_async({"task1": basic_simulation, "task3": sim3}, use_cache=True) + + data_task1 = data["task1"] + data_task2 = data["task3"] # access to store in cache + assert counters["download"] == 1 # sim3 is new + assert isinstance(data_task1, _FakeStubData) + assert isinstance(data_task2, _FakeStubData) + assert len(cache) == 3 + + +def _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): + get_cache().clear() + counters = _patch_run_pipeline(monkeypatch) + out_path = tmp_path / "load.hdf5" + + cache = get_cache() + + web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) + assert counters["download"] == 1 + assert len(cache) == 1 + + _reset_counters(counters) + data = web.load(None, path=str(out_path), from_cache=True) + assert isinstance(data, _FakeStubData) + assert counters["download"] == 0 # served from cache + assert len(cache) == 1 # still 1 item in cache + + +def _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation): + out_path = tmp_path / "checksum.hdf5" + get_cache().clear() + + web.run(basic_simulation, task_name="demo", path=str(out_path), use_cache=True) + + cache = get_cache() + metadata = cache.list()[0] + corrupted_path = cache.root / metadata["cache_key"] / CACHE_ARTIFACT_NAME + corrupted_path.write_text("corrupted") + + cache._fetch(metadata["cache_key"]) + assert len(cache) == 0 + + +def _test_cache_eviction_by_entries(monkeypatch, tmp_path_factory, basic_simulation): + monkeypatch.setattr(config.simulation_cache, "max_entries", 1) + cache = resolve_simulation_cache(use_cache=True) + cache.clear() + + file1 = tmp_path_factory.mktemp("art1") / CACHE_ARTIFACT_NAME + file1.write_text("a") + cache.store_result(_FakeStubData(basic_simulation), MOCK_TASK_ID, str(file1), "FDTD") + assert len(cache) == 1 + + sim2 = basic_simulation.updated_copy(shutoff=1e-4) + file2 = tmp_path_factory.mktemp("art2") / CACHE_ARTIFACT_NAME + file2.write_text("b") + cache.store_result(_FakeStubData(sim2), MOCK_TASK_ID, str(file2), "FDTD") + + entries = cache.list() + assert len(entries) == 1 + assert entries[0]["simulation_hash"] == sim2._hash_self() + + +def _test_cache_eviction_by_size(monkeypatch, tmp_path_factory, basic_simulation): + monkeypatch.setattr(config.simulation_cache, "max_size_gb", float(10_000 * 1e-9)) + cache = resolve_simulation_cache(use_cache=True) + cache.clear() + + file1 = tmp_path_factory.mktemp("art1") / CACHE_ARTIFACT_NAME + file1.write_text("a" * 8_000) + cache.store_result(_FakeStubData(basic_simulation), MOCK_TASK_ID, str(file1), "FDTD") + assert len(cache) == 1 + + sim2 = basic_simulation.updated_copy(shutoff=1e-4) + file2 = tmp_path_factory.mktemp("art2") / CACHE_ARTIFACT_NAME + file2.write_text("b" * 8_000) + cache.store_result(_FakeStubData(sim2), MOCK_TASK_ID, str(file2), "FDTD") + + entries = cache.list() + print("len(entries)", len(entries)) + assert len(cache) == 1 + assert entries[0]["simulation_hash"] == sim2._hash_self() + + +def test_configure_cache_roundtrip(monkeypatch, tmp_path): + monkeypatch.setattr(config.simulation_cache, "enabled", True) + monkeypatch.setattr(config.simulation_cache, "directory", tmp_path) + monkeypatch.setattr(config.simulation_cache, "max_size_gb", 1.23) + monkeypatch.setattr(config.simulation_cache, "max_entries", 
5) + + cfg = resolve_simulation_cache().config + assert cfg.enabled is True + assert cfg.directory == tmp_path + assert cfg.max_size_gb == 1.23 + assert cfg.max_entries == 5 + + +def test_env_var_overrides(monkeypatch, tmp_path): + monkeypatch.setenv("TIDY3D_CACHE_ENABLED", "true") + monkeypatch.setenv("TIDY3D_CACHE_DIR", str(tmp_path)) + monkeypatch.setenv("TIDY3D_CACHE_MAX_SIZE_GB", "0.5") + + monkeypatch.setattr(config.simulation_cache, "max_entries", 5) + monkeypatch.setenv("TIDY3D_CACHE_MAX_ENTRIES", "7") + + cfg = resolve_simulation_cache().config + assert cfg.enabled is True + assert cfg.directory == tmp_path + assert cfg.max_size_gb == 0.5 + assert cfg.max_entries == 7 + + +def test_cache_end_to_end(monkeypatch, tmp_path, tmp_path_factory, basic_simulation, fake_data): + """Run all critical cache tests in sequence to ensure end-to-end stability.""" + _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) + _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data) + _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation) + _test_cache_eviction_by_entries(monkeypatch, tmp_path_factory, basic_simulation) + _test_cache_eviction_by_size(monkeypatch, tmp_path_factory, basic_simulation) + _test_run_cache_hit_async(monkeypatch, basic_simulation) \ No newline at end of file diff --git a/tidy3d/config.py b/tidy3d/config.py index 9dfc7b702c..7d093a1502 100644 --- a/tidy3d/config.py +++ b/tidy3d/config.py @@ -2,13 +2,41 @@ from __future__ import annotations +from pathlib import Path from typing import Optional import pydantic.v1 as pd from .log import DEFAULT_LEVEL, LogLevel, set_log_suppression, set_logging_level +_DEFAULT_CACHE_DIR = Path.home() / ".tidy3d" / "cache" / "simulations" +class SimulationCacheSettings(pd.BaseModel): + """Settings controlling the optional local simulation cache.""" + + enabled: bool = pd.Field( + False, + description="Enable or disable the local simulation cache.", + ) + directory: Path = pd.Field( + _DEFAULT_CACHE_DIR, + description="Directory where cached simulation artifacts are stored.", + ) + max_size_gb: float = pd.Field( + 10.0, + description="Maximum cache size in gigabytes. Set to 0 for no size limit.", + ge=0.0, + ) + max_entries: int = pd.Field( + 128, + description="Maximum number of cache entries. 
Set to 0 for no limit.", + ge=0, + ) + + @pd.validator("directory", pre=True, always=True) + def _validate_directory(cls, value): + return Path(value).expanduser() + class Tidy3dConfig(pd.BaseModel): """configuration of tidy3d""" @@ -43,6 +71,12 @@ class Config: "averaging will be used if 'tidy3d-extras' is installed and not used otherwise.", ) + simulation_cache: SimulationCacheSettings = pd.Field( + default_factory=SimulationCacheSettings, + title="Simulation Cache", + description="Configuration for the optional local simulation cache.", + ) + @pd.validator("logging_level", pre=True, always=True) def _set_logging_level(cls, val): """Set the logging level if logging_level is changed.""" diff --git a/tidy3d/web/api/asynchronous.py b/tidy3d/web/api/asynchronous.py index da628261f3..34c03569c1 100644 --- a/tidy3d/web/api/asynchronous.py +++ b/tidy3d/web/api/asynchronous.py @@ -24,6 +24,7 @@ def run_async( reduce_simulation: Literal["auto", True, False] = "auto", pay_type: Union[PayType, str] = PayType.AUTO, priority: Optional[int] = None, + use_cache: Optional[bool] = None, ) -> BatchData: """Submits a set of Union[:class:`.Simulation`, :class:`.HeatSimulation`, :class:`.EMESimulation`] objects to server, starts running, monitors progress, downloads, and loads results as a :class:`.BatchData` object. @@ -56,6 +57,10 @@ def run_async( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. + use_cache: Optional[bool] = None + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or + environment variables will be used. + Returns ------ :class:`BatchData` @@ -91,6 +96,7 @@ def run_async( parent_tasks=parent_tasks, reduce_simulation=reduce_simulation, pay_type=pay_type, + use_cache=use_cache, ) batch_data = batch.run(path_dir=path_dir, priority=priority) diff --git a/tidy3d/web/api/autograd/autograd.py b/tidy3d/web/api/autograd/autograd.py index 7958dc57c9..928b953c02 100644 --- a/tidy3d/web/api/autograd/autograd.py +++ b/tidy3d/web/api/autograd/autograd.py @@ -117,6 +117,7 @@ def run( reduce_simulation: typing.Literal["auto", True, False] = "auto", pay_type: typing.Union[PayType, str] = PayType.AUTO, priority: typing.Optional[int] = None, + use_cache: typing.Optional[bool] = None, ) -> WorkflowDataType: """ Submits a :class:`.Simulation` to server, starts running, monitors progress, downloads, @@ -158,6 +159,9 @@ def run( Which method to pay for the simulation. priority: int = None Task priority for vGPU queue (1=lowest, 10=highest). + use_cache: Optional[bool] = None + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or + environment variables will be used. 
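+
+        A minimal sketch of the per-call override (names are illustrative; assumes an
+        existing simulation object ``sim``)::
+
+            import tidy3d.web as web
+
+            # first run uploads, solves, downloads, and populates the local cache
+            data = web.run(sim, task_name="demo", path="out.hdf5", use_cache=True)
+            # an identical rerun is served from the cache and skips the pipeline
+            data2 = web.run(sim, task_name="demo", path="out.hdf5", use_cache=True)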
Returns ------- Union[:class:`.SimulationData`, :class:`.HeatSimulationData`, :class:`.EMESimulationData`, :class:`.ModalComponentModelerData`, :class:`.TerminalComponentModelerData`] @@ -248,6 +252,7 @@ def run( max_num_adjoint_per_fwd=max_num_adjoint_per_fwd, pay_type=pay_type, priority=priority, + use_cache=use_cache, ) return run_webapi( @@ -266,6 +271,7 @@ def run( reduce_simulation=reduce_simulation, pay_type=pay_type, priority=priority, + use_cache=use_cache, ) @@ -284,6 +290,7 @@ def run_async( reduce_simulation: typing.Literal["auto", True, False] = "auto", pay_type: typing.Union[PayType, str] = PayType.AUTO, priority: typing.Optional[int] = None, + use_cache: typing.Optional[bool] = None, ) -> BatchData: """Submits a set of Union[:class:`.Simulation`, :class:`.HeatSimulation`, :class:`.EMESimulation`] objects to server, starts running, monitors progress, downloads, and loads results as a :class:`.BatchData` object. @@ -318,6 +325,10 @@ def run_async( Whether to reduce structures in the simulation to the simulation domain only. Note: currently only implemented for the mode solver. pay_type: typing.Union[PayType, str] = PayType.AUTO Specify the payment method. + Whether to reduce structures in the simulation to the simulation domain only. Note: currently only implemented for the mode solver. + use_cache: Optional[bool] = None + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or + environment variables will be used. Returns ------ @@ -360,6 +371,7 @@ def run_async( max_num_adjoint_per_fwd=max_num_adjoint_per_fwd, pay_type=pay_type, priority=priority, + use_cache=use_cache, ) return run_async_webapi( @@ -375,6 +387,7 @@ def run_async( reduce_simulation=reduce_simulation, pay_type=pay_type, priority=priority, + use_cache=use_cache, ) diff --git a/tidy3d/web/api/autograd/engine.py b/tidy3d/web/api/autograd/engine.py index 2cd0abe451..bf7713f534 100644 --- a/tidy3d/web/api/autograd/engine.py +++ b/tidy3d/web/api/autograd/engine.py @@ -10,7 +10,7 @@ def parse_run_kwargs(**run_kwargs): """Parse the ``run_kwargs`` to extract what should be passed to the ``Job``/``Batch`` init.""" - job_fields = [*list(Job._upload_fields), "solver_version", "pay_type"] + job_fields = [*list(Job._upload_fields), "solver_version", "pay_type", "use_cache"] job_init_kwargs = {k: v for k, v in run_kwargs.items() if k in job_fields} return job_init_kwargs diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index dc6a96b5a3..2c85f67eb5 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -4,7 +4,9 @@ import concurrent import os +import shutil import time +import uuid from abc import ABC from collections.abc import Mapping from concurrent.futures import ThreadPoolExecutor @@ -15,12 +17,15 @@ from tidy3d.components.base import Tidy3dBaseModel, cached_property from tidy3d.components.mode.mode_solver import ModeSolver +from tidy3d.components.mode.simulation import ModeSimulation from tidy3d.components.types import annotate_type from tidy3d.components.types.workflow import WorkflowDataType, WorkflowType from tidy3d.exceptions import DataError from tidy3d.log import get_logging_console, log from tidy3d.web.api import webapi as web -from tidy3d.web.api.tidy3d_stub import Tidy3dStub +from tidy3d.web.api.tidy3d_stub import Tidy3dStub, Tidy3dStubData +from tidy3d.web.api.webapi import _get_simulation_data_from_cache_entry, get_reduced_simulation +from tidy3d.web.cache import TMP_BATCH_PREFIX, CacheEntry, 
resolve_simulation_cache from tidy3d.web.core.constants import TaskId, TaskName from tidy3d.web.core.task_core import Folder from tidy3d.web.core.task_info import RunInfo, TaskInfo @@ -212,6 +217,12 @@ class Job(WebContainer): description="Specify the payment method.", ) + data_cache_path: str | None = pd.Field( + None, + title="Data Cache Path", + description="File where cache is copied to.", + ) + _upload_fields = ( "simulation", "task_name", @@ -224,6 +235,14 @@ class Job(WebContainer): "reduce_simulation", ) + _cache_file_moved = False + + use_cache: Optional[bool] = pd.Field( + None, + title="Use Cache", + description="Whether to use local cache for retrieving Simulation results.", + ) + def to_file(self, fname: str) -> None: """Exports :class:`Tidy3dBaseModel` instance to .yaml, .json, or .hdf5 file @@ -240,8 +259,20 @@ def to_file(self, fname: str) -> None: self = self.updated_copy(task_id_cached=task_id_cached) super(Job, self).to_file(fname=fname) # noqa: UP008 + def get_cache_hit_entry(self) -> Optional[CacheEntry]: + simulation_cache = resolve_simulation_cache(self.use_cache) + if simulation_cache is not None: + sim_for_cache = self.simulation + if isinstance(self.simulation, (ModeSolver, ModeSimulation)): + sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation) + entry = simulation_cache.try_fetch(simulation=sim_for_cache) + return entry + return None + def run( - self, path: str = DEFAULT_DATA_PATH, priority: Optional[int] = None + self, + path: str = DEFAULT_DATA_PATH, + priority: Optional[int] = None, ) -> WorkflowDataType: """Run :class:`Job` all the way through and return data. @@ -252,22 +283,62 @@ def run( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. + use_cache: Optional[bool] = None + Override cache usage behaviour for this call. ``True`` forces cache usage when available, + ``False`` bypasses it, and ``None`` defers to configuration and environment settings. Returns ------- :class:`WorkflowDataType` Object containing simulation results. """ - self.upload() - if priority is None: - self.start() - else: - self.start(priority=priority) - self.monitor() - return self.load(path=path) + self._check_path_dir(path=path) + + loaded_from_cache = self.load_if_cached + if not loaded_from_cache: + self.upload() + if priority is None: + self.start() + else: + self.start(priority=priority) + self.monitor() + data = self.load(path=path) + + return data + + @cached_property + def data_cache_path(self) -> Optional[str]: + "Temporary path where cached results are stored." + cache = resolve_simulation_cache(self.use_cache) + if cache is not None: + path = os.path.join(cache._root, TMP_BATCH_PREFIX, f"{self.task_name}.hdf5") + return path + return None + + @cached_property + def load_if_cached(self) -> bool: + """Checks if data is already cached. + + Returns + ------- + bool + Whether item was found in cache. + """ + path = self.data_cache_path + if path is None: + return False + self._check_path_dir(path=path) + entry = self.get_cache_hit_entry() + if entry is not None: + loaded_from_cache = _get_simulation_data_from_cache_entry(entry, path) + if loaded_from_cache: + return True + return False @cached_property def task_id(self) -> TaskId: """The task ID for this ``Job``. 
Uploads the ``Job`` if it hasn't already been uploaded."""
+        if self.load_if_cached:
+            return "cached_" + self.task_name + "_" + str(uuid.uuid4())
         if self.task_id_cached:
             return self.task_id_cached
         self._check_folder(self.folder_name)
@@ -281,7 +352,9 @@ def _upload(self) -> TaskId:
         return task_id
 
     def upload(self) -> None:
-        """Upload this ``Job``."""
+        """Upload this ``Job`` unless its results were already loaded from the cache."""
+        if self.load_if_cached:
+            return
         _ = self.task_id
 
     def get_info(self) -> TaskInfo:
@@ -298,6 +371,8 @@ def get_info(self) -> TaskInfo:
     @property
     def status(self):
         """Return current status of :class:`Job`."""
+        if self.load_if_cached:
+            return "success"
         return self.get_info().status
 
     def start(self, priority: Optional[int] = None) -> None:
@@ -312,13 +387,16 @@ def start(self, priority: Optional[int] = None) -> None:
         Note
         ----
         To monitor progress of the :class:`Job`, call :meth:`Job.monitor` after started.
+        This method has no effect if the cache is enabled and the result was found in the cache.
         """
-        web.start(
-            self.task_id,
-            solver_version=self.solver_version,
-            pay_type=self.pay_type,
-            priority=priority,
-        )
+        loaded = self.load_if_cached
+        if not loaded:
+            web.start(
+                self.task_id,
+                solver_version=self.solver_version,
+                pay_type=self.pay_type,
+                priority=priority,
+            )
 
     def get_run_info(self) -> RunInfo:
         """Return information about the running :class:`Job`.
@@ -338,6 +416,8 @@ def monitor(self) -> None:
         To load the output of completed simulation into :class:`.SimulationData` objects,
         call :meth:`Job.load`.
         """
+        if self.load_if_cached:
+            return
         web.monitor(self.task_id, verbose=self.verbose)
 
     def download(self, path: str = DEFAULT_DATA_PATH) -> None:
@@ -347,14 +427,30 @@ def download(self, path: str = DEFAULT_DATA_PATH) -> None:
         ----------
         path : str = "./simulation_data.hdf5"
             Path to download data as ``.hdf5`` file (including filename).
+        use_cache: Optional[bool] = None
+            Override cache usage behaviour for this call. ``True`` forces cache usage when available,
+            ``False`` bypasses it, and ``None`` defers to configuration and environment settings.
 
         Note
         ----
         To load the data after download, use :meth:`Job.load`.
         """
+        if self.load_if_cached:
+            self.move_cache_file(path=path)
+            return
         self._check_path_dir(path=path)
         web.download(task_id=self.task_id, path=path, verbose=self.verbose)
 
+    def move_cache_file(self, path: str) -> None:
+        """Move the cached result file from the cache staging area to ``path``."""
+        if self._cache_file_moved:
+            return
+        if os.path.exists(self.data_cache_path):
+            shutil.move(self.data_cache_path, path)
+            self._cache_file_moved = True
+        else:
+            raise FileNotFoundError(f"Cached file no longer exists in {path}.")
+
     def load(self, path: str = DEFAULT_DATA_PATH) -> WorkflowDataType:
         """Download job results and load them into a data object.
 
@@ -362,14 +457,26 @@ def load(self, path: str = DEFAULT_DATA_PATH) -> WorkflowDataType:
         ----------
         path : str = "./simulation_data.hdf5"
             Path to download data as ``.hdf5`` file (including filename).
+        use_cache: Optional[bool] = None
+            Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or
+            environment variables will be used.
 
         Returns
         -------
         Union[:class:`.SimulationData`, :class:`.HeatSimulationData`, :class:`.EMESimulationData`]
             Object containing simulation results.
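+
+        A short illustrative sketch (assumes an existing simulation object ``sim``;
+        the task name is arbitrary)::
+
+            job = Job(simulation=sim, task_name="demo", use_cache=True)
+            data = job.run(path="out.hdf5")  # an identical rerun is served from the cache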
""" + if self.load_if_cached: + self.move_cache_file(path=path) + data = Tidy3dStubData.postprocess(path) + return data self._check_path_dir(path=path) - data = web.load(task_id=self.task_id, path=path, verbose=self.verbose) + data = web.load( + task_id=self.task_id, + path=path, + verbose=self.verbose, + use_cache=self.use_cache, + ) if isinstance(self.simulation, ModeSolver): self.simulation._patch_data(data=data) return data @@ -411,6 +518,8 @@ def estimate_cost(self, verbose: bool = True) -> float: Cost is calculated assuming the simulation runs for the full ``run_time``. If early shut-off is triggered, the cost is adjusted proportionately. """ + if self.load_if_cached: + return 0.0 return web.estimate_cost(self.task_id, verbose=verbose, solver_version=self.solver_version) @staticmethod @@ -476,14 +585,33 @@ class BatchData(Tidy3dBaseModel, Mapping): verbose: bool = pd.Field( True, title="Verbose", description="Whether to print info messages and progressbars." ) + cached_tasks: Optional[dict[TaskName, bool]] = pd.Field( + None, + title="Cached Tasks", + description="Whether the data of a task came from the cache.", + ) + + use_cache: Optional[bool] = pd.Field( + None, + title="Use Cache", + description="Whether to use local cache for retrieving Simulation results.", + ) def load_sim_data(self, task_name: str) -> WorkflowDataType: """Load a simulation data object from file by task name.""" task_data_path = self.task_paths[task_name] task_id = self.task_ids[task_name] + from_cache = self.cached_tasks[task_name] if self.cached_tasks else False web.get_info(task_id) - return web.load(task_id=task_id, path=task_data_path, verbose=False) + return web.load( + task_id=task_id, + path=task_data_path, + verbose=False, + from_cache=from_cache, + use_cache=self.use_cache, + replace_existing=False, + ) def __getitem__(self, task_name: TaskName) -> WorkflowDataType: """Get the simulation data object for a given ``task_name``.""" @@ -623,6 +751,12 @@ class Batch(WebContainer): "fields that were not used to create the task will cause errors.", ) + use_cache: Optional[bool] = pd.Field( + None, + title="Use Cache", + description="Whether to use local cache for retrieving Simulation results.", + ) + _job_type = Job def run( @@ -659,14 +793,16 @@ def run( rather it iterates over the task names and loads the corresponding data from file one by one. If no file exists for that task, it downloads it. """ - self._check_path_dir(path_dir) - self.upload() - self.to_file(self._batch_path(path_dir=path_dir)) - if priority is None: - self.start() - else: - self.start(priority=priority) - self.monitor() + loaded = [job.load_if_cached for job in self.jobs.values()] + if not all(loaded): + self._check_path_dir(path_dir) + self.upload() + self.to_file(self._batch_path(path_dir=path_dir)) + if priority is None: + self.start() + else: + self.start(priority=priority) + self.monitor() return self.load(path_dir=path_dir) @cached_property @@ -708,6 +844,7 @@ def jobs(self) -> dict[TaskName, Job]: job_kwargs["solver_version"] = self.solver_version job_kwargs["pay_type"] = self.pay_type job_kwargs["reduce_simulation"] = self.reduce_simulation + job_kwargs["use_cache"] = self.use_cache if self.parent_tasks and task_name in self.parent_tasks: job_kwargs["parent_tasks"] = self.parent_tasks[task_name] job = JobType(**job_kwargs) @@ -1025,7 +1162,6 @@ def download(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = Fa log.info(f"File '{job_path_str}' already exists. 
Overwriting.") else: log.info(f"File '{job_path_str}' already exists. Skipping.") - continue if "error" in job.status: log.warning(f"Not downloading '{task_name}' as the task errored.") continue @@ -1087,7 +1223,14 @@ def load(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) task_paths[task_name] = self._job_data_path(task_id=job.task_id, path_dir=path_dir) task_ids[task_name] = self.jobs[task_name].task_id - data = BatchData(task_paths=task_paths, task_ids=task_ids, verbose=self.verbose) + loaded = {task_name: job.load_if_cached for task_name, job in self.jobs.items()} + data = BatchData( + task_paths=task_paths, + task_ids=task_ids, + verbose=self.verbose, + cached_tasks=loaded, + use_cache=self.use_cache, + ) for task_name, job in self.jobs.items(): if isinstance(job.simulation, ModeSolver): diff --git a/tidy3d/web/api/webapi.py b/tidy3d/web/api/webapi.py index e165b83b05..1fe1e56812 100644 --- a/tidy3d/web/api/webapi.py +++ b/tidy3d/web/api/webapi.py @@ -6,6 +6,7 @@ import os import tempfile import time +from pathlib import Path from typing import Callable, Literal, Optional, Union from requests import HTTPError @@ -18,6 +19,7 @@ from tidy3d.exceptions import WebError from tidy3d.log import get_logging_console, log from tidy3d.plugins.smatrix.component_modelers.terminal import TerminalComponentModeler +from tidy3d.web.cache import CacheEntry, resolve_simulation_cache from tidy3d.web.core.account import Account from tidy3d.web.core.constants import ( CM_DATA_HDF5_GZ, @@ -122,6 +124,16 @@ def _task_dict_to_url_bullet_list(data_dict: dict) -> str: return "\n".join([f"- {key}: '{value}'" for key, value in data_dict.items()]) +def _get_simulation_data_from_cache_entry(entry: CacheEntry, path: str) -> bool: + if entry is not None: + try: + entry.materialize(Path(path)) + return True + except Exception: + return False + return False + + @wait_for_connection def run( simulation: WorkflowType, @@ -139,6 +151,8 @@ def run( reduce_simulation: Literal["auto", True, False] = "auto", pay_type: Union[PayType, str] = PayType.AUTO, priority: Optional[int] = None, + use_cache: Optional[bool] = None, + lazy: bool = False, ) -> WorkflowDataType: """ Submits a :class:`.Simulation` to server, starts running, monitors progress, downloads, @@ -176,6 +190,11 @@ def run( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. + use_cache: Optional[bool] = None + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or + environment variables will be used. + lazy: bool = False + Whether to load the simulation data lazily (not until data access). Returns ------- Union[:class:`.SimulationData`, :class:`.HeatSimulationData`, :class:`.EMESimulationData`] @@ -220,30 +239,50 @@ def run( :meth:`tidy3d.web.api.container.Batch.monitor` Monitor progress of each of the running tasks. 
""" - task_id = upload( - simulation=simulation, - task_name=task_name, - folder_name=folder_name, - callback_url=callback_url, - verbose=verbose, - progress_callback=progress_callback_upload, - simulation_type=simulation_type, - parent_tasks=parent_tasks, - solver_version=solver_version, - reduce_simulation=reduce_simulation, - ) - start( - task_id, - verbose=verbose, - solver_version=solver_version, - worker_group=worker_group, - pay_type=pay_type, - priority=priority, - ) - monitor(task_id, verbose=verbose) + simulation_cache = resolve_simulation_cache(use_cache) + loaded_from_cache = False + if simulation_cache is not None: + sim_for_cache = simulation + if isinstance(simulation, (ModeSolver, ModeSimulation)): + sim_for_cache = get_reduced_simulation(simulation, reduce_simulation) + entry = simulation_cache.try_fetch(simulation=sim_for_cache) + loaded_from_cache = _get_simulation_data_from_cache_entry(entry, path) + + if not loaded_from_cache: + task_id = upload( + simulation=simulation, + task_name=task_name, + folder_name=folder_name, + callback_url=callback_url, + verbose=verbose, + progress_callback=progress_callback_upload, + simulation_type=simulation_type, + parent_tasks=parent_tasks, + solver_version=solver_version, + reduce_simulation=reduce_simulation, + ) + start( + task_id, + verbose=verbose, + solver_version=solver_version, + worker_group=worker_group, + pay_type=pay_type, + priority=priority, + ) + monitor(task_id, verbose=verbose) + else: + task_id = None + data = load( - task_id=task_id, path=path, verbose=verbose, progress_callback=progress_callback_download + task_id=task_id, + path=path, + verbose=verbose, + progress_callback=progress_callback_download, + use_cache=use_cache, + from_cache=loaded_from_cache, + lazy=lazy, ) + if isinstance(simulation, ModeSolver): simulation._patch_data(data=data) return data @@ -983,11 +1022,13 @@ def download_log( @wait_for_connection def load( - task_id: TaskId, + task_id: Optional[TaskId], path: str = "simulation_data.hdf5", replace_existing: bool = True, verbose: bool = True, progress_callback: Optional[Callable[[float], None]] = None, + use_cache: Optional[bool] = False, + from_cache: bool = False, lazy: bool = False, ) -> WorkflowDataType: """ @@ -1018,6 +1059,11 @@ def load( If ``True``, will print progressbars and status, otherwise, will run silently. progress_callback : Callable[[float], None] = None Optional callback function called when downloading file with ``bytes_in_chunk`` as argument. + use_cache: Optional[bool] = None + Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or + environment variables will be used. + from_cache: bool = None + Whether data will be loaded from cache. lazy : bool = False Whether to load the actual data (``lazy=False``) or return a proxy that loads the data when accessed (``lazy=True``). @@ -1027,22 +1073,43 @@ def load( Union[:class:`.SimulationData`, :class:`.HeatSimulationData`, :class:`.EMESimulationData`] Object containing simulation data. """ + assert from_cache or task_id, "Either task_id or from_cache must be provided." + # For component modeler batches, default to a clearer filename if the default was used. - if _is_modeler_batch(task_id) and os.path.basename(path) == "simulation_data.hdf5": + if ( + not from_cache + and _is_modeler_batch(task_id) + and os.path.basename(path) == "simulation_data.hdf5" + ): base_dir = os.path.dirname(path) or "." 
path = os.path.join(base_dir, "cm_data.hdf5") - if not os.path.exists(path) or replace_existing: + if from_cache: + if not os.path.exists(path): + raise FileNotFoundError("Cached file not found.") + elif not os.path.exists(path) or replace_existing: download(task_id=task_id, path=path, verbose=verbose, progress_callback=progress_callback) if verbose: console = get_logging_console() - if _is_modeler_batch(task_id): + if not from_cache and _is_modeler_batch(task_id): # TODO inspect console.log(f"loading component modeler data from {path}") else: console.log(f"loading simulation from {path}") stub_data = Tidy3dStubData.postprocess(path, lazy=lazy) + + simulation_cache = resolve_simulation_cache(use_cache) + if simulation_cache is not None and not from_cache: + info = get_info(task_id, verbose=False) + workflow_type = getattr(info, "taskType", None) or type(stub_data).__name__ + simulation_cache.store_result( + stub_data=stub_data, + task_id=task_id, + path=path, + workflow_type=workflow_type, + ) + return stub_data diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py new file mode 100644 index 0000000000..9eff7db204 --- /dev/null +++ b/tidy3d/web/cache.py @@ -0,0 +1,706 @@ +"""Local simulation cache manager.""" + +from __future__ import annotations + +import hashlib +import json +import os +import shutil +import tempfile +import threading +from collections.abc import Iterable +from dataclasses import dataclass, field, replace +from datetime import datetime, timezone +from enum import Enum +from functools import lru_cache +from pathlib import Path +from typing import Any, Optional + +from tidy3d import config +from tidy3d.components.types.workflow import WorkflowDataType, WorkflowType +from tidy3d.log import log +from tidy3d.web.api.tidy3d_stub import Tidy3dStub +from tidy3d.web.core.constants import TaskId +from tidy3d.web.core.http_util import get_version as _get_protocol_version + +DEFAULT_CACHE_RELATIVE_DIR = Path(".tidy3d") / "cache" / "simulations" +CACHE_ARTIFACT_NAME = "simulation_data.hdf5" +CACHE_METADATA_NAME = "metadata.json" + +ENV_ENABLE = "TIDY3D_CACHE_ENABLED" +ENV_DIRECTORY = "TIDY3D_CACHE_DIR" +ENV_MAX_SIZE = "TIDY3D_CACHE_MAX_SIZE_GB" +ENV_MAX_ENTRIES = "TIDY3D_CACHE_MAX_ENTRIES" + +TMP_PREFIX = "tidy3d-cache-" +TMP_BATCH_PREFIX = "tmp_batch" + + +_CONFIG_LOCK = threading.RLock() + + +@dataclass(frozen=True) +class SimulationCacheConfig: + """Configuration for the simulation cache.""" + + enabled: bool = False + directory: Path = field(default_factory=lambda: Path.home() / DEFAULT_CACHE_RELATIVE_DIR) + max_size_gb: float = 8.0 + max_entries: int = 32 + + +def _coerce_bool(value: str) -> Optional[bool]: + if value is None: + return None + normalized = value.strip().lower() + if normalized in {"1", "true", "yes", "on"}: + return True + if normalized in {"0", "false", "no", "off"}: + return False + return None + + +def _coerce_float(value: str) -> Optional[float]: + if value is None: + return None + try: + return float(value) + except (TypeError, ValueError): + return None + + +def _coerce_int(value: str) -> Optional[int]: + if value is None: + return None + try: + return int(value) + except (TypeError, ValueError): + return None + + +def _load_env_overrides() -> dict[str, Any]: + overrides: dict[str, Any] = {} + + enabled_env = _coerce_bool(os.getenv(ENV_ENABLE)) + if enabled_env is not None: + overrides["enabled"] = enabled_env + + directory_env = os.getenv(ENV_DIRECTORY) + if directory_env: + overrides["directory"] = directory_env + + size_env = 
_coerce_float(os.getenv(ENV_MAX_SIZE)) + if size_env is not None: + overrides["max_size_gb"] = size_env + + entries_env = _coerce_int(os.getenv(ENV_MAX_ENTRIES)) + if entries_env is not None: + overrides["max_entries"] = entries_env + + return overrides + + +def _load_effective_config() -> SimulationCacheConfig: + """ + Build the initial, global cache config at import-time. + + Precedence for fields (lowest → highest): + 1) library defaults (disabled, ~/.tidy3d/cache/simulations, limits) + 2) persisted app config (config.simulation_cache_settings), if present + 3) environment overrides (TIDY3D_CACHE_*) + + Note: per-call `use_cache` is *not* applied here; that’s handled in + resolve_simulation_cache(...), which can reconfigure the singleton later. + """ + sim_cache_settings = config.simulation_cache + + cfg = SimulationCacheConfig( + enabled=sim_cache_settings.enabled, + directory=sim_cache_settings.directory, + max_size_gb=sim_cache_settings.max_size_gb, + max_entries=sim_cache_settings.max_entries, + ) + + env_overrides = _load_env_overrides() + if env_overrides: + allowed = {k: v for k, v in env_overrides.items() if v is not None} + if allowed: + cfg = replace(cfg, **allowed) + + if cfg.directory: + cfg = replace(cfg, directory=Path(cfg.directory).expanduser().resolve()) + + return cfg + + +_CACHE_CONFIG: SimulationCacheConfig = _load_effective_config() + + +def get_cache_config() -> SimulationCacheConfig: + """Thread-safe snapshot copy of the active global cache configuration.""" + with _CONFIG_LOCK: + return replace(_CACHE_CONFIG) + + +def configure_cache(new_config: SimulationCacheConfig) -> None: + """Swap the active global config and reset the cache singleton.""" + global _CACHE_CONFIG + with _CONFIG_LOCK: + _CACHE_CONFIG = new_config + get_cache.cache_clear() + + +@lru_cache +def get_cache() -> SimulationCache: + """ + Return the singleton SimulationCache built from the *current* global config. + + This is automatically refreshed whenever `configure_cache(...)` is called, + because that function clears this LRU entry. + """ + cfg = get_cache_config() + return SimulationCache(cfg) + + +def _merge_from_tidy3d_config() -> SimulationCacheConfig: + """Overlay app-level persisted settings (if any) onto the current global config snapshot.""" + simulation_cache_settings = config.simulation_cache + return SimulationCacheConfig( + enabled=simulation_cache_settings.enabled, + directory=simulation_cache_settings.directory, + max_size_gb=simulation_cache_settings.max_size_gb, + max_entries=simulation_cache_settings.max_entries, + ) + + +def _apply_overrides( + cfg: SimulationCacheConfig, overrides: dict[str, Any] +) -> SimulationCacheConfig: + """Apply dict-based overrides (enabled/directory/max_size_gb/max_entries).""" + if not overrides: + return cfg + # Filter to fields that exist on the dataclass and are not None + allowed = {k: v for k, v in overrides.items() if v is not None and hasattr(cfg, k)} + return replace(cfg, **allowed) if allowed else cfg + + +def resolve_simulation_cache(use_cache: Optional[bool] = None) -> Optional[SimulationCache]: + """ + Return a SimulationCache configured from: + 1) persisted config (directory/limits + default enabled), + 2) environment overrides (enabled + directory/limits), + 3) per-call 'use_cache' (enabled only, highest precedence). + + If effective config differs from the active global config, reconfigure the singleton. + Returns None if final 'enabled' is False. 
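+
+    Illustrative usage (return values depend on local configuration and state)::
+
+        cache = resolve_simulation_cache(use_cache=True)  # force-enable for this call
+        if cache is not None:
+            print(len(cache), "entries stored under", cache.root)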
+ """ + current = get_cache_config() + desired = _load_effective_config() + + if use_cache is not None: + if desired.directory != current.directory: + get_cache().clear(hard=True) + desired = replace(desired, enabled=use_cache) + + if desired != current: + configure_cache(desired) + + if not desired.enabled: + return None + + try: + return get_cache() + except Exception as err: + log.debug("Simulation cache unavailable: %s", err) + return None + + +@dataclass +class CacheEntry: + """Internal representation of a cache entry.""" + + key: str + root: Path + metadata: dict[str, Any] + + @property + def path(self) -> Path: + return self.root / self.key + + @property + def artifact_path(self) -> Path: + return self.path / CACHE_ARTIFACT_NAME + + @property + def metadata_path(self) -> Path: + return self.path / CACHE_METADATA_NAME + + def exists(self) -> bool: + return self.path.exists() and self.artifact_path.exists() and self.metadata_path.exists() + + def verify(self) -> bool: + if not self.exists(): + return False + checksum = self.metadata.get("checksum") + if not checksum: + return False + try: + actual_checksum, file_size = _copy_and_hash(self.artifact_path, None) + except FileNotFoundError: + return False + if checksum != actual_checksum: + log.warning( + "Simulation cache checksum mismatch for key '%s'. Removing stale entry.", self.key + ) + return False + if int(self.metadata.get("file_size", file_size)) != file_size: + self.metadata["file_size"] = file_size + _write_metadata(self.metadata_path, self.metadata) + return True + + def materialize(self, target: Path) -> Path: + """Copy cached artifact to ``target`` and return the resulting path.""" + target = Path(target) + target.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(self.artifact_path, target) + return target + + +class SimulationCache: + """Manages storing and retrieving cached simulation artifacts.""" + + def __init__(self, config: SimulationCacheConfig): + self._config = config + self._root = Path(config.directory).expanduser().resolve() + self._lock = threading.RLock() + if config.enabled: + self._root.mkdir(parents=True, exist_ok=True) + + @property + def config(self) -> SimulationCacheConfig: + return self._config + + @property + def root(self) -> Path: + return self._root + + def list(self) -> list[dict[str, Any]]: + """Return metadata for all cache entries.""" + with self._lock: + return [entry.metadata for entry in self._iter_entries()] + + def clear(self, hard=False) -> None: + """Remove all cache contents.""" + with self._lock: + if self._root.exists(): + try: + shutil.rmtree(self._root) + if not hard: + self._root.mkdir(parents=True, exist_ok=True) + except (FileNotFoundError, OSError): + pass + + def _fetch(self, key: str) -> Optional[CacheEntry]: + """Retrieve an entry by key, verifying checksum.""" + with self._lock: + entry = self._load_entry(key) + if not entry or not entry.exists(): + return None + if not entry.verify(): + self._remove_entry(entry) + return None + self._touch(entry) + return entry + + def fetch_by_task(self, task_id: str) -> Optional[CacheEntry]: + """Retrieve an entry by task id.""" + with self._lock: + for entry in self._iter_entries(): + metadata = entry.metadata + task_ids = metadata.get("task_ids", []) + if task_id in task_ids and entry.exists(): + if not entry.verify(): + self._remove_entry(entry) + return None + self._touch(entry) + return entry + return None + + def __len__(self) -> int: + """Return number of valid cache entries.""" + with self._lock: + return sum(1 for _ in 
self._iter_entries()) + + def _store( + self, key: str, task_id: Optional[str], source_path: Path, metadata: dict[str, Any] + ) -> Optional[CacheEntry]: + """Store a new cache entry from ``source_path``. + + Parameters + ---------- + key : str + Cache key computed from simulation hash and runtime context. + task_id : str, optional + Server task id associated with this artifact. + source_path : Path + Location of the artifact to cache. + metadata : dict[str, Any] + Additional metadata to persist alongside artifact. + + Returns + ------- + CacheEntry + Representation of the stored cache entry. + """ + source_path = Path(source_path) + if not source_path.exists(): + raise FileNotFoundError(f"Cannot cache missing artifact: {source_path}") + os.makedirs(self._root, exist_ok=True) + tmp_dir = Path(tempfile.mkdtemp(prefix=TMP_PREFIX, dir=self._root)) + tmp_artifact = tmp_dir / CACHE_ARTIFACT_NAME + tmp_meta = tmp_dir / CACHE_METADATA_NAME + os.makedirs(tmp_dir, exist_ok=True) + + checksum, file_size = _copy_and_hash(source_path, tmp_artifact) + now_iso = _now() + metadata = dict(metadata) + metadata.setdefault("cache_key", key) + metadata.setdefault("created_at", now_iso) + metadata["last_used"] = now_iso + metadata["checksum"] = checksum + metadata["file_size"] = file_size + if task_id: + task_ids = list(metadata.get("task_ids", [])) + if task_id not in task_ids: + task_ids.append(task_id) + metadata["task_ids"] = task_ids + + _write_metadata(tmp_meta, metadata) + try: + with self._lock: + self._root.mkdir(parents=True, exist_ok=True) + self._ensure_limits(file_size) + final_dir = self._root / key + backup_dir: Optional[Path] = None + + try: + if final_dir.exists(): + backup_dir = final_dir.with_name( + f"{final_dir.name}.bak.{_timestamp_suffix()}" + ) + os.replace(final_dir, backup_dir) + # move tmp_dir into place + os.replace(tmp_dir, final_dir) + except Exception: + # restore backup if needed + if backup_dir and backup_dir.exists(): + os.replace(backup_dir, final_dir) + raise + else: + entry = CacheEntry(key=key, root=self._root, metadata=metadata) + if backup_dir and backup_dir.exists(): + shutil.rmtree(backup_dir, ignore_errors=True) + log.debug("Stored simulation cache entry '%s' (%d bytes).", key, file_size) + return entry + finally: + try: + if tmp_dir.exists(): + shutil.rmtree(tmp_dir, ignore_errors=True) + except FileNotFoundError: + pass + + def invalidate(self, key: str) -> None: + with self._lock: + entry = self._load_entry(key) + if entry: + self._remove_entry(entry) + + def _ensure_limits(self, incoming_size: int) -> None: + max_entries = max(self._config.max_entries, 0) + max_size_bytes = int(max(0.0, self._config.max_size_gb) * (1024**3)) + + entries = list(self._iter_entries()) + if max_entries and len(entries) >= max_entries: + self._evict(entries, keep=max_entries - 1) + entries = list(self._iter_entries()) + + if not max_size_bytes: + return + + existing_size = sum(int(e.metadata.get("file_size", 0)) for e in entries) + allowed_size = max(max_size_bytes - incoming_size, 0) + if existing_size > allowed_size: + self._evict_by_size(entries, existing_size, allowed_size) + + def _evict(self, entries: Iterable[CacheEntry], keep: int) -> None: + sorted_entries = sorted(entries, key=lambda e: e.metadata.get("last_used", "")) + to_remove = sorted_entries[: max(0, len(sorted_entries) - keep)] + for entry in to_remove: + self._remove_entry(entry) + + def _evict_by_size( + self, entries: Iterable[CacheEntry], current_size: int, allowed_size: float + ) -> None: + if allowed_size < 0: + 
allowed_size = 0 + sorted_entries = sorted(entries, key=lambda e: e.metadata.get("last_used", "")) + reclaimed = 0 + for entry in sorted_entries: + if current_size - reclaimed <= allowed_size: + break + size = int(entry.metadata.get("file_size", 0)) + self._remove_entry(entry) + reclaimed += size + log.info(f"Simulation cache evicted entry '{entry.key}' to reclaim {size} bytes.") + + def _iter_entries(self) -> Iterable[CacheEntry]: + if not self._root.exists(): + return [] + entries: list[CacheEntry] = [] + for child in self._root.iterdir(): + if child.name.startswith(TMP_PREFIX) or child.name.startswith(TMP_BATCH_PREFIX): + continue + meta_path = child / CACHE_METADATA_NAME + if not meta_path.exists(): + continue + try: + metadata = json.loads(meta_path.read_text(encoding="utf-8")) + except Exception: + metadata = {} + entries.append(CacheEntry(key=child.name, root=self._root, metadata=metadata)) + return entries + + def _load_entry(self, key: str) -> Optional[CacheEntry]: + entry = CacheEntry(key=key, root=self._root, metadata={}) + if not entry.metadata_path.exists() or not entry.artifact_path.exists(): + return None + try: + metadata = json.loads(entry.metadata_path.read_text(encoding="utf-8")) + except Exception: + metadata = {} + entry.metadata = metadata + return entry + + def _touch(self, entry: CacheEntry) -> None: + entry.metadata["last_used"] = _now() + _write_metadata(entry.metadata_path, entry.metadata) + + def _remove_entry(self, entry: CacheEntry) -> None: + if entry.path.exists(): + shutil.rmtree(entry.path, ignore_errors=True) + + def try_fetch( + self, + simulation: WorkflowType, + verbose: bool = False, + ) -> Optional[CacheEntry]: + """ + Attempt to resolve and fetch a cached result entry for the given simulation context. + On miss or any cache error, returns None (the caller should proceed with upload/run). + + Notes + ----- + - Mirrors the exact cache key/context computation from `run`. + - Safe to call regardless of `use_cache` value; will no-op if cache is disabled. + """ + try: + simulation_hash = simulation._hash_self() + workflow_type = Tidy3dStub(simulation=simulation).get_type() + + versions = _get_protocol_version() + + cache_key = build_cache_key( + simulation_hash=simulation_hash, + workflow_type=workflow_type, + version=versions, + ) + + entry = self._fetch(cache_key) + if not entry: + return None + # self._store(key=cache_key, task_id=task_id, source_path=path, metadata={}) + if verbose: + log.info( + "Simulation cache hit for workflow '%s'; using local results.", workflow_type + ) + + return entry + except Exception as e: + log.error("Failed to fetch cache results." + str(e)) + + def store_result( + self, + stub_data: WorkflowDataType, + task_id: TaskId, + path: str, + workflow_type: str, + ) -> None: + """ + After we have the data (postprocess done), store it in the cache using the + canonical key (simulation hash + workflow type + environment + version). + Also records the task_id mapping for legacy lookups. 
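+
+        Illustrative call (normally invoked from ``web.load`` after postprocessing;
+        the argument values shown are placeholders)::
+
+            cache.store_result(stub_data=data, task_id="fdve-...", path="out.hdf5", workflow_type="FDTD")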
+ """ + try: + simulation_obj = getattr(stub_data, "simulation", None) + simulation_hash = simulation_obj._hash_self() if simulation_obj is not None else None + if not simulation_hash: + return + + version = _get_protocol_version() + + cache_key = build_cache_key( + simulation_hash=simulation_hash, + workflow_type=workflow_type, + version=version, + ) + + metadata = build_entry_metadata( + simulation_hash=simulation_hash, + workflow_type=workflow_type, + runtime_context={ + "task_id": task_id, + }, + version=version, + extras={"path": str(Path(path))}, + ) + + self._store( + key=cache_key, + task_id=task_id, # keeps a reverse link for legacy fetch_by_task + source_path=Path(path), + metadata=metadata, + ) + except Exception: + log.error("Could not store cache entry.") + + +def _copy_and_hash( + source: Path, dest: Optional[Path], existing_hash: Optional[str] = None +) -> tuple[str, int]: + """Copy ``source`` to ``dest`` while computing SHA256 checksum. + + Parameters + ---------- + source : Path + Source file path. + dest : Path or None + Destination file path. If ``None``, no copy is performed. + existing_hash : str, optional + If provided alongside ``dest`` and ``dest`` already exists, skip copying when hashes match. + + Returns + ------- + tuple[str, int] + The hexadecimal digest and file size in bytes. + """ + source = Path(source) + if dest is not None: + dest = Path(dest) + sha256 = _Hasher() + size = 0 + with source.open("rb") as src: + if dest is None: + while chunk := src.read(1024 * 1024): + sha256.update(chunk) + size += len(chunk) + else: + dest.parent.mkdir(parents=True, exist_ok=True) + with dest.open("wb") as dst: + while chunk := src.read(1024 * 1024): + dst.write(chunk) + sha256.update(chunk) + size += len(chunk) + return sha256.hexdigest(), size + + +def _write_metadata(path: Path, metadata: dict[str, Any]) -> None: + tmp_path = path.with_suffix(".tmp") + with tmp_path.open("w", encoding="utf-8") as fh: + json.dump(metadata, fh, indent=2, sort_keys=True) + os.replace(tmp_path, path) + + +def _now() -> str: + return datetime.now(timezone.utc).isoformat() + + +def _timestamp_suffix() -> str: + return datetime.now(timezone.utc).strftime("%Y%m%d%H%M%S%f") + + +class _Hasher: + def __init__(self): + self._hasher = hashlib.sha256() + + def update(self, data: bytes) -> None: + self._hasher.update(data) + + def hexdigest(self) -> str: + return self._hasher.hexdigest() + + +def clear() -> None: + """Remove all cache entries.""" + get_cache().clear() + + +def _canonicalize(value: Any) -> Any: + """Convert value into a JSON-serializable object for hashing/metadata.""" + + if isinstance(value, dict): + return { + str(k): _canonicalize(v) + for k, v in sorted(value.items(), key=lambda item: str(item[0])) + } + if isinstance(value, (list, tuple)): + return [_canonicalize(v) for v in value] + if isinstance(value, set): + return sorted(_canonicalize(v) for v in value) + if isinstance(value, Enum): + return value.value + if isinstance(value, Path): + return str(value) + if isinstance(value, datetime): + return value.isoformat() + if isinstance(value, bytes): + return value.decode("utf-8", errors="ignore") + return value + + +def build_cache_key( + *, + simulation_hash: str, + workflow_type: str, + version: str, +) -> str: + """Construct a deterministic cache key.""" + + payload = { + "simulation_hash": simulation_hash, + "workflow_type": workflow_type, + "versions": _canonicalize(version), + } + encoded = json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") + return 
hashlib.sha256(encoded).hexdigest() + + +def build_entry_metadata( + *, + simulation_hash: str, + workflow_type: str, + runtime_context: dict[str, Any], + version: str, + extras: Optional[dict[str, Any]] = None, +) -> dict[str, Any]: + """Create metadata dictionary for a cache entry.""" + + metadata: dict[str, Any] = { + "simulation_hash": simulation_hash, + "workflow_type": workflow_type, + "runtime_context": _canonicalize(runtime_context), + "versions": _canonicalize(version), + "task_ids": [], + } + if extras: + metadata.update(_canonicalize(extras)) + return metadata From 52dcb41b6a49ef585b36fa862df07b1e1364845a Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 7 Oct 2025 13:39:41 +0200 Subject: [PATCH 11/22] tiny fixes --- playground/cache_conf_test.py | 19 +++++++++++++++++++ tests/test_web/test_simulation_cache.py | 1 - tidy3d/web/api/autograd/autograd.py | 1 - tidy3d/web/api/container.py | 8 +------- tidy3d/web/cache.py | 3 +-- 5 files changed, 21 insertions(+), 11 deletions(-) create mode 100644 playground/cache_conf_test.py diff --git a/playground/cache_conf_test.py b/playground/cache_conf_test.py new file mode 100644 index 0000000000..83e652299e --- /dev/null +++ b/playground/cache_conf_test.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from tidy3d import config +from tidy3d.web.cache import resolve_simulation_cache + +# config.simulation_cache.max_size_gb = float(10_000 * 1e-9) +# cache = resolve_simulation_cache(use_cache=True) +# print(cache.config) +tmp_path = "dfsd" + +config.simulation_cache.enabled = True +config.simulation_cache.directory = tmp_path +config.simulation_cache.max_size_gb = 1.23 +config.simulation_cache.max_entries = 5 +cfg = resolve_simulation_cache().config +assert cfg.enabled is True +assert cfg.directory == tmp_path +assert cfg.max_size_gb == 1.23 +assert cfg.max_entries == 5 diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index 3742aa1eb2..ea3a981d3e 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -265,7 +265,6 @@ def _test_cache_eviction_by_size(monkeypatch, tmp_path_factory, basic_simulation cache.store_result(_FakeStubData(sim2), MOCK_TASK_ID, str(file2), "FDTD") entries = cache.list() - print("len(entries)", len(entries)) assert len(cache) == 1 assert entries[0]["simulation_hash"] == sim2._hash_self() diff --git a/tidy3d/web/api/autograd/autograd.py b/tidy3d/web/api/autograd/autograd.py index 928b953c02..5bd4e611e5 100644 --- a/tidy3d/web/api/autograd/autograd.py +++ b/tidy3d/web/api/autograd/autograd.py @@ -325,7 +325,6 @@ def run_async( Whether to reduce structures in the simulation to the simulation domain only. Note: currently only implemented for the mode solver. pay_type: typing.Union[PayType, str] = PayType.AUTO Specify the payment method. - Whether to reduce structures in the simulation to the simulation domain only. Note: currently only implemented for the mode solver. use_cache: Optional[bool] = None Whether to use local cache if identical simulation is rerun. If not provided, cache settings from config or environment variables will be used. diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index 2c85f67eb5..e91217e8bf 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -283,9 +283,6 @@ def run( priority: int = None Priority of the simulation in the Virtual GPU (vGPU) queue (1 = lowest, 10 = highest). 
It affects only simulations from vGPU licenses and does not impact simulations using FlexCredits. - use_cache: Optional[bool] = None - Override cache usage behaviour for this call. ``True`` forces cache usage when available, - ``False`` bypasses it, and ``None`` defers to configuration and environment settings. Returns ------- :class:`WorkflowDataType` @@ -427,9 +424,6 @@ def download(self, path: str = DEFAULT_DATA_PATH) -> None: ---------- path : str = "./simulation_data.hdf5" Path to download data as ``.hdf5`` file (including filename). - use_cache: Optional[bool] = None - Override cache usage behaviour for this call. ``True`` forces cache usage when available, - ``False`` bypasses it, and ``None`` defers to configuration and environment settings. Note ---- @@ -448,7 +442,7 @@ def move_cache_file(self, path: str) -> None: shutil.move(self.data_cache_path, path) self._cache_file_moved = True else: - raise FileNotFoundError(f"Cached file does not longer exist in {path}.") + raise FileNotFoundError(f"Cached file does not longer exist in {self.data_cache_path}.") def load(self, path: str = DEFAULT_DATA_PATH) -> WorkflowDataType: """Download job results and load them into a data object. diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py index 9eff7db204..85e441bd6f 100644 --- a/tidy3d/web/cache.py +++ b/tidy3d/web/cache.py @@ -518,7 +518,6 @@ def try_fetch( entry = self._fetch(cache_key) if not entry: return None - # self._store(key=cache_key, task_id=task_id, source_path=path, metadata={}) if verbose: log.info( "Simulation cache hit for workflow '%s'; using local results.", workflow_type @@ -526,7 +525,7 @@ def try_fetch( return entry except Exception as e: - log.error("Failed to fetch cache results." + str(e)) + log.error("Failed to fetch cache results.") def store_result( self, From 9c018331d51de4cc7007efebbbb684ccaa9cddb4 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 7 Oct 2025 13:41:38 +0200 Subject: [PATCH 12/22] lint --- tests/test_web/test_simulation_cache.py | 2 +- tidy3d/config.py | 2 ++ tidy3d/web/api/container.py | 6 ------ tidy3d/web/cache.py | 2 +- 4 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index ea3a981d3e..6ae59a2b52 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -304,4 +304,4 @@ def test_cache_end_to_end(monkeypatch, tmp_path, tmp_path_factory, basic_simulat _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation) _test_cache_eviction_by_entries(monkeypatch, tmp_path_factory, basic_simulation) _test_cache_eviction_by_size(monkeypatch, tmp_path_factory, basic_simulation) - _test_run_cache_hit_async(monkeypatch, basic_simulation) \ No newline at end of file + _test_run_cache_hit_async(monkeypatch, basic_simulation) diff --git a/tidy3d/config.py b/tidy3d/config.py index 7d093a1502..7fa0c6c169 100644 --- a/tidy3d/config.py +++ b/tidy3d/config.py @@ -8,6 +8,7 @@ import pydantic.v1 as pd from .log import DEFAULT_LEVEL, LogLevel, set_log_suppression, set_logging_level + _DEFAULT_CACHE_DIR = Path.home() / ".tidy3d" / "cache" / "simulations" @@ -37,6 +38,7 @@ class SimulationCacheSettings(pd.BaseModel): def _validate_directory(cls, value): return Path(value).expanduser() + class Tidy3dConfig(pd.BaseModel): """configuration of tidy3d""" diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index e91217e8bf..926bdd0ed1 100644 --- a/tidy3d/web/api/container.py +++ 
b/tidy3d/web/api/container.py @@ -217,12 +217,6 @@ class Job(WebContainer): description="Specify the payment method.", ) - data_cache_path: str | None = pd.Field( - None, - title="Data Cache Path", - description="File where cache is copied to.", - ) - _upload_fields = ( "simulation", "task_name", diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py index 85e441bd6f..ebf660e82e 100644 --- a/tidy3d/web/cache.py +++ b/tidy3d/web/cache.py @@ -524,7 +524,7 @@ def try_fetch( ) return entry - except Exception as e: + except Exception: log.error("Failed to fetch cache results.") def store_result( From 30b3aea307de3a78db2f767c1555ee6dbf66e538 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 7 Oct 2025 13:43:56 +0200 Subject: [PATCH 13/22] delete test file --- playground/cache_conf_test.py | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 playground/cache_conf_test.py diff --git a/playground/cache_conf_test.py b/playground/cache_conf_test.py deleted file mode 100644 index 83e652299e..0000000000 --- a/playground/cache_conf_test.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import annotations - -from tidy3d import config -from tidy3d.web.cache import resolve_simulation_cache - -# config.simulation_cache.max_size_gb = float(10_000 * 1e-9) -# cache = resolve_simulation_cache(use_cache=True) -# print(cache.config) -tmp_path = "dfsd" - -config.simulation_cache.enabled = True -config.simulation_cache.directory = tmp_path -config.simulation_cache.max_size_gb = 1.23 -config.simulation_cache.max_entries = 5 -cfg = resolve_simulation_cache().config -assert cfg.enabled is True -assert cfg.directory == tmp_path -assert cfg.max_size_gb == 1.23 -assert cfg.max_entries == 5 From 6baec0ce707525a61c75810423761ead0feb8051 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 7 Oct 2025 14:38:39 +0200 Subject: [PATCH 14/22] patch web container _check_folder --- tests/test_web/test_simulation_cache.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index 6ae59a2b52..fb56a59c66 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -6,14 +6,17 @@ import tidy3d as td from tidy3d import config -from tidy3d.web import Job, run_async +from tidy3d.web import Job, run_async, common from tidy3d.web.api import webapi as web +from tidy3d.web.api.container import WebContainer from tidy3d.web.cache import ( CACHE_ARTIFACT_NAME, get_cache, resolve_simulation_cache, ) +common.CONNECTION_RETRY_TIME = 0.1 + MOCK_TASK_ID = "task-xyz" # --- Fake pipeline global maps / queue --- TASK_TO_SIM: dict[str, td.Simulation] = {} # task_id -> Simulation @@ -120,9 +123,13 @@ def _fake_download(*, task_id, path, **kwargs): if sim is not None: PATH_TO_SIM[str(Path(path))] = sim + def _fake__check_folder(*args, **kwargs): + pass + def _fake_status(self): return "success" + monkeypatch.setattr(WebContainer, "_check_folder", _fake__check_folder) monkeypatch.setattr(web, "upload", _fake_upload) monkeypatch.setattr(web, "start", _fake_start) monkeypatch.setattr(web, "monitor", _fake_monitor) From d88102b69a17bdbc1234b13d4d31030a790fdcad Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 7 Oct 2025 14:39:52 +0200 Subject: [PATCH 15/22] lint --- tests/test_web/test_simulation_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index 
fb56a59c66..b2326bbee6 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -6,7 +6,7 @@ import tidy3d as td from tidy3d import config -from tidy3d.web import Job, run_async, common +from tidy3d.web import Job, common, run_async from tidy3d.web.api import webapi as web from tidy3d.web.api.container import WebContainer from tidy3d.web.cache import ( From f09a7d574f48b0bf6646b14caf012c623c650006 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 7 Oct 2025 15:18:27 +0200 Subject: [PATCH 16/22] add job test --- tests/test_web/test_simulation_cache.py | 18 ++++++++++++++++++ tidy3d/web/api/container.py | 3 ++- tidy3d/web/cache.py | 11 ----------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index b2326bbee6..940c34d339 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -203,6 +203,23 @@ def _test_run_cache_hit_async(monkeypatch, basic_simulation): assert len(cache) == 3 +def _test_job_run_cache(monkeypatch, tmp_path_factory, basic_simulation): + counters = _patch_run_pipeline(monkeypatch) + cache = resolve_simulation_cache(use_cache=True) + cache.clear() + job = Job(simulation=basic_simulation, use_cache=True, task_name="test") + job.run() + + assert len(cache) == 1 + + _reset_counters(counters) + + job2 = Job(simulation=basic_simulation, use_cache=True, task_name="test") + job2.run() + assert len(cache) == 1 + assert counters["download"] == 0 + + def _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): get_cache().clear() counters = _patch_run_pipeline(monkeypatch) @@ -312,3 +329,4 @@ def test_cache_end_to_end(monkeypatch, tmp_path, tmp_path_factory, basic_simulat _test_cache_eviction_by_entries(monkeypatch, tmp_path_factory, basic_simulation) _test_cache_eviction_by_size(monkeypatch, tmp_path_factory, basic_simulation) _test_run_cache_hit_async(monkeypatch, basic_simulation) + _test_job_run_cache(monkeypatch, tmp_path_factory, basic_simulation) diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index 926bdd0ed1..0a94ab2860 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -13,6 +13,7 @@ from typing import Literal, Optional, Union import pydantic.v1 as pd +from pydantic.v1 import PrivateAttr from rich.progress import BarColumn, Progress, TaskProgressColumn, TextColumn, TimeElapsedColumn from tidy3d.components.base import Tidy3dBaseModel, cached_property @@ -229,7 +230,7 @@ class Job(WebContainer): "reduce_simulation", ) - _cache_file_moved = False + _cache_file_moved: bool = PrivateAttr(default=False) use_cache: Optional[bool] = pd.Field( None, diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py index ebf660e82e..693542ac9d 100644 --- a/tidy3d/web/cache.py +++ b/tidy3d/web/cache.py @@ -162,17 +162,6 @@ def get_cache() -> SimulationCache: return SimulationCache(cfg) -def _merge_from_tidy3d_config() -> SimulationCacheConfig: - """Overlay app-level persisted settings (if any) onto the current global config snapshot.""" - simulation_cache_settings = config.simulation_cache - return SimulationCacheConfig( - enabled=simulation_cache_settings.enabled, - directory=simulation_cache_settings.directory, - max_size_gb=simulation_cache_settings.max_size_gb, - max_entries=simulation_cache_settings.max_entries, - ) - - def _apply_overrides( cfg: SimulationCacheConfig, overrides: dict[str, Any] ) -> SimulationCacheConfig: From 
40f893dae24393121d2a0107a73450af3847db8f Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Fri, 10 Oct 2025 09:44:51 +0200 Subject: [PATCH 17/22] unified restore cache func, simplify test without sim order --- tests/test_web/test_simulation_cache.py | 9 ------ tidy3d/web/api/container.py | 43 +++++++++++-------------- tidy3d/web/api/webapi.py | 28 +++++++++++----- 3 files changed, 39 insertions(+), 41 deletions(-) diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index baca20ac5a..cb2f396475 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -18,13 +18,11 @@ # --- Fake pipeline global maps / queue --- TASK_TO_SIM: dict[str, td.Simulation] = {} # task_id -> Simulation PATH_TO_SIM: dict[str, td.Simulation] = {} # artifact path -> Simulation -SIM_ORDER: list[td.Simulation] = [] # fallback queue when upload isn't called def _reset_fake_maps(): TASK_TO_SIM.clear() PATH_TO_SIM.clear() - SIM_ORDER.clear() class _FakeStubData: @@ -96,9 +94,6 @@ def _fake_upload(**kwargs): counters["upload"] += 1 task_id = f"{MOCK_TASK_ID}{counters['upload']}" sim = _extract_simulation(kwargs) - if sim is None and SIM_ORDER: - # Upload wasn't given the sim (or async path differs) -> fallback - sim = SIM_ORDER.pop(0) if sim is not None: TASK_TO_SIM[task_id] = sim return task_id @@ -113,9 +108,6 @@ def _fake_download(*, task_id, path, **kwargs): counters["download"] += 1 # Ensure we have a simulation for this task id (even if upload wasn't called) sim = TASK_TO_SIM.get(task_id) - if sim is None and SIM_ORDER: - sim = SIM_ORDER.pop(0) - TASK_TO_SIM[task_id] = sim Path(path).write_text(f"payload:{task_id}") if sim is not None: PATH_TO_SIM[str(Path(path))] = sim @@ -169,7 +161,6 @@ def _test_run_cache_hit_async(monkeypatch, basic_simulation): _reset_counters(counters) sim2 = basic_simulation.updated_copy(shutoff=1e-4) sim3 = basic_simulation.updated_copy(shutoff=1e-3) - SIM_ORDER[:] = [basic_simulation, sim2, sim3] data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) data_task1 = data["task1"] # access to store in cache diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index 6af0e3136c..b7a7f3b5ba 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -17,15 +17,16 @@ from tidy3d.components.base import Tidy3dBaseModel, cached_property from tidy3d.components.mode.mode_solver import ModeSolver -from tidy3d.components.mode.simulation import ModeSimulation from tidy3d.components.types import annotate_type from tidy3d.components.types.workflow import WorkflowDataType, WorkflowType from tidy3d.exceptions import DataError from tidy3d.log import get_logging_console, log from tidy3d.web.api import webapi as web -from tidy3d.web.api.tidy3d_stub import Tidy3dStub, Tidy3dStubData -from tidy3d.web.api.webapi import _get_simulation_data_from_cache_entry, get_reduced_simulation -from tidy3d.web.cache import TMP_BATCH_PREFIX, CacheEntry, resolve_simulation_cache +from tidy3d.web.api.tidy3d_stub import Tidy3dStub +from tidy3d.web.api.webapi import ( + restore_simulation_if_cached, +) +from tidy3d.web.cache import TMP_BATCH_PREFIX, resolve_simulation_cache from tidy3d.web.core.constants import TaskId, TaskName from tidy3d.web.core.task_core import Folder from tidy3d.web.core.task_info import RunInfo, TaskInfo @@ -253,16 +254,6 @@ def to_file(self, fname: str) -> None: self = self.updated_copy(task_id_cached=task_id_cached) super(Job, self).to_file(fname=fname) # noqa: 
UP008 - def get_cache_hit_entry(self) -> Optional[CacheEntry]: - simulation_cache = resolve_simulation_cache(self.use_cache) - if simulation_cache is not None: - sim_for_cache = self.simulation - if isinstance(self.simulation, (ModeSolver, ModeSimulation)): - sim_for_cache = get_reduced_simulation(self.simulation, self.reduce_simulation) - entry = simulation_cache.try_fetch(simulation=sim_for_cache) - return entry - return None - def run( self, path: str = DEFAULT_DATA_PATH, @@ -304,7 +295,7 @@ def data_cache_path(self) -> Optional[str]: "Temporary path where cached results are stored." cache = resolve_simulation_cache(self.use_cache) if cache is not None: - path = os.path.join(cache._root, TMP_BATCH_PREFIX, f"{self.task_name}.hdf5") + path = os.path.join(cache._root, TMP_BATCH_PREFIX, f"{self.task_id_if_cached}.hdf5") return path return None @@ -321,18 +312,22 @@ def load_if_cached(self) -> bool: if path is None: return False self._check_path_dir(path=path) - entry = self.get_cache_hit_entry() - if entry is not None: - loaded_from_cache = _get_simulation_data_from_cache_entry(entry, path) - if loaded_from_cache: - return True - return False + return restore_simulation_if_cached( + simulation=self.simulation, + path=path, + use_cache=self.use_cache, + reduce_simulation=self.reduce_simulation, + ) + + @property + def task_id_if_cached(self) -> str: + return "cached_" + self.task_name + "_" + str(uuid.uuid4()) @cached_property def task_id(self) -> TaskId: """The task ID for this ``Job``. Uploads the ``Job`` if it hasn't already been uploaded.""" if self.load_if_cached: - return "cached_" + self.task_name + "_" + str(uuid.uuid4()) + return self.task_id_if_cached if self.task_id_cached: return self.task_id_cached self._check_folder(self.folder_name) @@ -462,14 +457,14 @@ def load(self, path: str = DEFAULT_DATA_PATH) -> WorkflowDataType: """ if self.load_if_cached: self.move_cache_file(path=path) - data = Tidy3dStubData.postprocess(path) - return data + self._check_path_dir(path=path) data = web.load( task_id=self.task_id, path=path, verbose=self.verbose, use_cache=self.use_cache, + from_cache=self.load_if_cached, ) if isinstance(self.simulation, ModeSolver): self.simulation._patch_data(data=data) diff --git a/tidy3d/web/api/webapi.py b/tidy3d/web/api/webapi.py index 1fe1e56812..9dc9191c29 100644 --- a/tidy3d/web/api/webapi.py +++ b/tidy3d/web/api/webapi.py @@ -134,6 +134,23 @@ def _get_simulation_data_from_cache_entry(entry: CacheEntry, path: str) -> bool: return False +def restore_simulation_if_cached( + simulation: WorkflowType, + path: str, + use_cache: Optional[bool], + reduce_simulation: Literal["auto", True, False], +) -> bool: + simulation_cache = resolve_simulation_cache(use_cache) + loaded_from_cache = False + if simulation_cache is not None: + sim_for_cache = simulation + if isinstance(simulation, (ModeSolver, ModeSimulation)): + sim_for_cache = get_reduced_simulation(simulation, reduce_simulation) + entry = simulation_cache.try_fetch(simulation=sim_for_cache) + loaded_from_cache = _get_simulation_data_from_cache_entry(entry, path) + return loaded_from_cache + + @wait_for_connection def run( simulation: WorkflowType, @@ -239,14 +256,9 @@ def run( :meth:`tidy3d.web.api.container.Batch.monitor` Monitor progress of each of the running tasks. 
""" - simulation_cache = resolve_simulation_cache(use_cache) - loaded_from_cache = False - if simulation_cache is not None: - sim_for_cache = simulation - if isinstance(simulation, (ModeSolver, ModeSimulation)): - sim_for_cache = get_reduced_simulation(simulation, reduce_simulation) - entry = simulation_cache.try_fetch(simulation=sim_for_cache) - loaded_from_cache = _get_simulation_data_from_cache_entry(entry, path) + loaded_from_cache = restore_simulation_if_cached( + simulation=simulation, path=path, use_cache=use_cache, reduce_simulation=reduce_simulation + ) if not loaded_from_cache: task_id = upload( From 13892d84b98a0f9e3e6ff57fce9cf2c9b9e9e1cc Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Fri, 10 Oct 2025 10:11:13 +0200 Subject: [PATCH 18/22] fix tests --- tests/test_web/test_simulation_cache.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index c3234d9d3a..326e316dd7 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -6,7 +6,7 @@ import tidy3d as td from tidy3d import config -from tidy3d.web import Job, common, run_async +from tidy3d.web import Job, common, run_async, download from tidy3d.web import Job, run_async from tidy3d.web.api import webapi as web from tidy3d.web.api.container import WebContainer @@ -96,7 +96,7 @@ def _extract_simulation(kwargs): def _fake_upload(**kwargs): counters["upload"] += 1 - task_id = f"{MOCK_TASK_ID}{counters['upload']}" + task_id = f"{MOCK_TASK_ID}{kwargs["simulation"]._hash_self()}" sim = _extract_simulation(kwargs) if sim is not None: TASK_TO_SIM[task_id] = sim @@ -159,18 +159,19 @@ def _test_run_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): assert counters == {"upload": 0, "start": 0, "monitor": 0, "download": 0} -def _test_run_cache_hit_async(monkeypatch, basic_simulation): +def _test_run_cache_hit_async(monkeypatch, basic_simulation, tmp_path): counters = _patch_run_pipeline(monkeypatch) monkeypatch.setattr(config.simulation_cache, "max_entries", 128) monkeypatch.setattr(config.simulation_cache, "max_size_gb", 10) cache = resolve_simulation_cache(use_cache=True) cache.clear() + _reset_fake_maps() _reset_counters(counters) sim2 = basic_simulation.updated_copy(shutoff=1e-4) sim3 = basic_simulation.updated_copy(shutoff=1e-3) - data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) + data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True, path_dir=str(tmp_path)) data_task1 = data["task1"] # access to store in cache data_task2 = data["task2"] # access to store in cache assert counters["download"] == 2 @@ -179,16 +180,17 @@ def _test_run_cache_hit_async(monkeypatch, basic_simulation): assert len(cache) == 2 _reset_counters(counters) - run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True) + run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True, path_dir=str(tmp_path)) assert counters["download"] == 0 assert isinstance(data_task1, _FakeStubData) assert len(cache) == 2 _reset_counters(counters) - data = run_async({"task1": basic_simulation, "task3": sim3}, use_cache=True) + data = run_async({"task1": basic_simulation, "task3": sim3}, use_cache=True, path_dir=str(tmp_path)) data_task1 = data["task1"] data_task2 = data["task3"] # access to store in cache + print(counters["download"]) assert counters["download"] == 1 # sim3 is new assert isinstance(data_task1, _FakeStubData) assert 
isinstance(data_task2, _FakeStubData) @@ -320,5 +322,5 @@ def test_cache_end_to_end(monkeypatch, tmp_path, tmp_path_factory, basic_simulat _test_checksum_mismatch_triggers_refresh(monkeypatch, tmp_path, basic_simulation) _test_cache_eviction_by_entries(monkeypatch, tmp_path_factory, basic_simulation) _test_cache_eviction_by_size(monkeypatch, tmp_path_factory, basic_simulation) - _test_run_cache_hit_async(monkeypatch, basic_simulation) + _test_run_cache_hit_async(monkeypatch, basic_simulation, tmp_path) _test_job_run_cache(monkeypatch, tmp_path_factory, basic_simulation) From ae541e19359b294361a364fbf83c2f288a5efbdd Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Fri, 10 Oct 2025 10:13:54 +0200 Subject: [PATCH 19/22] delete local test --- playground/cache_conf_test.py | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 playground/cache_conf_test.py diff --git a/playground/cache_conf_test.py b/playground/cache_conf_test.py deleted file mode 100644 index 83e652299e..0000000000 --- a/playground/cache_conf_test.py +++ /dev/null @@ -1,19 +0,0 @@ -from __future__ import annotations - -from tidy3d import config -from tidy3d.web.cache import resolve_simulation_cache - -# config.simulation_cache.max_size_gb = float(10_000 * 1e-9) -# cache = resolve_simulation_cache(use_cache=True) -# print(cache.config) -tmp_path = "dfsd" - -config.simulation_cache.enabled = True -config.simulation_cache.directory = tmp_path -config.simulation_cache.max_size_gb = 1.23 -config.simulation_cache.max_entries = 5 -cfg = resolve_simulation_cache().config -assert cfg.enabled is True -assert cfg.directory == tmp_path -assert cfg.max_size_gb == 1.23 -assert cfg.max_entries == 5 From 1cd8a82e48770834bbf515f85e7ea88b1f1774f3 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Fri, 10 Oct 2025 10:49:11 +0200 Subject: [PATCH 20/22] test autograd --- .../test_components/autograd/test_autograd.py | 2 +- tests/test_web/test_simulation_cache.py | 40 +++++++++++++++---- tidy3d/web/api/container.py | 4 -- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/tests/test_components/autograd/test_autograd.py b/tests/test_components/autograd/test_autograd.py index cfd55079b0..95204a601f 100644 --- a/tests/test_components/autograd/test_autograd.py +++ b/tests/test_components/autograd/test_autograd.py @@ -662,7 +662,7 @@ def plot_sim(sim: td.Simulation, plot_eps: bool = True) -> None: # args = [("polyslab", "mode")] -def get_functions(structure_key: str, monitor_key: str) -> typing.Callable: +def get_functions(structure_key: str, monitor_key: str) -> dict[str, typing.Callable]: if structure_key == ALL_KEY: structure_keys = structure_keys_ else: diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index 326e316dd7..4725344a28 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -5,9 +5,9 @@ import pytest import tidy3d as td +from tests.test_components.autograd.test_autograd import ALL_KEY, get_functions, params0 from tidy3d import config -from tidy3d.web import Job, common, run_async, download -from tidy3d.web import Job, run_async +from tidy3d.web import Job, common, run_async from tidy3d.web.api import webapi as web from tidy3d.web.api.container import WebContainer from tidy3d.web.cache import ( @@ -96,7 +96,7 @@ def _extract_simulation(kwargs): def _fake_upload(**kwargs): counters["upload"] += 1 - task_id = f"{MOCK_TASK_ID}{kwargs["simulation"]._hash_self()}" + task_id = 
f"{MOCK_TASK_ID}{kwargs['simulation']._hash_self()}" sim = _extract_simulation(kwargs) if sim is not None: TASK_TO_SIM[task_id] = sim @@ -171,7 +171,9 @@ def _test_run_cache_hit_async(monkeypatch, basic_simulation, tmp_path): sim2 = basic_simulation.updated_copy(shutoff=1e-4) sim3 = basic_simulation.updated_copy(shutoff=1e-3) - data = run_async({"task1": basic_simulation, "task2": sim2}, use_cache=True, path_dir=str(tmp_path)) + data = run_async( + {"task1": basic_simulation, "task2": sim2}, use_cache=True, path_dir=str(tmp_path) + ) data_task1 = data["task1"] # access to store in cache data_task2 = data["task2"] # access to store in cache assert counters["download"] == 2 @@ -186,7 +188,9 @@ def _test_run_cache_hit_async(monkeypatch, basic_simulation, tmp_path): assert len(cache) == 2 _reset_counters(counters) - data = run_async({"task1": basic_simulation, "task3": sim3}, use_cache=True, path_dir=str(tmp_path)) + data = run_async( + {"task1": basic_simulation, "task3": sim3}, use_cache=True, path_dir=str(tmp_path) + ) data_task1 = data["task1"] data_task2 = data["task3"] # access to store in cache @@ -197,7 +201,7 @@ def _test_run_cache_hit_async(monkeypatch, basic_simulation, tmp_path): assert len(cache) == 3 -def _test_job_run_cache(monkeypatch, tmp_path_factory, basic_simulation): +def _test_job_run_cache(monkeypatch, basic_simulation): counters = _patch_run_pipeline(monkeypatch) cache = resolve_simulation_cache(use_cache=True) cache.clear() @@ -214,6 +218,27 @@ def _test_job_run_cache(monkeypatch, tmp_path_factory, basic_simulation): assert counters["download"] == 0 +@pytest.mark.parametrize("structure_key", ["polyslab"]) +@pytest.mark.parametrize("monitor_key", ["mode"]) +def _test_autograd_cache(monkeypatch, structure_key, monitor_key): + counters = _patch_run_pipeline(monkeypatch) + cache = resolve_simulation_cache(use_cache=True) + cache.clear() + + functions = get_functions(ALL_KEY, "mode") + make_sim = functions["sim"] + sim = make_sim(params0) + web.run(sim, use_cache=True) + assert counters["download"] == 1 + assert len(cache) == 1 + + _reset_counters(counters) + sim = make_sim(params0) + web.run(sim, use_cache=True) + assert counters["download"] == 0 + assert len(cache) == 1 + + def _test_load_cache_hit(monkeypatch, tmp_path, basic_simulation, fake_data): get_cache().clear() counters = _patch_run_pipeline(monkeypatch) @@ -323,4 +348,5 @@ def test_cache_end_to_end(monkeypatch, tmp_path, tmp_path_factory, basic_simulat _test_cache_eviction_by_entries(monkeypatch, tmp_path_factory, basic_simulation) _test_cache_eviction_by_size(monkeypatch, tmp_path_factory, basic_simulation) _test_run_cache_hit_async(monkeypatch, basic_simulation, tmp_path) - _test_job_run_cache(monkeypatch, tmp_path_factory, basic_simulation) + _test_job_run_cache(monkeypatch, basic_simulation) + _test_autograd_cache(monkeypatch, basic_simulation) diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index ec9bc4099f..ce83255168 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -18,15 +18,11 @@ from tidy3d.components.base import Tidy3dBaseModel, cached_property from tidy3d.components.mode.mode_solver import ModeSolver -from tidy3d.components.mode.simulation import ModeSimulation from tidy3d.components.types import annotate_type from tidy3d.components.types.workflow import WorkflowDataType, WorkflowType from tidy3d.exceptions import DataError from tidy3d.log import get_logging_console, log from tidy3d.web.api import webapi as web -from tidy3d.web.api.tidy3d_stub 
import Tidy3dStub, Tidy3dStubData -from tidy3d.web.api.webapi import _get_simulation_data_from_cache_entry, get_reduced_simulation -from tidy3d.web.cache import TMP_BATCH_PREFIX, CacheEntry, resolve_simulation_cache from tidy3d.web.api.tidy3d_stub import Tidy3dStub from tidy3d.web.api.webapi import ( restore_simulation_if_cached, From b0348be0f11b4a5daa52d2e86fa0439c6a3c2be3 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Fri, 10 Oct 2025 11:46:23 +0200 Subject: [PATCH 21/22] add convenience loader and remove workflow type from cache key --- tidy3d/web/api/container.py | 3 ++- tidy3d/web/api/webapi.py | 39 ++++++++++++++++++++++++++++--------- tidy3d/web/cache.py | 4 ---- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index ce83255168..a674bfcdc1 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -583,7 +583,8 @@ def load_sim_data(self, task_name: str) -> WorkflowDataType: task_data_path = self.task_paths[task_name] task_id = self.task_ids[task_name] from_cache = self.cached_tasks[task_name] if self.cached_tasks else False - web.get_info(task_id) + if not from_cache: + web.get_info(task_id) return web.load( task_id=task_id, diff --git a/tidy3d/web/api/webapi.py b/tidy3d/web/api/webapi.py index 9dc9191c29..767e2c6f5c 100644 --- a/tidy3d/web/api/webapi.py +++ b/tidy3d/web/api/webapi.py @@ -124,7 +124,7 @@ def _task_dict_to_url_bullet_list(data_dict: dict) -> str: return "\n".join([f"- {key}: '{value}'" for key, value in data_dict.items()]) -def _get_simulation_data_from_cache_entry(entry: CacheEntry, path: str) -> bool: +def _copy_simulation_data_from_cache_entry(entry: CacheEntry, path: str) -> bool: if entry is not None: try: entry.materialize(Path(path)) @@ -137,18 +137,39 @@ def _get_simulation_data_from_cache_entry(entry: CacheEntry, path: str) -> bool: def restore_simulation_if_cached( simulation: WorkflowType, path: str, - use_cache: Optional[bool], - reduce_simulation: Literal["auto", True, False], + use_cache: Optional[bool] = None, + reduce_simulation: Literal["auto", True, False] = "auto", ) -> bool: simulation_cache = resolve_simulation_cache(use_cache) - loaded_from_cache = False + copied_from_cache = False if simulation_cache is not None: sim_for_cache = simulation if isinstance(simulation, (ModeSolver, ModeSimulation)): sim_for_cache = get_reduced_simulation(simulation, reduce_simulation) entry = simulation_cache.try_fetch(simulation=sim_for_cache) - loaded_from_cache = _get_simulation_data_from_cache_entry(entry, path) - return loaded_from_cache + if entry is not None: + copied_from_cache = _copy_simulation_data_from_cache_entry(entry, path) + return copied_from_cache + + +def load_simulation_if_cached( + simulation: WorkflowType, + path: str, + use_cache: Optional[bool] = None, + reduce_simulation: Literal["auto", True, False] = "auto", +) -> Optional[WorkflowDataType]: + restored = restore_simulation_if_cached(simulation, path, use_cache, reduce_simulation) + if restored: + data = load( + task_id=None, + path=path, + from_cache=True, + ) + if isinstance(simulation, ModeSolver): + simulation._patch_data(data=data) + return data + else: + return None @wait_for_connection @@ -256,11 +277,11 @@ def run( :meth:`tidy3d.web.api.container.Batch.monitor` Monitor progress of each of the running tasks. 
""" - loaded_from_cache = restore_simulation_if_cached( + copied_from_cache = restore_simulation_if_cached( simulation=simulation, path=path, use_cache=use_cache, reduce_simulation=reduce_simulation ) - if not loaded_from_cache: + if not copied_from_cache: task_id = upload( simulation=simulation, task_name=task_name, @@ -291,7 +312,7 @@ def run( verbose=verbose, progress_callback=progress_callback_download, use_cache=use_cache, - from_cache=loaded_from_cache, + from_cache=copied_from_cache, lazy=lazy, ) diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py index 693542ac9d..ad12f3592e 100644 --- a/tidy3d/web/cache.py +++ b/tidy3d/web/cache.py @@ -500,7 +500,6 @@ def try_fetch( cache_key = build_cache_key( simulation_hash=simulation_hash, - workflow_type=workflow_type, version=versions, ) @@ -538,7 +537,6 @@ def store_result( cache_key = build_cache_key( simulation_hash=simulation_hash, - workflow_type=workflow_type, version=version, ) @@ -658,14 +656,12 @@ def _canonicalize(value: Any) -> Any: def build_cache_key( *, simulation_hash: str, - workflow_type: str, version: str, ) -> str: """Construct a deterministic cache key.""" payload = { "simulation_hash": simulation_hash, - "workflow_type": workflow_type, "versions": _canonicalize(version), } encoded = json.dumps(payload, sort_keys=True, separators=(",", ":")).encode("utf-8") From 4539ed92b97420858307beda2863ba88cd208c45 Mon Sep 17 00:00:00 2001 From: marcorudolphflex Date: Tue, 14 Oct 2025 10:27:04 +0200 Subject: [PATCH 22/22] remove upload for cache, show url --- tests/test_web/test_simulation_cache.py | 6 ++-- tidy3d/web/api/container.py | 16 +++++++-- tidy3d/web/api/webapi.py | 47 ++++++++++++++++++------- tidy3d/web/cache.py | 42 +++++----------------- 4 files changed, 58 insertions(+), 53 deletions(-) diff --git a/tests/test_web/test_simulation_cache.py b/tests/test_web/test_simulation_cache.py index 4725344a28..161e042afb 100644 --- a/tests/test_web/test_simulation_cache.py +++ b/tests/test_web/test_simulation_cache.py @@ -218,9 +218,7 @@ def _test_job_run_cache(monkeypatch, basic_simulation): assert counters["download"] == 0 -@pytest.mark.parametrize("structure_key", ["polyslab"]) -@pytest.mark.parametrize("monitor_key", ["mode"]) -def _test_autograd_cache(monkeypatch, structure_key, monitor_key): +def _test_autograd_cache(monkeypatch): counters = _patch_run_pipeline(monkeypatch) cache = resolve_simulation_cache(use_cache=True) cache.clear() @@ -349,4 +347,4 @@ def test_cache_end_to_end(monkeypatch, tmp_path, tmp_path_factory, basic_simulat _test_cache_eviction_by_size(monkeypatch, tmp_path_factory, basic_simulation) _test_run_cache_hit_async(monkeypatch, basic_simulation, tmp_path) _test_job_run_cache(monkeypatch, basic_simulation) - _test_autograd_cache(monkeypatch, basic_simulation) + _test_autograd_cache(monkeypatch) diff --git a/tidy3d/web/api/container.py b/tidy3d/web/api/container.py index a674bfcdc1..d8ecf6eb12 100644 --- a/tidy3d/web/api/container.py +++ b/tidy3d/web/api/container.py @@ -566,6 +566,7 @@ class BatchData(Tidy3dBaseModel, Mapping): verbose: bool = pd.Field( True, title="Verbose", description="Whether to print info messages and progressbars." 
) + cached_tasks: Optional[dict[TaskName, bool]] = pd.Field( None, title="Cached Tasks", @@ -578,6 +579,13 @@ class BatchData(Tidy3dBaseModel, Mapping): description="Whether to use local cache for retrieving Simulation results.", ) + is_downloaded: Optional[bool] = pd.Field( + False, + title="Is Downloaded", + description="Whether the simulation data was downloaded before.", + ) + + def load_sim_data(self, task_name: str) -> WorkflowDataType: """Load a simulation data object from file by task name.""" task_data_path = self.task_paths[task_name] @@ -592,7 +600,7 @@ def load_sim_data(self, task_name: str) -> WorkflowDataType: verbose=False, from_cache=from_cache, use_cache=self.use_cache, - replace_existing=False, + replace_existing=not (from_cache or self.is_downloaded), ) def __getitem__(self, task_name: TaskName) -> WorkflowDataType: @@ -1207,12 +1215,16 @@ def load(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) task_ids[task_name] = self.jobs[task_name].task_id loaded = {task_name: job.load_if_cached for task_name, job in self.jobs.items()} + + self.download(path_dir=path_dir, replace_existing=replace_existing) + data = BatchData( task_paths=task_paths, task_ids=task_ids, verbose=self.verbose, cached_tasks=loaded, use_cache=self.use_cache, + is_downloaded=True, ) for task_name, job in self.jobs.items(): @@ -1220,8 +1232,6 @@ def load(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) job_data = data[task_name] job.simulation._patch_data(data=job_data) - self.download(path_dir=path_dir, replace_existing=replace_existing) - return data def delete(self) -> None: diff --git a/tidy3d/web/api/webapi.py b/tidy3d/web/api/webapi.py index 767e2c6f5c..2fd4f17856 100644 --- a/tidy3d/web/api/webapi.py +++ b/tidy3d/web/api/webapi.py @@ -139,6 +139,7 @@ def restore_simulation_if_cached( path: str, use_cache: Optional[bool] = None, reduce_simulation: Literal["auto", True, False] = "auto", + verbose: bool = True, ) -> bool: simulation_cache = resolve_simulation_cache(use_cache) copied_from_cache = False @@ -146,9 +147,18 @@ def restore_simulation_if_cached( sim_for_cache = simulation if isinstance(simulation, (ModeSolver, ModeSimulation)): sim_for_cache = get_reduced_simulation(simulation, reduce_simulation) - entry = simulation_cache.try_fetch(simulation=sim_for_cache) + entry = simulation_cache.try_fetch(simulation=sim_for_cache, verbose=verbose) if entry is not None: copied_from_cache = _copy_simulation_data_from_cache_entry(entry, path) + cached_task_id = entry.metadata.get("task_id") + cached_workflow_type = entry.metadata.get("workflow_type") + if cached_task_id is not None and cached_workflow_type is not None and verbose: + console = get_logging_console() if verbose else None + url, _ = _get_task_urls( + cached_workflow_type, + simulation, + cached_task_id) + console.log(f"Loaded simulation from local cache.\nView cached task using web UI at [link={url}]'{url}'[/link].") return copied_from_cache @@ -278,7 +288,7 @@ def run( Monitor progress of each of the running tasks. 
""" copied_from_cache = restore_simulation_if_cached( - simulation=simulation, path=path, use_cache=use_cache, reduce_simulation=reduce_simulation + simulation=simulation, path=path, use_cache=use_cache, reduce_simulation=reduce_simulation, verbose=verbose ) if not copied_from_cache: @@ -320,6 +330,26 @@ def run( simulation._patch_data(data=data) return data +def _get_task_urls( + task_type: str, + simulation: WorkflowType, + resource_id: str, + folder_id: Optional[str] = None, + group_id: Optional[str] = None, +) -> tuple[str, Optional[str]]: + """Log task and folder links to the web UI.""" + print("task_type:", task_type) + if (task_type in ["RF", "COMPONENT_MODELER", "TERMINAL_COMPONENT_MODELER"]) and isinstance(simulation, TerminalComponentModeler): + url = _get_url_rf(group_id or resource_id) + else: + url = _get_url(resource_id) + + if folder_id is not None: + folder_url = _get_folder_url(folder_id) + else: + folder_url = None + return url, folder_url + @wait_for_connection def upload( @@ -441,16 +471,9 @@ def upload( f"Cost of {solver_name} simulations is subject to change in the future." ) if task_type in GUI_SUPPORTED_TASK_TYPES: - if (task_type == "RF") and (isinstance(simulation, TerminalComponentModeler)): - url = _get_url_rf(group_id or resource_id) - folder_url = _get_folder_url(task.folder_id) - console.log(f"View task using web UI at [link={url}]'{url}'[/link].") - console.log(f"Task folder: [link={folder_url}]'{task.folder_name}'[/link].") - else: - url = _get_url(resource_id) - folder_url = _get_folder_url(task.folder_id) - console.log(f"View task using web UI at [link={url}]'{url}'[/link].") - console.log(f"Task folder: [link={folder_url}]'{task.folder_name}'[/link].") + url, folder_url = _get_task_urls(task_type, simulation, resource_id, task.folder_id, group_id) + console.log(f"View task using web UI at [link={url}]'{url}'[/link].") + console.log(f"Task folder: [link={folder_url}]'{task.folder_name}'[/link].") remote_sim_file = SIM_FILE_HDF5_GZ if task_type == "MODE_SOLVER": diff --git a/tidy3d/web/cache.py b/tidy3d/web/cache.py index ad12f3592e..d661e74ea2 100644 --- a/tidy3d/web/cache.py +++ b/tidy3d/web/cache.py @@ -301,27 +301,13 @@ def _fetch(self, key: str) -> Optional[CacheEntry]: self._touch(entry) return entry - def fetch_by_task(self, task_id: str) -> Optional[CacheEntry]: - """Retrieve an entry by task id.""" - with self._lock: - for entry in self._iter_entries(): - metadata = entry.metadata - task_ids = metadata.get("task_ids", []) - if task_id in task_ids and entry.exists(): - if not entry.verify(): - self._remove_entry(entry) - return None - self._touch(entry) - return entry - return None - def __len__(self) -> int: """Return number of valid cache entries.""" with self._lock: return sum(1 for _ in self._iter_entries()) def _store( - self, key: str, task_id: Optional[str], source_path: Path, metadata: dict[str, Any] + self, key: str, source_path: Path, metadata: dict[str, Any] ) -> Optional[CacheEntry]: """Store a new cache entry from ``source_path``. @@ -329,8 +315,6 @@ def _store( ---------- key : str Cache key computed from simulation hash and runtime context. - task_id : str, optional - Server task id associated with this artifact. source_path : Path Location of the artifact to cache. 
metadata : dict[str, Any] @@ -358,11 +342,6 @@ def _store( metadata["last_used"] = now_iso metadata["checksum"] = checksum metadata["file_size"] = file_size - if task_id: - task_ids = list(metadata.get("task_ids", [])) - if task_id not in task_ids: - task_ids.append(task_id) - metadata["task_ids"] = task_ids _write_metadata(tmp_meta, metadata) try: @@ -508,7 +487,7 @@ def try_fetch( return None if verbose: log.info( - "Simulation cache hit for workflow '%s'; using local results.", workflow_type + f"Simulation cache hit for workflow '{workflow_type}'; using local results." ) return entry @@ -543,16 +522,13 @@ def store_result( metadata = build_entry_metadata( simulation_hash=simulation_hash, workflow_type=workflow_type, - runtime_context={ - "task_id": task_id, - }, + task_id=task_id, version=version, - extras={"path": str(Path(path))}, + path=Path(path), ) self._store( key=cache_key, - task_id=task_id, # keeps a reverse link for legacy fetch_by_task source_path=Path(path), metadata=metadata, ) @@ -672,19 +648,17 @@ def build_entry_metadata( *, simulation_hash: str, workflow_type: str, - runtime_context: dict[str, Any], + task_id: str, version: str, - extras: Optional[dict[str, Any]] = None, + path: Path, ) -> dict[str, Any]: """Create metadata dictionary for a cache entry.""" metadata: dict[str, Any] = { "simulation_hash": simulation_hash, "workflow_type": workflow_type, - "runtime_context": _canonicalize(runtime_context), "versions": _canonicalize(version), - "task_ids": [], + "task_id": task_id, + "path": str(path), } - if extras: - metadata.update(_canonicalize(extras)) return metadata