diff --git a/dimos/core/native_module.py b/dimos/core/native_module.py index 74471f34d5..5e4165bdae 100644 --- a/dimos/core/native_module.py +++ b/dimos/core/native_module.py @@ -40,7 +40,6 @@ class MyCppModule(NativeModule): from __future__ import annotations -import collections import enum import inspect import json @@ -56,6 +55,7 @@ class MyCppModule(NativeModule): from dimos.core.core import rpc from dimos.core.module import Module, ModuleConfig +from dimos.utils.change_detect import PathEntry, did_change, update_cache from dimos.utils.logging_config import setup_logger if sys.version_info < (3, 13): @@ -81,9 +81,10 @@ class NativeModuleConfig(ModuleConfig): extra_env: dict[str, str] = Field(default_factory=dict) shutdown_timeout: float = 10.0 log_format: LogFormat = LogFormat.TEXT + rebuild_on_change: list[PathEntry] | None = None # Override in subclasses to exclude fields from CLI arg generation - cli_exclude: frozenset[str] = frozenset() + cli_exclude: frozenset[str] = frozenset({"rebuild_on_change"}) def to_cli_args(self) -> list[str]: """Auto-convert subclass config fields to CLI args. @@ -132,11 +133,9 @@ class NativeModule(Module[_NativeConfig]): _process: subprocess.Popen[bytes] | None = None _watchdog: threading.Thread | None = None _stopping: bool = False - _last_stderr_lines: collections.deque[str] def __init__(self, **kwargs: Any) -> None: super().__init__(**kwargs) - self._last_stderr_lines = collections.deque(maxlen=50) self._resolve_paths() @rpc @@ -158,13 +157,7 @@ def start(self) -> None: env = {**os.environ, **self.config.extra_env} cwd = self.config.cwd or str(Path(self.config.executable).resolve().parent) - module_name = type(self).__name__ - logger.info( - f"Starting native process: {module_name}", - module=module_name, - cmd=" ".join(cmd), - cwd=cwd, - ) + logger.info("Starting native process", cmd=" ".join(cmd), cwd=cwd) self._process = subprocess.Popen( cmd, env=env, @@ -172,11 +165,7 @@ def start(self) -> None: stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - logger.info( - f"Native process started: {module_name}", - module=module_name, - pid=self._process.pid, - ) + logger.info("Native process started", pid=self._process.pid) self._stopping = False self._watchdog = threading.Thread(target=self._watch_process, daemon=True) @@ -199,8 +188,11 @@ def stop(self) -> None: if self._watchdog is not None and self._watchdog is not threading.current_thread(): self._watchdog.join(timeout=2) self._watchdog = None - self._process = None + # Clean up the asyncio loop thread (from ModuleBase) BEFORE + # clearing _process — tests use _process=None as their exit + # signal, and the loop thread must be joined first. super().stop() + self._process = None def _watch_process(self) -> None: """Block until the native process exits; trigger stop() if it crashed.""" @@ -215,20 +207,10 @@ def _watch_process(self) -> None: if self._stopping: return - - module_name = type(self).__name__ - exe_name = Path(self.config.executable).name if self.config.executable else "unknown" - - # Use buffered stderr lines from the reader thread for the crash report. - last_stderr = "\n".join(self._last_stderr_lines) - logger.error( - f"Native process crashed: {module_name} ({exe_name})", - module=module_name, - executable=exe_name, + "Native process died unexpectedly", pid=self._process.pid, returncode=rc, - last_stderr=last_stderr[:500] if last_stderr else None, ) self.stop() @@ -242,13 +224,10 @@ def _read_log_stream(self, stream: IO[bytes] | None, level: str) -> None: if stream is None: return log_fn = getattr(logger, level) - is_stderr = level == "warning" for raw in stream: line = raw.decode("utf-8", errors="replace").rstrip() if not line: continue - if is_stderr: - self._last_stderr_lines.append(line) if self.config.log_format == LogFormat.JSON: try: data = json.loads(line) @@ -269,18 +248,44 @@ def _resolve_paths(self) -> None: if not Path(self.config.executable).is_absolute() and self.config.cwd is not None: self.config.executable = str(Path(self.config.cwd) / self.config.executable) + def _build_cache_name(self) -> str: + """Return a stable, unique cache name for this module's build state.""" + source_file = Path(inspect.getfile(type(self))).resolve() + return f"native_{source_file}:{type(self).__qualname__}" + def _maybe_build(self) -> None: - """Run ``build_command`` if the executable does not exist.""" + """Run ``build_command`` if the executable does not exist or sources changed.""" exe = Path(self.config.executable) - if exe.exists(): + + # Check if rebuild needed due to source changes. + # Use update=False so the cache is NOT written yet — if the build + # fails the next check will still detect changes and retry. + needs_rebuild = False + if self.config.rebuild_on_change and exe.exists(): + if did_change( + self._build_cache_name(), + self.config.rebuild_on_change, + cwd=self.config.cwd, + update=False, + ): + logger.info("Source files changed, triggering rebuild", executable=str(exe)) + needs_rebuild = True + + if exe.exists() and not needs_rebuild: return + if self.config.build_command is None: raise FileNotFoundError( f"Executable not found: {exe}. " "Set build_command in config to auto-build, or build it manually." ) + + # Don't unlink the exe before rebuilding — the build command is + # responsible for replacing it. For nix builds the exe lives inside + # a read-only store; `nix build -o` atomically swaps the output + # symlink without touching store contents. logger.info( - "Executable not found, running build", + "Rebuilding" if needs_rebuild else "Executable not found, building", executable=str(exe), build_command=self.config.build_command, ) @@ -300,16 +305,20 @@ def _maybe_build(self) -> None: if line.strip(): logger.warning(line) if proc.returncode != 0: - stderr_tail = stderr.decode("utf-8", errors="replace").strip()[-1000:] raise RuntimeError( - f"Build command failed (exit {proc.returncode}): {self.config.build_command}\n" - f"stderr: {stderr_tail}" + f"Build command failed (exit {proc.returncode}): {self.config.build_command}" ) if not exe.exists(): raise FileNotFoundError( - f"Build command succeeded but executable still not found: {exe}\n" - f"Build output may have been written to a different path. " - f"Check that build_command produces the executable at the expected location." + f"Build command succeeded but executable still not found: {exe}" + ) + + # Seed the cache after a successful build so the next check has a baseline. + # Uses update_cache (not did_change) so we only write the hash after a + # confirmed-good build — a failed build won't poison the cache. + if self.config.rebuild_on_change: + update_cache( + self._build_cache_name(), self.config.rebuild_on_change, cwd=self.config.cwd ) def _collect_topics(self) -> dict[str, str]: diff --git a/dimos/core/test_native_module.py b/dimos/core/test_native_module.py index d5976e12e2..c679713f26 100644 --- a/dimos/core/test_native_module.py +++ b/dimos/core/test_native_module.py @@ -99,13 +99,16 @@ def test_process_crash_triggers_stop() -> None: assert mod._process is not None pid = mod._process.pid - # Wait for the process to die and the watchdog to call stop() - for _ in range(30): - time.sleep(0.1) - if mod._process is None: - break + try: + # Wait for the process to die and the watchdog to call stop() + for _ in range(30): + time.sleep(0.1) + if mod._process is None: + break - assert mod._process is None, f"Watchdog did not clean up after process {pid} died" + assert mod._process is None, f"Watchdog did not clean up after process {pid} died" + finally: + mod.stop() # Wait for background threads (run_forever, _lcm_loop, _watch_process) to finish # after the watchdog-triggered stop(). Without this, monitor_threads catches them. diff --git a/dimos/core/test_native_rebuild.py b/dimos/core/test_native_rebuild.py new file mode 100644 index 0000000000..6f8a68b9aa --- /dev/null +++ b/dimos/core/test_native_rebuild.py @@ -0,0 +1,140 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for NativeModule rebuild-on-change integration.""" + +from __future__ import annotations + +from pathlib import Path +import stat + +import pytest + +from dimos.core.native_module import NativeModule, NativeModuleConfig +from dimos.utils.change_detect import PathEntry + + +@pytest.fixture(autouse=True) +def _use_tmp_cache(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Redirect the change-detection cache to a temp dir for every test.""" + monkeypatch.setattr( + "dimos.utils.change_detect._get_cache_dir", + lambda: tmp_path / "cache", + ) + + +@pytest.fixture() +def build_env(tmp_path: Path) -> dict[str, Path]: + """Set up a temp directory with a source file, executable path, and marker path.""" + src = tmp_path / "src" + src.mkdir() + (src / "main.c").write_text("int main() { return 0; }") + + exe = tmp_path / "mybin" + marker = tmp_path / "build_ran.marker" + + # Build script: create the executable and a marker file + build_script = tmp_path / "build.sh" + build_script.write_text(f"#!/bin/sh\ntouch {exe}\nchmod +x {exe}\ntouch {marker}\n") + build_script.chmod(build_script.stat().st_mode | stat.S_IEXEC) + + return {"src": src, "exe": exe, "marker": marker, "build_script": build_script} + + +class _RebuildConfig(NativeModuleConfig): + executable: str = "" + rebuild_on_change: list[PathEntry] | None = None + + +class _RebuildModule(NativeModule[_RebuildConfig]): + default_config = _RebuildConfig + + +def _make_module(build_env: dict[str, Path]) -> _RebuildModule: + """Create a _RebuildModule pointing at the temp build env.""" + return _RebuildModule( + executable=str(build_env["exe"]), + build_command=f"sh {build_env['build_script']}", + rebuild_on_change=[str(build_env["src"])], + cwd=str(build_env["src"]), + ) + + +def test_rebuild_on_change_triggers_build(build_env: dict[str, Path]) -> None: + """When source files change, the build_command should re-run.""" + mod = _make_module(build_env) + try: + exe = build_env["exe"] + marker = build_env["marker"] + + # First build: exe doesn't exist → build runs + mod._maybe_build() + assert exe.exists() + assert marker.exists() + marker.unlink() + + # No change → build should NOT run + mod._maybe_build() + assert not marker.exists() + + # Modify source → build SHOULD run + (build_env["src"] / "main.c").write_text("int main() { return 1; }") + mod._maybe_build() + assert marker.exists(), "Build should have re-run after source change" + finally: + mod.stop() + + +def test_no_change_skips_rebuild(build_env: dict[str, Path]) -> None: + """When sources haven't changed, build_command must not run again.""" + mod = _make_module(build_env) + try: + marker = build_env["marker"] + + # Initial build + mod._maybe_build() + assert marker.exists() + marker.unlink() + + # Second call — nothing changed + mod._maybe_build() + assert not marker.exists(), "Build should have been skipped (no source changes)" + finally: + mod.stop() + + +def test_rebuild_on_change_none_skips_check(build_env: dict[str, Path]) -> None: + """When rebuild_on_change is None, no change detection happens at all.""" + exe = build_env["exe"] + marker = build_env["marker"] + + mod = _RebuildModule( + executable=str(exe), + build_command=f"sh {build_env['build_script']}", + rebuild_on_change=None, + cwd=str(build_env["src"]), + ) + try: + # Initial build + mod._maybe_build() + assert exe.exists() + assert marker.exists() + marker.unlink() + + # Modify source — but rebuild_on_change is None, so no rebuild + (build_env["src"] / "main.c").write_text("int main() { return 1; }") + mod._maybe_build() + assert not marker.exists(), "Should not rebuild when rebuild_on_change is None" + finally: + mod.stop() diff --git a/dimos/manipulation/planning/utils/mesh_utils.py b/dimos/manipulation/planning/utils/mesh_utils.py index 988a4e5e8e..3278fa70b2 100644 --- a/dimos/manipulation/planning/utils/mesh_utils.py +++ b/dimos/manipulation/planning/utils/mesh_utils.py @@ -38,6 +38,7 @@ import tempfile from typing import TYPE_CHECKING +from dimos.utils.change_detect import did_change from dimos.utils.logging_config import setup_logger if TYPE_CHECKING: @@ -76,14 +77,15 @@ def prepare_urdf_for_drake( package_paths = package_paths or {} xacro_args = xacro_args or {} - # Generate cache key + # Generate cache key from configuration (not file content — did_change handles that) cache_key = _generate_cache_key(urdf_path, package_paths, xacro_args, convert_meshes) cache_path = _CACHE_DIR / cache_key / urdf_path.stem cache_path.mkdir(parents=True, exist_ok=True) cached_urdf = cache_path / f"{urdf_path.stem}.urdf" - # Check cache - if cached_urdf.exists(): + # Check cache: reuse only if the output exists AND the source file hasn't changed + source_changed = did_change(f"urdf_{cache_key}", [str(urdf_path)]) + if cached_urdf.exists() and not source_changed: logger.debug(f"Using cached URDF: {cached_urdf}") return str(cached_urdf) @@ -118,16 +120,15 @@ def _generate_cache_key( ) -> str: """Generate a cache key for the URDF configuration. - Includes a version number to invalidate cache when processing logic changes. + Encodes the configuration inputs (not file content — ``did_change`` handles + content-based invalidation separately). Includes a version number to + invalidate the cache when processing logic changes. """ - # Include file modification time - mtime = urdf_path.stat().st_mtime if urdf_path.exists() else 0 - # Version number to invalidate cache when processing logic changes # Increment this when adding new processing steps (e.g., stripping transmission blocks) - processing_version = "v2" + processing_version = "v3" - key_data = f"{processing_version}:{urdf_path}:{mtime}:{sorted(package_paths.items())}:{sorted(xacro_args.items())}:{convert_meshes}" + key_data = f"{processing_version}:{urdf_path}:{sorted(package_paths.items())}:{sorted(xacro_args.items())}:{convert_meshes}" return hashlib.md5(key_data.encode()).hexdigest()[:16] diff --git a/dimos/utils/change_detect.py b/dimos/utils/change_detect.py new file mode 100644 index 0000000000..1b92e0ef64 --- /dev/null +++ b/dimos/utils/change_detect.py @@ -0,0 +1,316 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Change detection utility for file content hashing. + +Tracks whether a set of files (by path, directory, or glob pattern) have +changed since the last check. Useful for skipping expensive rebuilds when +source files haven't been modified. + +Path entries are type-dispatched: + +- ``str`` / ``Path`` / ``LfsPath`` — treated as **literal** file or directory + paths (no glob expansion, even if the path contains ``*``). +- ``Glob`` — expanded with :func:`glob.glob` to match filesystem patterns. +""" + +from __future__ import annotations + +from collections.abc import Sequence +import fcntl +import glob as glob_mod +import hashlib +import os +from pathlib import Path +import threading +from typing import Union + +import xxhash + +from dimos.utils.data import LfsPath +from dimos.utils.logging_config import setup_logger + +logger = setup_logger() + + +class Glob(str): + """A string that should be interpreted as a filesystem glob pattern. + + Wraps a plain ``str`` to signal that :func:`did_change` should expand it + with :func:`glob.glob` rather than treating it as a literal path. + + Example:: + + Glob("src/**/*.c") + """ + + +PathEntry = Union[str, Path, LfsPath, Glob] +"""A single entry in a change-detection path list.""" + + +def _get_cache_dir() -> Path: + """Return the directory used to store change-detection cache files. + + Uses ``/dimos_cache/change_detect/`` when running inside a + venv, otherwise falls back to ``~/.cache/dimos/change_detect/``. + """ + venv = os.environ.get("VIRTUAL_ENV") + if venv: + return Path(venv) / "dimos_cache" / "change_detect" + return Path.home() / ".cache" / "dimos" / "change_detect" + + +def _safe_filename(cache_name: str) -> str: + """Convert an arbitrary cache name into a safe filename. + + If the cache name is already a simple identifier it is returned as-is. + Otherwise a short SHA-256 prefix is appended so that names containing + path separators or other special characters produce unique, safe filenames. + """ + safe_chars = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-") + if all(c in safe_chars for c in cache_name) and len(cache_name) <= 200: + return cache_name + digest = hashlib.sha256(cache_name.encode()).hexdigest()[:16] + return digest + + +def _add_path(files: set[Path], p: Path) -> None: + """Add *p* (file or directory, walked recursively) to *files*.""" + if p.is_file(): + files.add(p.resolve()) + elif p.is_dir(): + for root, _dirs, filenames in os.walk(p): + for fname in filenames: + files.add(Path(root, fname).resolve()) + + +def _resolve_paths(paths: Sequence[PathEntry], cwd: str | Path | None = None) -> list[Path]: + """Resolve a mixed list of path entries into a sorted list of files. + + ``Glob`` entries are expanded via :func:`glob.glob`. All other types + (``str``, ``Path``, ``LfsPath``) are treated as literal paths — no + wildcard expansion is performed. + + When *cwd* is provided, relative paths are resolved against it. + When *cwd* is ``None``, relative paths raise :class:`ValueError`. + """ + files: set[Path] = set() + for entry in paths: + if isinstance(entry, Glob): + pattern = str(entry) + if not Path(pattern).is_absolute(): + if cwd is None: + raise ValueError( + f"Relative path {pattern!r} passed to change detection without a cwd. " + "Either provide an absolute path or pass cwd= so relatives can be resolved." + ) + pattern = str(Path(cwd) / pattern) + expanded = glob_mod.glob(pattern, recursive=True) + if not expanded: + logger.warning("Glob pattern matched no files", pattern=pattern) + continue + for match in expanded: + _add_path(files, Path(match)) + else: + # str, Path, LfsPath — literal path, no glob expansion + path_str = str(entry) + if not Path(path_str).is_absolute(): + if cwd is None: + raise ValueError( + f"Relative path {path_str!r} passed to change detection without a cwd. " + "Either provide an absolute path or pass cwd= so relatives can be resolved." + ) + path_str = str(Path(cwd) / path_str) + p = Path(path_str) + if not p.exists(): + logger.warning("Path does not exist", path=path_str) + continue + _add_path(files, p) + return sorted(files) + + +def _hash_files(files: list[Path]) -> str: + """Compute an aggregate xxhash digest over the sorted file list.""" + h = xxhash.xxh64() + for fpath in files: + try: + # Include the path so additions/deletions/renames are detected + h.update(str(fpath).encode()) + h.update(fpath.read_bytes()) + except (OSError, PermissionError): + logger.warning("Cannot read file for hashing", path=str(fpath)) + return h.hexdigest() + + +# Thread-level locks keyed by cache_name (flock only protects cross-process). +_thread_locks: dict[str, threading.Lock] = {} +_thread_locks_guard = threading.Lock() + + +def _get_thread_lock(cache_name: str) -> threading.Lock: + with _thread_locks_guard: + if cache_name not in _thread_locks: + _thread_locks[cache_name] = threading.Lock() + return _thread_locks[cache_name] + + +def did_change( + cache_name: str, + paths: Sequence[PathEntry], + cwd: str | Path | None = None, + *, + update: bool = True, +) -> bool: + """Check if any files/dirs matching the given paths have changed since last check. + + Examples:: + + # Absolute paths — no cwd needed + did_change("my_build", ["/src/main.cpp"]) + + # Use Glob for wildcard patterns (str is always literal) + did_change("c_sources", [Glob("/src/**/*.c"), Glob("/include/**/*.h")]) + + # Relative paths — must pass cwd + did_change("my_build", ["src/main.cpp"], cwd="/home/user/project") + + # Mix literal paths and globs + did_change("config_check", ["config.yaml", Glob("templates/*.j2")], cwd="/project") + + # Track a whole directory (walked recursively) + did_change("assets", ["/data/models/"]) + + # Check without updating (dry run) + did_change("my_build", ["/src/main.cpp"], update=False) + + # Second call with no file changes → False + did_change("my_build", ["/src/main.cpp"]) # True (first call, no cache) + did_change("my_build", ["/src/main.cpp"]) # False (nothing changed) + + # After editing a file → True again + Path("/src/main.cpp").write_text("// changed") + did_change("my_build", ["/src/main.cpp"]) # True + + # Relative path without cwd → ValueError + did_change("bad", ["src/main.cpp"]) # raises ValueError + + Args: + cache_name: Unique identifier for this change-detection cache. + paths: Files, directories, or :class:`Glob` patterns to monitor. + cwd: Working directory for resolving relative paths. + update: If ``True`` (default), update the cache with the current hash + after checking. Set to ``False`` to check without updating — this + lets the caller decide whether to update (e.g. only after a + successful build via :func:`update_cache`). + + Returns ``True`` on the first call (no previous cache), and on subsequent + calls returns ``True`` only if file contents differ from the last check. + When *update* is ``True`` the cache is updated, so two consecutive calls + with no changes return ``True`` then ``False``. + """ + if not paths: + return False + + files = _resolve_paths(paths, cwd=cwd) + + # If none of the monitored paths resolve to actual files (e.g. source + # files don't exist on this branch or checkout), don't claim anything + # changed — deleting a working binary because we can't find the sources + # to compare against is destructive. + if not files: + logger.warning( + "No source files found for change detection, skipping rebuild check", + cache_name=cache_name, + ) + return False + + current_hash = _hash_files(files) + + cache_dir = _get_cache_dir() + cache_dir.mkdir(parents=True, exist_ok=True) + cache_file = cache_dir / f"{_safe_filename(cache_name)}.hash" + lock_file = cache_dir / f"{_safe_filename(cache_name)}.lock" + + changed = True + thread_lock = _get_thread_lock(cache_name) + with thread_lock, open(lock_file, "w") as lf: + fcntl.flock(lf, fcntl.LOCK_EX) + try: + if cache_file.exists(): + previous_hash = cache_file.read_text().strip() + changed = current_hash != previous_hash + # Only update the cache when requested — allows callers to defer + # the update until after a successful build so that a failed build + # doesn't prevent future rebuild attempts. + if update: + cache_file.write_text(current_hash) + finally: + fcntl.flock(lf, fcntl.LOCK_UN) + + return changed + + +def update_cache( + cache_name: str, + paths: Sequence[PathEntry], + cwd: str | Path | None = None, +) -> None: + """Write the current file hash to the cache without checking for changes. + + Call this after a successful build to record the current state so that the + next :func:`did_change` call returns ``False`` (unless files change again). + + Example:: + + if did_change("my_build", sources, update=False): + run_build() # might fail + update_cache("my_build", sources) # only update on success + """ + if not paths: + return + + files = _resolve_paths(paths, cwd=cwd) + if not files: + return + + current_hash = _hash_files(files) + + cache_dir = _get_cache_dir() + cache_dir.mkdir(parents=True, exist_ok=True) + cache_file = cache_dir / f"{_safe_filename(cache_name)}.hash" + lock_file = cache_dir / f"{_safe_filename(cache_name)}.lock" + + thread_lock = _get_thread_lock(cache_name) + with thread_lock, open(lock_file, "w") as lf: + fcntl.flock(lf, fcntl.LOCK_EX) + try: + cache_file.write_text(current_hash) + finally: + fcntl.flock(lf, fcntl.LOCK_UN) + + +def clear_cache(cache_name: str) -> bool: + """Remove the cached hash so the next ``did_change`` call returns ``True``. + + Example:: + + clear_cache("my_build") + did_change("my_build", ["/src/main.c"]) # always True after clear + """ + cache_file = _get_cache_dir() / f"{_safe_filename(cache_name)}.hash" + if cache_file.exists(): + cache_file.unlink() + return True + return False diff --git a/dimos/utils/test_change_detect.py b/dimos/utils/test_change_detect.py new file mode 100644 index 0000000000..144436c1c3 --- /dev/null +++ b/dimos/utils/test_change_detect.py @@ -0,0 +1,164 @@ +# Copyright 2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for the change detection utility.""" + +from __future__ import annotations + +from pathlib import Path + +import pytest + +from dimos.utils.change_detect import Glob, clear_cache, did_change, update_cache + + +@pytest.fixture(autouse=True) +def _use_tmp_cache(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """Redirect the change-detection cache to a temp dir for every test.""" + monkeypatch.setattr( + "dimos.utils.change_detect._get_cache_dir", + lambda: tmp_path / "cache", + ) + + +@pytest.fixture() +def src_dir(tmp_path: Path) -> Path: + """A temp directory with two source files for testing.""" + d = tmp_path / "src" + d.mkdir() + (d / "a.c").write_text("int main() { return 0; }") + (d / "b.c").write_text("void helper() {}") + return d + + +def test_first_call_returns_true(src_dir: Path) -> None: + assert did_change("test_cache", [str(src_dir)]) is True + + +def test_second_call_no_change_returns_false(src_dir: Path) -> None: + did_change("test_cache", [str(src_dir)]) + assert did_change("test_cache", [str(src_dir)]) is False + + +def test_file_modified_returns_true(src_dir: Path) -> None: + did_change("test_cache", [str(src_dir)]) + (src_dir / "a.c").write_text("int main() { return 1; }") + assert did_change("test_cache", [str(src_dir)]) is True + + +def test_file_added_to_dir_returns_true(src_dir: Path) -> None: + did_change("test_cache", [str(src_dir)]) + (src_dir / "c.c").write_text("void new_func() {}") + assert did_change("test_cache", [str(src_dir)]) is True + + +def test_file_deleted_returns_true(src_dir: Path) -> None: + did_change("test_cache", [str(src_dir)]) + (src_dir / "b.c").unlink() + assert did_change("test_cache", [str(src_dir)]) is True + + +def test_glob_pattern(src_dir: Path) -> None: + pattern = Glob(str(src_dir / "*.c")) + assert did_change("glob_cache", [pattern]) is True + assert did_change("glob_cache", [pattern]) is False + (src_dir / "a.c").write_text("changed!") + assert did_change("glob_cache", [pattern]) is True + + +def test_str_with_glob_chars_is_literal(tmp_path: Path) -> None: + """A plain str containing '*' must NOT be glob-expanded.""" + weird_name = tmp_path / "file[1].txt" + weird_name.write_text("content") + # str path — treated literally, should find the file + assert did_change("literal_test", [str(weird_name)]) is True + assert did_change("literal_test", [str(weird_name)]) is False + + +def test_separate_cache_names_independent(src_dir: Path) -> None: + paths = [str(src_dir)] + did_change("cache_a", paths) + did_change("cache_b", paths) + # Both caches are now up-to-date + assert did_change("cache_a", paths) is False + assert did_change("cache_b", paths) is False + # Modify a file — both caches should report changed independently + (src_dir / "a.c").write_text("changed") + assert did_change("cache_a", paths) is True + # cache_b hasn't been checked since the change + assert did_change("cache_b", paths) is True + + +def test_clear_cache(src_dir: Path) -> None: + paths = [str(src_dir)] + did_change("clear_test", paths) + assert did_change("clear_test", paths) is False + assert clear_cache("clear_test") is True + assert did_change("clear_test", paths) is True + + +def test_clear_cache_nonexistent() -> None: + assert clear_cache("does_not_exist") is False + + +def test_empty_paths_returns_false() -> None: + assert did_change("empty_test", []) is False + + +def test_nonexistent_path_warns() -> None: + """A non-existent absolute path logs a warning and returns False (no files → skip rebuild).""" + result = did_change("missing_test", ["/nonexistent/path/to/file.c"]) + assert result is False + + +def test_relative_path_without_cwd_raises() -> None: + """Relative paths without cwd= should raise ValueError.""" + with pytest.raises(ValueError, match="Relative path.*without a cwd"): + did_change("rel_test", ["some/relative/path.c"]) + + +def test_relative_path_with_cwd(src_dir: Path) -> None: + """Relative paths should resolve against the provided cwd.""" + assert did_change("cwd_test", ["src/a.c"], cwd=src_dir.parent) is True + assert did_change("cwd_test", ["src/a.c"], cwd=src_dir.parent) is False + + +def test_update_false_does_not_write_cache(src_dir: Path) -> None: + """With update=False, repeated calls keep returning True (cache not updated).""" + paths = [str(src_dir)] + assert did_change("no_update", paths, update=False) is True + # Cache was not written, so still reports changed + assert did_change("no_update", paths, update=False) is True + # Now update explicitly + update_cache("no_update", paths) + # Cache is current, no change + assert did_change("no_update", paths, update=False) is False + + +def test_update_cache_after_build(src_dir: Path) -> None: + """Simulates the build workflow: check without update, build, then update.""" + paths = [str(src_dir)] + # First check — no cache yet + assert did_change("build_test", paths, update=False) is True + # Simulate successful build → update cache + update_cache("build_test", paths) + # No changes since update + assert did_change("build_test", paths, update=False) is False + # Modify a file + (src_dir / "a.c").write_text("int main() { return 42; }") + # Now detects the change + assert did_change("build_test", paths, update=False) is True + # Simulate failed build — don't call update_cache + # Next check still sees the change + assert did_change("build_test", paths, update=False) is True diff --git a/pyproject.toml b/pyproject.toml index 7e2f38546e..7ff8013cc8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,6 +63,7 @@ dependencies = [ "annotation-protocol>=1.4.0", "lazy_loader", "plum-dispatch==2.5.7", + "xxhash>=3.0.0", # Logging "structlog>=25.5.0,<26", "colorlog==6.9.0", diff --git a/uv.lock b/uv.lock index 529842294b..449cc9e460 100644 --- a/uv.lock +++ b/uv.lock @@ -1714,6 +1714,7 @@ dependencies = [ { name = "toolz" }, { name = "typer" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, + { name = "xxhash" }, ] [package.optional-dependencies] @@ -2150,6 +2151,7 @@ requires-dist = [ { name = "xarm-python-sdk", marker = "extra == 'manipulation'", specifier = ">=1.17.0" }, { name = "xarm-python-sdk", marker = "extra == 'misc'", specifier = ">=1.17.0" }, { name = "xformers", marker = "platform_machine == 'x86_64' and extra == 'cuda'", specifier = ">=0.0.20" }, + { name = "xxhash", specifier = ">=3.0.0" }, { name = "yapf", marker = "extra == 'misc'", specifier = "==0.40.2" }, ] provides-extras = ["misc", "visualization", "agents", "web", "perception", "unitree", "manipulation", "cpu", "cuda", "dev", "psql", "sim", "drone", "dds", "docker", "base"]