Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
d2e9446
feat(native): add rebuild-on-change for NativeModule
jeff-hykin Mar 15, 2026
bc5b44b
fix: resolve relative rebuild_on_change paths against module cwd and …
jeff-hykin Mar 16, 2026
037a0b1
fix: resolve relative rebuild_on_change paths against module cwd and …
jeff-hykin Mar 16, 2026
f2b7b0a
improve native build
jeff-hykin Mar 16, 2026
18c56ea
CI code cleanup
jeff-hykin Mar 17, 2026
416d3dd
CI code cleanup
jeff-hykin Mar 17, 2026
e01688c
fixup pathing
jeff-hykin Mar 19, 2026
bfab461
fixup pathing
jeff-hykin Mar 19, 2026
9ec44ec
Merge branch 'jeff/feat/native_rebuild' of github.com:dimensionalOS/d…
jeff-hykin Mar 19, 2026
cb5041b
-
jeff-hykin Mar 19, 2026
c9fac15
Merge remote-tracking branch 'origin/dev' into jeff/feat/native_rebuild
jeff-hykin Mar 22, 2026
ee8e936
chore: regenerate uv.lock after merge with dev
jeff-hykin Mar 22, 2026
eb9a913
fix(change_detect): add threading.Lock alongside fcntl.flock
jeff-hykin Mar 22, 2026
4ed36cc
fix(native): include class qualname in build cache key
jeff-hykin Mar 22, 2026
5b7a68e
fix(native): prevent thread leaks in crash test
jeff-hykin Mar 23, 2026
68dd4bc
fix(test): isolate LCM multicast in flaky tests
jeff-hykin Mar 23, 2026
4ab751a
Merge remote-tracking branch 'origin/dev' into jeff/feat/native_rebuild
jeff-hykin Mar 30, 2026
e4807ad
Merge branch 'jeff/feat/native_rebuild' of github.com:dimensionalOS/d…
jeff-hykin Mar 30, 2026
a76290f
fix: address Greptile review comments on native_rebuild
jeff-hykin Mar 30, 2026
cbad5a7
CI code cleanup
jeff-hykin Mar 30, 2026
e006826
Merge branch 'dev' of github.com:dimensionalOS/dimos into jeff/feat/n…
jeff-hykin Mar 30, 2026
86cd04c
fix: move update_cache import to top of file (review feedback)
jeff-hykin Mar 31, 2026
1b69981
Merge branch 'dev' into jeff/feat/native_rebuild
jeff-hykin Mar 31, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 49 additions & 40 deletions dimos/core/native_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ class MyCppModule(NativeModule):

from __future__ import annotations

import collections
import enum
import inspect
import json
Expand All @@ -56,6 +55,7 @@ class MyCppModule(NativeModule):

from dimos.core.core import rpc
from dimos.core.module import Module, ModuleConfig
from dimos.utils.change_detect import PathEntry, did_change, update_cache
from dimos.utils.logging_config import setup_logger

if sys.version_info < (3, 13):
Expand All @@ -81,9 +81,10 @@ class NativeModuleConfig(ModuleConfig):
extra_env: dict[str, str] = Field(default_factory=dict)
shutdown_timeout: float = 10.0
log_format: LogFormat = LogFormat.TEXT
rebuild_on_change: list[PathEntry] | None = None

# Override in subclasses to exclude fields from CLI arg generation
cli_exclude: frozenset[str] = frozenset()
cli_exclude: frozenset[str] = frozenset({"rebuild_on_change"})

def to_cli_args(self) -> list[str]:
"""Auto-convert subclass config fields to CLI args.
Expand Down Expand Up @@ -132,11 +133,9 @@ class NativeModule(Module[_NativeConfig]):
_process: subprocess.Popen[bytes] | None = None
_watchdog: threading.Thread | None = None
_stopping: bool = False
_last_stderr_lines: collections.deque[str]

def __init__(self, **kwargs: Any) -> None:
super().__init__(**kwargs)
self._last_stderr_lines = collections.deque(maxlen=50)
self._resolve_paths()

@rpc
Expand All @@ -158,25 +157,15 @@ def start(self) -> None:
env = {**os.environ, **self.config.extra_env}
cwd = self.config.cwd or str(Path(self.config.executable).resolve().parent)

module_name = type(self).__name__
logger.info(
f"Starting native process: {module_name}",
module=module_name,
cmd=" ".join(cmd),
cwd=cwd,
)
logger.info("Starting native process", cmd=" ".join(cmd), cwd=cwd)
self._process = subprocess.Popen(
cmd,
env=env,
cwd=cwd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
logger.info(
f"Native process started: {module_name}",
module=module_name,
pid=self._process.pid,
)
logger.info("Native process started", pid=self._process.pid)

self._stopping = False
self._watchdog = threading.Thread(target=self._watch_process, daemon=True)
Expand All @@ -199,8 +188,11 @@ def stop(self) -> None:
if self._watchdog is not None and self._watchdog is not threading.current_thread():
self._watchdog.join(timeout=2)
self._watchdog = None
self._process = None
# Clean up the asyncio loop thread (from ModuleBase) BEFORE
# clearing _process — tests use _process=None as their exit
# signal, and the loop thread must be joined first.
super().stop()
self._process = None

def _watch_process(self) -> None:
"""Block until the native process exits; trigger stop() if it crashed."""
Expand All @@ -215,20 +207,10 @@ def _watch_process(self) -> None:

if self._stopping:
return

module_name = type(self).__name__
exe_name = Path(self.config.executable).name if self.config.executable else "unknown"

# Use buffered stderr lines from the reader thread for the crash report.
last_stderr = "\n".join(self._last_stderr_lines)

logger.error(
f"Native process crashed: {module_name} ({exe_name})",
module=module_name,
executable=exe_name,
"Native process died unexpectedly",
pid=self._process.pid,
returncode=rc,
last_stderr=last_stderr[:500] if last_stderr else None,
)
self.stop()

Expand All @@ -242,13 +224,10 @@ def _read_log_stream(self, stream: IO[bytes] | None, level: str) -> None:
if stream is None:
return
log_fn = getattr(logger, level)
is_stderr = level == "warning"
for raw in stream:
line = raw.decode("utf-8", errors="replace").rstrip()
if not line:
continue
if is_stderr:
self._last_stderr_lines.append(line)
if self.config.log_format == LogFormat.JSON:
try:
data = json.loads(line)
Expand All @@ -269,18 +248,44 @@ def _resolve_paths(self) -> None:
if not Path(self.config.executable).is_absolute() and self.config.cwd is not None:
self.config.executable = str(Path(self.config.cwd) / self.config.executable)

def _build_cache_name(self) -> str:
"""Return a stable, unique cache name for this module's build state."""
source_file = Path(inspect.getfile(type(self))).resolve()
return f"native_{source_file}:{type(self).__qualname__}"

def _maybe_build(self) -> None:
"""Run ``build_command`` if the executable does not exist."""
"""Run ``build_command`` if the executable does not exist or sources changed."""
exe = Path(self.config.executable)
if exe.exists():

# Check if rebuild needed due to source changes.
# Use update=False so the cache is NOT written yet — if the build
# fails the next check will still detect changes and retry.
needs_rebuild = False
if self.config.rebuild_on_change and exe.exists():
if did_change(
self._build_cache_name(),
self.config.rebuild_on_change,
cwd=self.config.cwd,
update=False,
):
logger.info("Source files changed, triggering rebuild", executable=str(exe))
needs_rebuild = True

if exe.exists() and not needs_rebuild:
return

if self.config.build_command is None:
raise FileNotFoundError(
f"Executable not found: {exe}. "
"Set build_command in config to auto-build, or build it manually."
)

# Don't unlink the exe before rebuilding — the build command is
# responsible for replacing it. For nix builds the exe lives inside
# a read-only store; `nix build -o` atomically swaps the output
# symlink without touching store contents.
logger.info(
"Executable not found, running build",
"Rebuilding" if needs_rebuild else "Executable not found, building",
executable=str(exe),
build_command=self.config.build_command,
)
Expand All @@ -300,16 +305,20 @@ def _maybe_build(self) -> None:
if line.strip():
logger.warning(line)
if proc.returncode != 0:
stderr_tail = stderr.decode("utf-8", errors="replace").strip()[-1000:]
raise RuntimeError(
f"Build command failed (exit {proc.returncode}): {self.config.build_command}\n"
f"stderr: {stderr_tail}"
f"Build command failed (exit {proc.returncode}): {self.config.build_command}"
)
if not exe.exists():
raise FileNotFoundError(
f"Build command succeeded but executable still not found: {exe}\n"
f"Build output may have been written to a different path. "
f"Check that build_command produces the executable at the expected location."
f"Build command succeeded but executable still not found: {exe}"
)

# Seed the cache after a successful build so the next check has a baseline.
# Uses update_cache (not did_change) so we only write the hash after a
# confirmed-good build — a failed build won't poison the cache.
if self.config.rebuild_on_change:
update_cache(
self._build_cache_name(), self.config.rebuild_on_change, cwd=self.config.cwd
)

def _collect_topics(self) -> dict[str, str]:
Expand Down
15 changes: 9 additions & 6 deletions dimos/core/test_native_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,16 @@ def test_process_crash_triggers_stop() -> None:
assert mod._process is not None
pid = mod._process.pid

# Wait for the process to die and the watchdog to call stop()
for _ in range(30):
time.sleep(0.1)
if mod._process is None:
break
try:
# Wait for the process to die and the watchdog to call stop()
for _ in range(30):
time.sleep(0.1)
if mod._process is None:
break

assert mod._process is None, f"Watchdog did not clean up after process {pid} died"
assert mod._process is None, f"Watchdog did not clean up after process {pid} died"
finally:
mod.stop()

# Wait for background threads (run_forever, _lcm_loop, _watch_process) to finish
# after the watchdog-triggered stop(). Without this, monitor_threads catches them.
Expand Down
140 changes: 140 additions & 0 deletions dimos/core/test_native_rebuild.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Copyright 2026 Dimensional Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for NativeModule rebuild-on-change integration."""

from __future__ import annotations

from pathlib import Path
import stat

import pytest

from dimos.core.native_module import NativeModule, NativeModuleConfig
from dimos.utils.change_detect import PathEntry


@pytest.fixture(autouse=True)
def _use_tmp_cache(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
    """Point change detection at a per-test cache directory.

    Autouse: for every test in this module, ``_get_cache_dir`` in
    ``dimos.utils.change_detect`` is replaced with a function that returns
    ``tmp_path / "cache"``. ``monkeypatch`` restores the original afterwards.
    """

    def _tmp_cache_dir() -> Path:
        return tmp_path / "cache"

    monkeypatch.setattr("dimos.utils.change_detect._get_cache_dir", _tmp_cache_dir)


@pytest.fixture()
def build_env(tmp_path: Path) -> dict[str, Path]:
    """Create a throwaway build environment under ``tmp_path``.

    Returns:
        A dict with these keys:

        * ``src``: directory containing a single ``main.c``.
        * ``exe``: path where the build script creates the executable
          (not created by this fixture).
        * ``marker``: path the build script touches on every run, so tests
          can tell whether a build happened (not created by this fixture).
        * ``build_script``: executable ``build.sh`` that creates ``exe`` and
          ``marker``.
    """
    import shlex  # local import keeps this fixture self-contained

    src = tmp_path / "src"
    src.mkdir()
    (src / "main.c").write_text("int main() { return 0; }")

    exe = tmp_path / "mybin"
    marker = tmp_path / "build_ran.marker"

    # The paths are pasted into shell source, so quote them: a temp directory
    # containing spaces or shell metacharacters would otherwise split into
    # several arguments and the script would touch the wrong files.
    exe_arg = shlex.quote(str(exe))
    marker_arg = shlex.quote(str(marker))

    # Build script: create the executable and a marker file
    build_script = tmp_path / "build.sh"
    build_script.write_text(
        f"#!/bin/sh\ntouch {exe_arg}\nchmod +x {exe_arg}\ntouch {marker_arg}\n"
    )
    build_script.chmod(build_script.stat().st_mode | stat.S_IEXEC)

    return {"src": src, "exe": exe, "marker": marker, "build_script": build_script}


class _RebuildConfig(NativeModuleConfig):
    """Config for the rebuild tests."""

    # Empty default; the tests always pass the executable path explicitly.
    executable: str = ""
    # Paths checked for changes before an existing executable is reused.
    # None (or an empty list) means no change detection is performed.
    rebuild_on_change: list[PathEntry] | None = None


class _RebuildModule(NativeModule[_RebuildConfig]):
    """Minimal ``NativeModule`` subclass configured by ``_RebuildConfig``."""

    default_config = _RebuildConfig


def _make_module(build_env: dict[str, Path]) -> _RebuildModule:
    """Create a ``_RebuildModule`` wired to the temp build environment.

    Args:
        build_env: Mapping with ``exe``, ``build_script`` and ``src`` paths,
            as produced by the ``build_env`` fixture.

    Returns:
        A module whose build command runs the build script with ``sh``, which
        watches the ``src`` directory for changes and uses ``src`` as its cwd.
    """
    import shlex  # local import keeps this helper self-contained

    # Quote the script path so a temp directory with spaces or shell
    # metacharacters still yields a single argument. Safe paths are
    # returned unchanged by shlex.quote.
    script_arg = shlex.quote(str(build_env["build_script"]))
    return _RebuildModule(
        executable=str(build_env["exe"]),
        build_command=f"sh {script_arg}",
        rebuild_on_change=[str(build_env["src"])],
        cwd=str(build_env["src"]),
    )


def test_rebuild_on_change_triggers_build(build_env: dict[str, Path]) -> None:
    """Editing a watched source between builds must run ``build_command`` again."""
    module = _make_module(build_env)
    try:
        binary = build_env["exe"]
        build_marker = build_env["marker"]
        source_file = build_env["src"] / "main.c"

        # Nothing has been built yet, so the first call has to build.
        module._maybe_build()
        assert binary.exists()
        assert build_marker.exists()

        # Clear the evidence of the first build; with untouched sources the
        # second call must leave the marker absent.
        build_marker.unlink()
        module._maybe_build()
        assert not build_marker.exists()

        # Changing the source content must bring the marker back.
        source_file.write_text("int main() { return 1; }")
        module._maybe_build()
        assert build_marker.exists(), "Build should have re-run after source change"
    finally:
        # Runs even when an assertion above fails.
        module.stop()


def test_no_change_skips_rebuild(build_env: dict[str, Path]) -> None:
    """A repeated ``_maybe_build`` with untouched sources must not run the build."""
    module = _make_module(build_env)
    try:
        build_marker = build_env["marker"]

        # The first call builds and therefore leaves the marker behind.
        module._maybe_build()
        assert build_marker.exists()

        # Remove the marker, then call again without touching any source:
        # a skipped build leaves the marker absent.
        build_marker.unlink()
        module._maybe_build()
        assert not build_marker.exists(), "Build should have been skipped (no source changes)"
    finally:
        # Runs even when an assertion above fails.
        module.stop()


def test_rebuild_on_change_none_skips_check(build_env: dict[str, Path]) -> None:
    """With ``rebuild_on_change=None`` a source edit must not trigger a rebuild."""
    import shlex  # local import keeps this test self-contained

    exe = build_env["exe"]
    marker = build_env["marker"]

    # Quote the script path so a temp directory with spaces or shell
    # metacharacters still yields a single argument. Safe paths are
    # returned unchanged by shlex.quote.
    script_arg = shlex.quote(str(build_env["build_script"]))
    mod = _RebuildModule(
        executable=str(exe),
        build_command=f"sh {script_arg}",
        rebuild_on_change=None,
        cwd=str(build_env["src"]),
    )
    try:
        # Initial build: the executable is missing, so the build runs.
        mod._maybe_build()
        assert exe.exists()
        assert marker.exists()
        marker.unlink()

        # Modify source — but rebuild_on_change is None, so no rebuild
        (build_env["src"] / "main.c").write_text("int main() { return 1; }")
        mod._maybe_build()
        assert not marker.exists(), "Should not rebuild when rebuild_on_change is None"
    finally:
        # Runs even when an assertion above fails.
        mod.stop()
Loading
Loading