From 79dac55c42d06e05b979c58b3df38b74a28d5e47 Mon Sep 17 00:00:00 2001 From: Yusuke Watanabe Date: Wed, 29 Apr 2026 00:06:16 +0900 Subject: [PATCH] fix(resource): defer log_processor_usages import to avoid side-effect chain MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Importing scitex.resource was eagerly pulling the scitex.io → scitex_io → scitex_dev chain via _log_processor_usages, which historically carried dashboard / Flask machinery and risked spawning a listener on :5000 at import time. Sidecars / CLI tools shell out to `from scitex.resource import get_specs` on every metrics tick, so the import path must stay lightweight and side-effect-free. Move log_processor_usages and main to PEP 562 lazy attributes; eager imports now stop at the lightweight psutil/scitex.str spec probes. The public API is unchanged — `from scitex.resource import log_processor_usages` and `scitex.resource.main` still resolve, just on first access. Adds tests/scitex/resource/test_import_no_side_effect.py with three regression tests (subprocess-isolated): no listener appears on :5000 after import, no scitex.io / scitex_dev modules are eagerly loaded, and log_processor_usages remains accessible lazily. Refs: https://github.com/ywatanabe1989/todo/issues/157 Co-Authored-By: Claude Opus 4.7 (1M context) --- src/scitex/resource/__init__.py | 40 ++++- .../resource/test_import_no_side_effect.py | 138 ++++++++++++++++++ 2 files changed, 176 insertions(+), 2 deletions(-) create mode 100644 tests/scitex/resource/test_import_no_side_effect.py diff --git a/src/scitex/resource/__init__.py b/src/scitex/resource/__init__.py index aae3e2886..a3bdee28a 100755 --- a/src/scitex/resource/__init__.py +++ b/src/scitex/resource/__init__.py @@ -1,5 +1,18 @@ #!/usr/bin/env python3 -"""Scitex resource module.""" +"""Scitex resource module. + +Import discipline (todo #157): + Importing this module must be side-effect-free — no servers, no threads, + no dashboards. Sidecars / CLI tools shell out via + ``python -c "from scitex.resource import get_specs; ..."`` every metrics + tick, so import time matters and surprise listeners are unacceptable. + + To honour that, the eager imports below are limited to the lightweight + spec/processor probes (``psutil``, ``scitex.str``). + ``log_processor_usages`` / ``main`` pull in the heavy ``scitex.io`` + save/load chain, so they are exposed through PEP 562 module-level + ``__getattr__`` and only imported on first access. +""" from ._get_processor_usages import get_processor_usages from ._get_specs import ( @@ -13,7 +26,6 @@ _system_info, get_specs, ) -from ._log_processor_usages import log_processor_usages, main __all__ = [ "get_processor_usages", @@ -29,3 +41,27 @@ "_supple_python_info", "_system_info", ] + + +# Lazy attributes — defer the scitex.io import chain (and any transitive +# dashboard / Flask machinery) until a caller actually wants the logger. +_LAZY_ATTRS = { + "log_processor_usages": ("._log_processor_usages", "log_processor_usages"), + "main": ("._log_processor_usages", "main"), +} + + +def __getattr__(name): + if name in _LAZY_ATTRS: + from importlib import import_module + + module_path, attr = _LAZY_ATTRS[name] + module = import_module(module_path, package=__name__) + value = getattr(module, attr) + globals()[name] = value # cache for subsequent lookups + return value + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def __dir__(): + return sorted(set(globals()) | set(_LAZY_ATTRS) | set(__all__)) diff --git a/tests/scitex/resource/test_import_no_side_effect.py b/tests/scitex/resource/test_import_no_side_effect.py new file mode 100644 index 000000000..931cfbffc --- /dev/null +++ b/tests/scitex/resource/test_import_no_side_effect.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +# File: ./tests/scitex/resource/test_import_no_side_effect.py + +"""Regression tests for todo #157. + +`from scitex.resource import get_specs` must be silent — no dashboard +server, no background thread, no listener on the canonical Flask port +(5000). Sidecars / CLI tools shell out via this import on every metrics +tick, so any spawned listener is a real bug. + +References: + https://github.com/ywatanabe1989/todo/issues/157 +""" + +from __future__ import annotations + +import socket +import subprocess +import sys +import textwrap + + +def _is_listening(port: int, host: str = "127.0.0.1") -> bool: + """Return True if *something* is currently listening on host:port.""" + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(0.5) + try: + return s.connect_ex((host, port)) == 0 + finally: + s.close() + + +def _run_isolated(script: str) -> subprocess.CompletedProcess: + """Run *script* in a clean Python subprocess so test-time state can't + pollute the assertion (e.g. a dashboard server already imported by a + previous test, dev tooling, or the user's REPL). + """ + return subprocess.run( + [sys.executable, "-c", textwrap.dedent(script)], + capture_output=True, + text=True, + timeout=60, + ) + + +def test_import_does_not_start_dashboard(): + """Importing scitex.resource must not spawn a listener on :5000. + + We compare listener-on-:5000 state before vs. after the import inside + a fresh subprocess; the test passes when the import does not change + that state from "not-listening" to "listening". + """ + script = """ + import json, socket, sys, time + + def listening(port): + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.settimeout(0.5) + try: + return s.connect_ex(("127.0.0.1", port)) == 0 + finally: + s.close() + + before = listening(5000) + from scitex.resource import get_specs # the canonical sidecar entry point # noqa: F401 + # Allow any (errant) background thread to bind before we sample. + time.sleep(1.0) + after = listening(5000) + + json.dump({"before": before, "after": after}, sys.stdout) + """ + proc = _run_isolated(script) + assert proc.returncode == 0, ( + f"subprocess crashed during import:\nSTDOUT:{proc.stdout}\nSTDERR:{proc.stderr}" + ) + + import json + + state = json.loads(proc.stdout) + # If :5000 was already taken by something unrelated (CI runner, dev's + # own dashboard) we cannot assert about post-import state — but we can + # still assert the import did not flip the bit from free → bound. + if not state["before"]: + assert not state["after"], ( + "Importing scitex.resource started a listener on :5000 — " + "see todo #157. Dashboard / server launches must be opt-in via " + "an explicit start_dashboard() call or `python -m ...` entry." + ) + + +def test_import_does_not_pull_heavy_io_chain(): + """Importing scitex.resource must stay lightweight. + + The historical regression vector is the `_log_processor_usages` module + pulling `scitex.io` → `scitex_io` → `scitex_dev` (which has, in past + versions, pulled dashboard-bearing modules at import time). Keep this + chain *out* of the eager import path. + """ + script = """ + import json, sys + + before = set(sys.modules) + from scitex.resource import get_specs # noqa: F401 + new = set(sys.modules) - before + + offenders = sorted( + m for m in new + if m.startswith(("scitex.io", "scitex_io", "scitex_dev")) + ) + json.dump(offenders, sys.stdout) + """ + proc = _run_isolated(script) + assert proc.returncode == 0, ( + f"subprocess crashed during import:\nSTDOUT:{proc.stdout}\nSTDERR:{proc.stderr}" + ) + + import json + + offenders = json.loads(proc.stdout) + assert offenders == [], ( + "Importing scitex.resource eagerly pulled in the scitex.io / " + "scitex_dev chain, which has historically carried dashboard " + "side effects (todo #157). Keep log_processor_usages lazy. " + f"Offending modules: {offenders}" + ) + + +def test_log_processor_usages_still_accessible_lazily(): + """Lazy attribute access must still resolve `log_processor_usages`.""" + import scitex.resource as resource + + fn = resource.log_processor_usages # triggers lazy import + assert callable(fn) + # `main` is the alias used by ``python -m scitex.resource``-style entry. + assert callable(resource.main) + + +# EOF