shader-slang · ccummingsNV · Sep 4, 2025 · Sep 5, 2025 · Sep 5, 2025 · Sep 5, 2025
@@ -72,3 +72,6 @@ reportInvalidTypeForm = "warning"
 
 [tool.pytest.ini_options]
 pythonpath = ["."]
+markers = [
+    "memory_leak: Marks test as known to leak objects"
+]
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+import gc
 import hashlib
 import sys
 from pathlib import Path
@@ -26,7 +27,7 @@
     LogLevel,
     NativeHandle,
 )
-from slangpy.types.buffer import NDBuffer
+from slangpy.types.buffer import NDBuffer, get_lookup_module
 from slangpy.core.function import Function
 
 # Global variables for device isolation. If SELECTED_DEVICE_TYPES is None, no restriction.
@@ -44,6 +45,9 @@
 else:
     raise RuntimeError("Unsupported platform")
 
+# If live object tracking is supported, enable leak tracking
+LEAK_TRACKING_ENABLED = hasattr(spy.Object, "report_live_objects")
+
 
 # Called from pytest plugin if 'device-types' argument is provided
 def set_device_types(device_types_str: Optional[str]) -> None:
@@ -82,6 +86,59 @@ def set_device_types(device_types_str: Optional[str]) -> None:
 USED_TORCH_DEVICES: bool = False
 METAL_PARAMETER_BLOCK_SUPPORT: Optional[bool] = None
 
+# Most recent live-object report captured by save_live_objects() or
+# compare_and_save_live_objects(); None until the first capture.
+TRACKED_LIVE_OBJECTS: Optional[list[Any]] = None
+
+# Types to ignore when checking for leaked objects
+# - The reflection types are created and cached per device when buffers are loaded, so are hard
+#   to identify as actual leaks.
+# - CoopVec is created on demand within the device when the coopvec API is used, and so will appear
+#   as a leak for cached devices.
+IGNORE_LIVE_OBJECT_TYPES = ["NativeSlangType", "TypeLayoutReflection", "TypeReflection", "CoopVec"]
+
+
+def save_live_objects():
+    """
+    Capture the current live-object report as the baseline for later comparison.
+
+    The report returned by ``spy.Object.report_live_objects`` is stored in the
+    module-level ``TRACKED_LIVE_OBJECTS``. Does nothing when leak tracking is disabled.
+    """
+    global TRACKED_LIVE_OBJECTS
+    if not LEAK_TRACKING_ENABLED:
+        return
+    # NOTE(review): the meaning of the ``True`` argument is not visible here - confirm
+    # against the report_live_objects binding.
+    TRACKED_LIVE_OBJECTS = spy.Object.report_live_objects(True)
+
+
+def compare_and_save_live_objects(allowed_leaks: Optional[dict[str, int]] = None):
+    """
+    Compare the current live objects with the saved snapshot and raise on new ones.
+
+    An entry of the current report counts as leaked when its ``"object"`` value was not
+    present in the previous snapshot, its ``"class_name"`` is not listed in
+    ``IGNORE_LIVE_OBJECT_TYPES``, and the per-class budget in ``allowed_leaks`` is used up.
+    The current report always becomes the new snapshot. Does nothing when leak tracking
+    is disabled.
+
+    :param allowed_leaks: Optional mapping of class name to the number of new objects of
+        that class that are tolerated. The mapping passed in is not modified.
+    :raises RuntimeError: If any new object is neither ignored nor covered by the budget.
+    """
+    global TRACKED_LIVE_OBJECTS
+    if not LEAK_TRACKING_ENABLED:
+        return
+
+    # Run the garbage collector until a pass finds nothing more to collect, so that
+    # collectable Python garbage is gone before the report is taken.
+    while gc.collect() > 0:
+        pass
+
+    # Work on a copy so the caller's dict is never modified.
+    remaining = dict(allowed_leaks) if allowed_leaks else {}
+
+    previous = TRACKED_LIVE_OBJECTS
+    current = spy.Object.report_live_objects(True)
+
+    # Store the new snapshot before anything can raise, so the saved state is up to
+    # date on the error path as well.
+    TRACKED_LIVE_OBJECTS = current
+
+    # No snapshot was ever taken, so there is nothing to compare against. An empty
+    # snapshot is still a valid baseline and must be compared.
+    if previous is None:
+        return
+
+    # Only membership is needed, so a set of the previously seen "object" values is enough.
+    known = {entry["object"] for entry in previous}
+
+    leaked = []
+    for entry in current:
+        if entry["object"] in known:
+            continue
+        class_name = entry["class_name"]
+        if class_name in IGNORE_LIVE_OBJECT_TYPES:
+            continue
+        # Consume one unit of the budget for this class, if any is left.
+        if remaining.get(class_name, 0) > 0:
+            remaining[class_name] -= 1
+            continue
+        leaked.append(entry)
+
+    # Report every leaked object in a single error.
+    if leaked:
+        msg = "\n".join(f"  {entry}" for entry in leaked)
+        raise RuntimeError(f"Leaked objects detected:\n{msg}")
+
+
 # Always dump stuff when testing
 spy.set_dump_generated_shaders(True)
 # spy.set_dump_slang_intermediates(True)
@@ -104,6 +161,9 @@ def close_all_devices():
 
         torch.cuda.synchronize()
 
+    # Clear device cache
+    DEVICE_CACHE.clear()
+
     # Close all devices that were created during the tests.
     for device in Device.get_created_devices():
         print(f"Closing device on shutdown {device.desc.label}")
@@ -231,7 +291,15 @@ def get_device(
             )
 
     if use_cache:
+        # Cache device
         DEVICE_CACHE[cache_key] = device
+
+        # When leak tracking, init the slangpy lookup cache up front and save live
+        # objects so that we don't report cached device resources as leaks.
+        if LEAK_TRACKING_ENABLED:
+            get_lookup_module(device)
+            save_live_objects()
+
     return device
 
 

@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+import gc
 import pytest
 import inspect
 from typing import Any
@@ -12,6 +13,10 @@
     should_skip_test_for_device,
     should_skip_non_device_test,
     SELECTED_DEVICE_TYPES,
+    DEVICE_CACHE,
+    save_live_objects,
+    compare_and_save_live_objects,
+    LEAK_TRACKING_ENABLED,
 )
 
 
@@ -38,6 +43,63 @@ def pytest_sessionstart(session: pytest.Session):
     set_device_types(device_types_option)
 
 
+def check_live_objects():
+    """
+    Check the classes of all objects listed by ``spy.Object.report_live_objects``.
+
+    The reported objects are counted per class name. Classes on the ignore list are
+    skipped, classes with a per-device limit only print a warning when the limit is
+    exceeded, and any other class raises.
+
+    :raises RuntimeError: If an object of a class that is neither ignored nor limited
+        is reported.
+    """
+    # Three collection passes before the report is taken.
+    for _ in range(3):
+        gc.collect()
+
+    live = spy.Object.report_live_objects(False)
+    devices = len(DEVICE_CACHE)
+
+    # Upper bounds derived from the number of cached devices. Most types are 1-to-1
+    # with a device; slangpy may load one extra module per device for type lookups,
+    # which can also create a program layout per device.
+    limits = {
+        "Logger": devices,
+        "Device": devices,
+        "HotReload": devices,
+        "SlangSession": devices,
+        "SlangModule": devices * 2,
+        "SlangModuleData": devices * 2,
+        "SlangSessionData": devices,
+        "Fence": devices,
+        "FileSystemWatcher": devices,
+        "ProgramLayout": devices,
+        "CoopVec": devices,
+    }
+
+    # Loggers are known to persist, and the type info objects have no strict bound
+    # because slangpy caches the type infos used by buffers per device.
+    skipped = (
+        "Logger",
+        "LoggerOutput",
+        "TypeReflection",
+        "TypeLayoutReflection",
+        "NativeSlangType",
+    )
+
+    # Count objects per class, keeping first-seen order.
+    tally = {}
+    for entry in live:
+        name = entry["class_name"]
+        tally[name] = tally.get(name, 0) + 1
+
+    for name, seen in tally.items():
+        if name in skipped:
+            continue
+        if name not in limits:
+            print(f"Warning: Unexpected {name} count (actual: {seen})")
+            raise RuntimeError(f"Unexpected {name} count (actual: {seen})")
+        limit = limits[name]
+        if seen > limit:
+            print(f"Warning: {name} count mismatch (expected: {limit}, actual: {seen})")
+
+
 @pytest.hookimpl(trylast=True)
 def pytest_sessionfinish(session: pytest.Session, exitstatus: int):
     close_all_devices()
@@ -83,3 +145,34 @@ def pytest_runtest_setup(item: Any) -> None:
                 pytest.skip(
                     f"Skipping non-device test (target devices: {', '.join(target_device_names)})"
                 )
+
+
+@pytest.hookimpl(wrapper=True)
+def pytest_pyfunc_call(pyfuncitem: pytest.Function):
+    """
+    Wrap each test function call with a live-object leak check.
+
+    When leak tracking is enabled, the live objects are saved before the test body
+    runs. Afterwards any left over devices are closed and the live objects are
+    compared against that snapshot.
+
+    The ``memory_leak`` marker changes this: without a ``details`` keyword the check is
+    skipped for the test; with ``details`` the check runs and the given dict is passed
+    on as the allowed leaks.
+    """
+    # Bound unconditionally so both halves of the wrapper can rely on them.
+    leak_check = LEAK_TRACKING_ENABLED
+    allowed_leaks = None
+
+    if leak_check:
+        marker = pyfuncitem.get_closest_marker("memory_leak")
+        if marker is not None:
+            # A marked test is only checked when it declares its allowed leaks.
+            allowed_leaks = marker.kwargs.get("details")
+            leak_check = allowed_leaks is not None
+
+    # If checks enabled, save current live objects.
+    if leak_check:
+        save_live_objects()
+
+    # If the outcome is an exception, will raise the exception, so the check
+    # below is skipped for a test that raised.
+    res = yield
+
+    # If checks enabled, immediately close any left over devices, then
+    # check for leaked objects.
+    if leak_check:
+        close_leaked_devices()
+        compare_and_save_live_objects(allowed_leaks)
+
+    # Return result
+    return res
@@ -6,20 +6,23 @@
 from slangpy.testing import helpers
 
 
+@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_create_and_destroy_device_via_del(device_type: spy.DeviceType):
     device = helpers.get_device(device_type, use_cache=False)
     assert device is not None
     del device
 
 
+@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_create_and_destroy_device_via_none(device_type: spy.DeviceType):
     device = helpers.get_device(device_type, use_cache=False)
     assert device is not None
     device = None
 
 
+@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_load_module_and_cleanup_in_order(device_type: spy.DeviceType):
     device = helpers.get_device(device_type, use_cache=False)
@@ -39,6 +42,7 @@ def test_load_module_and_cleanup_in_order(device_type: spy.DeviceType):
     device = None
 
 
+@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_load_module_and_cleanup_in_reverse_order(device_type: spy.DeviceType):
     device = helpers.get_device(device_type, use_cache=False)

@@ -7,6 +7,7 @@
 from slangpy.testing import helpers
 
 
+@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_print(device_type: spy.DeviceType):
     device = spy.Device(type=device_type, enable_print=True, label=f"print-{device_type.name}")

@@ -1157,6 +1157,10 @@ def test_is_sub_type(test_id: str, device_type: spy.DeviceType):
     assert module.layout.is_sub_type(t, i)
 
 
+@pytest.mark.memory_leak(
+    "Leaks a module",
+    details={"SlangModule": 1, "SlangModuleData": 2, "SlangSessionData": 1, "ProgramLayout": 1},
+)
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_hot_reload_invalid(test_id: str, device_type: spy.DeviceType):
     device = helpers.get_device(type=device_type)

@@ -8,6 +8,7 @@
 from slangpy.testing import helpers
 
 
+@pytest.mark.memory_leak("Leaks a whole device!")
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_buffer_to_torch(device_type: spy.DeviceType):
     if device_type == spy.DeviceType.cuda:
@@ -50,6 +51,7 @@ def test_buffer_to_torch(device_type: spy.DeviceType):
     )
 
 
+@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_torch_interop(device_type: spy.DeviceType):
     if device_type == spy.DeviceType.cuda:

@@ -114,6 +114,10 @@ def test_call_mutable_func(device_type: DeviceType):
     assert np.allclose(data[:2], [0.05, 0.1])
 
 
+@pytest.mark.memory_leak(
+    "Leaks modules, probably issue with looking up types by name",
+    details={"ShaderProgram": 1, "ShaderProgramData": 1, "SlangModuleData": 2, "SlangModule": 2},
+)
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_read_back_with_global_func(device_type: DeviceType):
     m = load_test_module(device_type)

@@ -7,6 +7,7 @@
 from slangpy.testing import helpers
 
 
+@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1})
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_simple_int_call(device_type: DeviceType):
 
@@ -28,6 +29,7 @@ def test_simple_int_call(device_type: DeviceType):
     assert result == 42
 
 
+@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1})
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_simple_struct_call(device_type: DeviceType):
 
@@ -53,6 +55,7 @@ def test_simple_struct_call(device_type: DeviceType):
     assert result == 42
 
 
+@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1})
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_vectorize_struct_array(device_type: DeviceType):
 
@@ -88,6 +91,7 @@ def test_vectorize_struct_array(device_type: DeviceType):
     assert np.array_equal(results, np.array([2, 3, 4, 5], dtype=np.int32))
 
 
+@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1})
 @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
 def test_vectorize_struct_with_tensor_array(device_type: DeviceType):
     if device_type == DeviceType.metal:

@@ -502,6 +502,10 @@ def texture_return_value_impl(
     assert np.allclose(result_np, data.squeeze())
 
 
+@pytest.mark.memory_leak(
+    "Leaks modules, probably issue with looking up types by name",
+    details={"ShaderProgram": 1, "ShaderProgramData": 1, "SlangModuleData": 2, "SlangModule": 2},
+)
 @pytest.mark.parametrize(
     "texel_name", ["uint8_t", "uint16_t", "int8_t", "int16_t", "float", "half", "uint"]
 )
@@ -516,6 +520,10 @@ def test_texture_return_value(device_type: DeviceType, texel_name: str, dims: in
 
 # This case checks for when the return type is the string "texture".
 # This checks a subset of the "test_texture_return_value" parameters.
+@pytest.mark.memory_leak(
+    "Leaks modules, probably issue with looking up types by name",
+    details={"ShaderProgram": 1, "ShaderProgramData": 1, "SlangModuleData": 2, "SlangModule": 2},
+)
 @pytest.mark.parametrize("texel_name", ["float"])
 @pytest.mark.parametrize("dims", [1, 2, 3])
 @pytest.mark.parametrize("channels", [4])

@@ -163,6 +163,7 @@ def run_tensor_race_condition_tests(
 
 # Pytest for our most common default cuda-interop case, in which we've configured pytorch
 # and slangpy to share the same context and stream.
+@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
 @pytest.mark.parametrize("device_type", [spy.DeviceType.cuda])
 def test_shared_context_and_stream(device_type: spy.DeviceType):
     assert (
@@ -174,12 +175,14 @@ def test_shared_context_and_stream(device_type: spy.DeviceType):
 # Pytest for none-shared context case, which appears to avoid race conditions through some level
 # of synchronization in the default streams of separate contexts. For now this has shown not
 # to cause race conditions, so testing for that behaviour.
+@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
 @pytest.mark.parametrize("device_type", [spy.DeviceType.cuda])
 def test_non_shared_context(device_type: spy.DeviceType):
     assert run_tensor_race_condition_tests(share_context=False) == False
 
 
 # Pytest for known race condition case, where we use a custom stream in torch but not sharing it with slangpy.
+@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
 @pytest.mark.parametrize("device_type", [spy.DeviceType.cuda])
 def test_custom_stream_no_share(device_type: spy.DeviceType):
     pytest.skip("Race condition doesn't reproduce reliably on CI machines of varying specs")
@@ -190,6 +193,7 @@ def test_custom_stream_no_share(device_type: spy.DeviceType):
 
 
 # Pytest that removes the race condition by sharing the custom stream
+@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
 @pytest.mark.parametrize("device_type", [spy.DeviceType.cuda])
 def test_custom_stream_share(device_type: spy.DeviceType):
     assert (