From 6e826e65b107e902f932e222b88aa367416d436b Mon Sep 17 00:00:00 2001 From: Chris Cummings Date: Thu, 4 Sep 2025 14:02:46 +0100 Subject: [PATCH 1/8] Fixes for slangpy leaking python object refs --- src/sgl/device/shader.h | 7 +++++++ src/sgl/device/types.h | 18 ++++++++++++++++++ src/slangpy_ext/utils/slangpy.cpp | 16 ++++++++++++++++ src/slangpy_ext/utils/slangpy.h | 13 +++++++++++++ src/slangpy_ext/utils/slangpyfunction.cpp | 17 ++++++++++++++++- src/slangpy_ext/utils/slangpyfunction.h | 7 +++++++ src/slangpy_ext/utils/slangpypackedarg.h | 1 + .../utils/slangpystridedbufferview.h | 1 + 8 files changed, 79 insertions(+), 1 deletion(-) diff --git a/src/sgl/device/shader.h b/src/sgl/device/shader.h index ec1dcca8a..7daff68d0 100644 --- a/src/sgl/device/shader.h +++ b/src/sgl/device/shader.h @@ -236,6 +236,8 @@ struct SlangSessionDesc { /// Internal data stored once the slang session has been created. struct SlangSessionData : Object { + SGL_OBJECT(SlangSessionData) + /// Pointer to internal slang session. Slang::ComPtr slang_session; @@ -353,6 +355,8 @@ struct SlangModuleDesc { }; struct SlangModuleData : Object { + SGL_OBJECT(SlangModuleData) + slang::IModule* slang_module = nullptr; std::string name; std::filesystem::path path; @@ -429,6 +433,8 @@ struct SlangEntryPointDesc { std::vector type_conformances; }; struct SlangEntryPointData : Object { + SGL_OBJECT(SlangEntryPointData) + Slang::ComPtr slang_entry_point; std::string name; ShaderStage stage; @@ -477,6 +483,7 @@ struct ShaderProgramDesc { std::string label; }; struct ShaderProgramData : Object { + SGL_OBJECT(ShaderProgramData) Slang::ComPtr linked_program; Slang::ComPtr rhi_shader_program; }; diff --git a/src/sgl/device/types.h b/src/sgl/device/types.h index 128877814..b26b57c2d 100644 --- a/src/sgl/device/types.h +++ b/src/sgl/device/types.h @@ -795,6 +795,24 @@ struct HeapReport { uint64_t total_mem_usage{0}; /// Number of allocations. uint64_t num_allocations{0}; + + std::string to_string() const + { + return fmt::format( + "HeapReport(\n" + " name = \"{}\",\n" + " num_pages = {},\n" + " total_allocated = {},\n" + " total_mem_usage = {},\n" + " num_allocations = {}\n" + ")", + name, + num_pages, + total_allocated, + total_mem_usage, + num_allocations + ); + } }; } // namespace sgl diff --git a/src/slangpy_ext/utils/slangpy.cpp b/src/slangpy_ext/utils/slangpy.cpp index 1b98e4bed..fc3be62e5 100644 --- a/src/slangpy_ext/utils/slangpy.cpp +++ b/src/slangpy_ext/utils/slangpy.cpp @@ -28,6 +28,16 @@ extern void buffer_copy_from_numpy(Buffer* self, nb::ndarray data); extern nb::ndarray buffer_to_torch(Buffer* self, DataType type, std::vector shape, std::vector strides, size_t offset); +template<> +struct GcHelper { + void traverse(slangpy::NativeCallRuntimeOptions*, GcVisitor& visitor) + { + visitor("uniforms"); + visitor("this_ref"); + } + void clear(slangpy::NativeCallRuntimeOptions* opts) { opts->clear(); } +}; + } // namespace sgl namespace sgl::slangpy { @@ -1271,6 +1281,12 @@ SGL_PY_EXPORT(utils_slangpy) &NativeCallRuntimeOptions::set_uniforms, D_NA(NativeCallRuntimeOptions, uniforms) ) + .def_prop_rw( + "this_ref", + &NativeCallRuntimeOptions::get_this, + &NativeCallRuntimeOptions::set_this, + D_NA(NativeCallRuntimeOptions, this_ref) + ) .def_prop_rw( "cuda_stream", &NativeCallRuntimeOptions::get_cuda_stream, diff --git a/src/slangpy_ext/utils/slangpy.h b/src/slangpy_ext/utils/slangpy.h index 81f75477f..e2702e563 100644 --- a/src/slangpy_ext/utils/slangpy.h +++ b/src/slangpy_ext/utils/slangpy.h @@ -127,6 +127,7 @@ class NativeObject : public Object { /// Nanobind trampoline class for NativeObject class PyNativeObject : public NativeObject { + SGL_OBJECT(PyNativeObject) public: NB_TRAMPOLINE(NativeObject, 1); @@ -529,6 +530,7 @@ class NativeBoundVariableRuntime : public Object { /// Binding information for a call to a compute kernel. Includes a set of positional /// and keyword arguments as bound variables. class NativeBoundCallRuntime : Object { + SGL_OBJECT(NativeBoundCallRuntime) public: NativeBoundCallRuntime() = default; @@ -579,6 +581,7 @@ class NativeBoundCallRuntime : Object { }; class NativeCallRuntimeOptions : Object { + SGL_OBJECT(NativeCallRuntimeOptions) public: /// Get the uniforms. nb::list get_uniforms() const { return m_uniforms; } @@ -598,6 +601,13 @@ class NativeCallRuntimeOptions : Object { /// Set the CUDA stream. void set_cuda_stream(NativeHandle cuda_stream) { m_cuda_stream = cuda_stream; } + /// Clear internal data for garbage collection + void clear() + { + m_uniforms.clear(); + m_this = nb::none(); + } + private: nb::list m_uniforms; nb::object m_this{nb::none()}; @@ -622,6 +632,7 @@ class NativeCallRuntimeOptions : Object { /// Contains the compute kernel for a call, the corresponding bindings and any additional /// options provided by the user. class NativeCallData : Object { + SGL_OBJECT(NativeCallData) public: NativeCallData() = default; @@ -778,6 +789,8 @@ typedef std::function& builder, nb::handle)> Bu /// Native side of system for caching call data info for given function signatures. class NativeCallDataCache : Object { + SGL_OBJECT(NativeCallDataCache) + public: NativeCallDataCache(); diff --git a/src/slangpy_ext/utils/slangpyfunction.cpp b/src/slangpy_ext/utils/slangpyfunction.cpp index a3d9694f3..0308160f3 100644 --- a/src/slangpy_ext/utils/slangpyfunction.cpp +++ b/src/slangpy_ext/utils/slangpyfunction.cpp @@ -7,10 +7,21 @@ namespace sgl { +template<> +struct GcHelper { + void traverse(slangpy::NativeFunctionNode*, GcVisitor& visitor) + { + visitor("_native_data"); + visitor("_native_parent"); + } + void clear(slangpy::NativeFunctionNode* node) { node->garbage_collect(); } +}; + } // namespace sgl namespace sgl::slangpy { + ref NativeFunctionNode::build_call_data(NativeCallDataCache* cache, nb::args args, nb::kwargs kwargs) { auto options = make_ref(); @@ -128,7 +139,11 @@ SGL_PY_EXPORT(utils_slangpy_function) nb::sgl_enum(slangpy, "FunctionNodeType"); - nb::class_(slangpy, "NativeFunctionNode") + nb::class_( + slangpy, + "NativeFunctionNode", + gc_helper_type_slots() + ) .def( "__init__", [](NativeFunctionNode& self, diff --git a/src/slangpy_ext/utils/slangpyfunction.h b/src/slangpy_ext/utils/slangpyfunction.h index 309231daf..a56eb6551 100644 --- a/src/slangpy_ext/utils/slangpyfunction.h +++ b/src/slangpy_ext/utils/slangpyfunction.h @@ -31,6 +31,7 @@ SGL_ENUM_INFO( SGL_ENUM_REGISTER(FunctionNodeType); class NativeFunctionNode : NativeObject { + SGL_OBJECT(NativeFunctionNode) public: NativeFunctionNode(NativeFunctionNode* parent, FunctionNodeType type, nb::object data) : m_parent(parent) @@ -108,6 +109,12 @@ class NativeFunctionNode : NativeObject { return nullptr; } + void garbage_collect() + { + m_parent = nullptr; + m_data = nb::none(); + } + private: ref m_parent; FunctionNodeType m_type; diff --git a/src/slangpy_ext/utils/slangpypackedarg.h b/src/slangpy_ext/utils/slangpypackedarg.h index 7d392b6ab..3a4fe1cf4 100644 --- a/src/slangpy_ext/utils/slangpypackedarg.h +++ b/src/slangpy_ext/utils/slangpypackedarg.h @@ -16,6 +16,7 @@ namespace sgl::slangpy { class NativeMarshall; class NativePackedArg : public NativeObject { + SGL_OBJECT(NativePackedArg) public: NativePackedArg(ref python, ref shader_object, nb::object python_object) : m_python(std::move(python)) diff --git a/src/slangpy_ext/utils/slangpystridedbufferview.h b/src/slangpy_ext/utils/slangpystridedbufferview.h index cf93fafb7..601332fd6 100644 --- a/src/slangpy_ext/utils/slangpystridedbufferview.h +++ b/src/slangpy_ext/utils/slangpystridedbufferview.h @@ -29,6 +29,7 @@ struct StridedBufferViewDesc { }; class StridedBufferView : public NativeObject { + SGL_OBJECT(StridedBufferView) public: StridedBufferView(Device* device, const StridedBufferViewDesc& desc, ref storage); virtual ~StridedBufferView() { } From 1aca68a9ef2a751666ee29758ca81fbb0ea6bbe6 Mon Sep 17 00:00:00 2001 From: Chris Cummings Date: Fri, 5 Sep 2025 09:20:42 +0100 Subject: [PATCH 2/8] Resolve type reflections by full name --- slangpy/types/buffer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/slangpy/types/buffer.py b/slangpy/types/buffer.py index 7a23b22de..15eadeb84 100644 --- a/slangpy/types/buffer.py +++ b/slangpy/types/buffer.py @@ -134,11 +134,11 @@ def resolve_element_type(program_layout: SlangProgramLayout, element_type: Any) if element_type.module.layout == program_layout: element_type = element_type.struct else: - element_type = program_layout.find_type_by_name(element_type.name) + element_type = program_layout.find_type_by_name(element_type.full_name) elif isinstance(element_type, TypeReflection): - element_type = program_layout.find_type(element_type) + element_type = program_layout.find_type_by_name(element_type.full_name) elif isinstance(element_type, TypeLayoutReflection): - element_type = program_layout.find_type(element_type.type) + element_type = program_layout.find_type_by_name(element_type.type.full_name) elif isinstance(element_type, Marshall): if element_type.slang_type.program == program_layout: element_type = element_type.slang_type From daf76d9cf06d3661c6d3bbcb4ac843466bdc565c Mon Sep 17 00:00:00 2001 From: Chris Cummings Date: Fri, 5 Sep 2025 09:21:03 +0100 Subject: [PATCH 3/8] Add more object labels --- src/sgl/device/reflection.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/sgl/device/reflection.h b/src/sgl/device/reflection.h index c8920e25a..2e299215c 100644 --- a/src/sgl/device/reflection.h +++ b/src/sgl/device/reflection.h @@ -229,6 +229,7 @@ class SGL_API BaseReflectionObjectImpl : public BaseReflectionObject { }; class SGL_API DeclReflection : public BaseReflectionObjectImpl { + SGL_OBJECT(DeclReflection) public: DeclReflection(ref owner, slang::DeclReflection* target) @@ -346,6 +347,7 @@ class SGL_API DeclReflectionIndexedChildList : public BaseReflectionIndexedList< }; class SGL_API TypeReflection : public BaseReflectionObjectImpl { + SGL_OBJECT(TypeReflection) public: enum class Kind { none = SLANG_TYPE_KIND_NONE, @@ -676,6 +678,7 @@ class SGL_API TypeReflectionFieldList : public BaseReflectionList { + SGL_OBJECT(TypeLayoutReflection) public: static ref from_slang(ref owner, slang::TypeLayoutReflection* type_layout_reflection) @@ -798,6 +801,7 @@ class SGL_API TypeLayoutReflectionFieldList }; class SGL_API FunctionReflection : public BaseReflectionObjectImpl { + SGL_OBJECT(FunctionReflection) public: FunctionReflection(ref owner, slang::FunctionReflection* target) : BaseReflectionObjectImpl(std::move(owner), target) @@ -911,6 +915,7 @@ class SGL_API FunctionReflectionOverloadList : public BaseReflectionList { + SGL_OBJECT(VariableReflection) public: static ref from_slang(ref owner, slang::VariableReflection* variable_reflection) @@ -964,6 +969,7 @@ class SGL_API VariableLayoutReflection : public BaseReflectionObjectImpl { + SGL_OBJECT(EntryPointLayout) public: static ref from_slang(ref owner, slang::EntryPointLayout* entry_point_reflection) @@ -1027,6 +1033,7 @@ class SGL_API EntryPointLayoutParameterList : public BaseReflectionList { + SGL_OBJECT(ProgramLayout) public: static ref from_slang(ref owner, slang::ProgramLayout* program_layout) { From 35e11c05e9b68d9896ca312faf9df1fd13f69f78 Mon Sep 17 00:00:00 2001 From: Chris Cummings Date: Fri, 5 Sep 2025 09:21:23 +0100 Subject: [PATCH 4/8] Bind heap tostring --- src/slangpy_ext/device/device.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/slangpy_ext/device/device.cpp b/src/slangpy_ext/device/device.cpp index 3816475f3..6bc77b5cf 100644 --- a/src/slangpy_ext/device/device.cpp +++ b/src/slangpy_ext/device/device.cpp @@ -244,7 +244,8 @@ SGL_PY_EXPORT(device_device) .def_rw("num_pages", &HeapReport::num_pages, D_NA(HeapReport, num_pages)) .def_rw("total_allocated", &HeapReport::total_allocated, D_NA(HeapReport, total_allocated)) .def_rw("total_mem_usage", &HeapReport::total_mem_usage, D_NA(HeapReport, total_mem_usage)) - .def_rw("num_allocations", &HeapReport::num_allocations, D_NA(HeapReport, num_allocations)); + .def_rw("num_allocations", &HeapReport::num_allocations, D_NA(HeapReport, num_allocations)) + .def("__repr__", &HeapReport::to_string); nb::class_ device(m, "Device", nb::is_weak_referenceable(), D(Device)); device.def( From 836635cc25f84d46e628c7e08f8d778958ad291b Mon Sep 17 00:00:00 2001 From: Chris Cummings Date: Fri, 5 Sep 2025 09:22:05 +0100 Subject: [PATCH 5/8] Correctly use full name in type util --- slangpy/core/utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/slangpy/core/utils.py b/slangpy/core/utils.py index cf301ccad..7bc41e59f 100644 --- a/slangpy/core/utils.py +++ b/slangpy/core/utils.py @@ -127,8 +127,10 @@ def find_type_layout_for_buffer( ): if isinstance(slang_type, str): slang_type_name = slang_type - elif isinstance(slang_type, (TypeReflection, TypeLayoutReflection)): - slang_type_name = slang_type.name + elif isinstance(slang_type, TypeReflection): + slang_type_name = slang_type.full_name + elif isinstance(slang_type, TypeLayoutReflection): + slang_type_name = slang_type.type.full_name buffer_type = program_layout.find_type_by_name(f"StructuredBuffer<{slang_type_name}>") buffer_layout = program_layout.get_type_layout(buffer_type) return buffer_layout.element_type_layout From 444137f4184dcf618bd6e7aee8084f10848cc9b7 Mon Sep 17 00:00:00 2001 From: Chris Cummings Date: Fri, 5 Sep 2025 10:17:42 +0100 Subject: [PATCH 6/8] Initial work on mem leak system --- pyproject.toml | 3 + slangpy/testing/helpers.py | 44 +++++++++++- slangpy/testing/plugin.py | 79 +++++++++++++++++++++ slangpy/tests/slangpy_tests/test_modules.py | 1 + src/sgl/core/object.cpp | 29 +++++--- src/sgl/core/object.h | 17 ++++- src/slangpy_ext/core/object.cpp | 22 +++++- 7 files changed, 182 insertions(+), 13 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index db9059056..d10f84d66 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,3 +72,6 @@ reportInvalidTypeForm = "warning" [tool.pytest.ini_options] pythonpath = ["."] +markers = [ + "memory_leak: Marks test as known to leak objects" +] diff --git a/slangpy/testing/helpers.py b/slangpy/testing/helpers.py index 8abd0f930..c39fc9bb2 100644 --- a/slangpy/testing/helpers.py +++ b/slangpy/testing/helpers.py @@ -1,11 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +import gc import hashlib import sys from pathlib import Path from typing import Any, Optional, Sequence, Union, cast from os import PathLike import inspect +import objgraph import pytest import numpy as np @@ -26,7 +28,7 @@ LogLevel, NativeHandle, ) -from slangpy.types.buffer import NDBuffer +from slangpy.types.buffer import NDBuffer, get_lookup_module from slangpy.core.function import Function # Global variables for device isolation. If SELECTED_DEVICE_TYPES is None, no restriction. @@ -82,6 +84,38 @@ def set_device_types(device_types_str: Optional[str]) -> None: USED_TORCH_DEVICES: bool = False METAL_PARAMETER_BLOCK_SUPPORT: Optional[bool] = None +TRACKED_LIVE_OBJECTS: Optional[list[Any]] = None + +IGNORE_LIVE_OBJECT_TYPES = ["NativeSlangType", "TypeLayoutReflection", "TypeReflection"] + + +def save_live_objects(): + print("Saving live objects") + global TRACKED_LIVE_OBJECTS + TRACKED_LIVE_OBJECTS = spy.Object.report_live_objects(True) + # objgraph.show_growth(limit=1) + + +def compare_and_save_live_objects(): + while gc.collect() > 0: + pass + # objgraph.show_growth() + # objgraph.show_backrefs(objgraph.by_type('ArrayType')[0], max_depth=4, filename='backref.svg') + global TRACKED_LIVE_OBJECTS + new = spy.Object.report_live_objects(True) + if TRACKED_LIVE_OBJECTS: + errors = [] + current_by_address = {x["object"]: x for x in TRACKED_LIVE_OBJECTS} + for obj in new: + if obj["object"] not in current_by_address: + if not obj["class_name"] in IGNORE_LIVE_OBJECT_TYPES: + errors.append(obj) + if len(errors) > 0: + msg = "\n".join([f" {e}" for e in errors]) + raise RuntimeError(f"Leaked objects detected:\n{msg}") + TRACKED_LIVE_OBJECTS = new + + # Always dump stuff when testing spy.set_dump_generated_shaders(True) # spy.set_dump_slang_intermediates(True) @@ -104,6 +138,9 @@ def close_all_devices(): torch.cuda.synchronize() + # Clear device cache + DEVICE_CACHE.clear() + # Close all devices that were created during the tests. for device in Device.get_created_devices(): print(f"Closing device on shutdown {device.desc.label}") @@ -232,6 +269,11 @@ def get_device( if use_cache: DEVICE_CACHE[cache_key] = device + get_lookup_module( + device + ) # Init the slangpy loopup cache up front to make tracking more robust + save_live_objects() # Update live object tracking if caching device as it won't get freed + return device diff --git a/slangpy/testing/plugin.py b/slangpy/testing/plugin.py index 78387f89c..233891242 100644 --- a/slangpy/testing/plugin.py +++ b/slangpy/testing/plugin.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +import gc import pytest import inspect from typing import Any @@ -12,6 +13,9 @@ should_skip_test_for_device, should_skip_non_device_test, SELECTED_DEVICE_TYPES, + DEVICE_CACHE, + save_live_objects, + compare_and_save_live_objects, ) @@ -38,6 +42,63 @@ def pytest_sessionstart(session: pytest.Session): set_device_types(device_types_option) +def check_live_objects(): + gc.collect() + gc.collect() + gc.collect() + + objs = spy.Object.report_live_objects(False) + + num_cache_devices = len(DEVICE_CACHE) + + # Estimate how many of these global types can exist based on number of cached devices. + # Most are 1-to-1, however slangpy can load an extra module per device for type lookups, + # which also results in the potential creation of a program layout per device. + max_expected_counts = { + "Logger": num_cache_devices, + "Device": num_cache_devices, + "HotReload": num_cache_devices, + "SlangSession": num_cache_devices, + "SlangModule": num_cache_devices * 2, + "SlangModuleData": num_cache_devices * 2, + "SlangSessionData": num_cache_devices, + "Fence": num_cache_devices, + "FileSystemWatcher": num_cache_devices, + "ProgramLayout": num_cache_devices, + "CoopVec": num_cache_devices, + } + + # Loggers are known to persist, and the type info is not strictly bounded, as + # type infos used by buffers in slangpy are cached per device. + ignore_classes = [ + "Logger", + "LoggerOutput", + "TypeReflection", + "TypeLayoutReflection", + "NativeSlangType", + ] + + actual_count_by_class_name = {} + for obj in objs: + class_name = obj["class_name"] + if class_name in actual_count_by_class_name: + actual_count_by_class_name[class_name] += 1 + else: + actual_count_by_class_name[class_name] = 1 + + for class_name, count in actual_count_by_class_name.items(): + if class_name in ignore_classes: + continue + if class_name in max_expected_counts: + if count > max_expected_counts[class_name]: + print( + f"Warning: {class_name} count mismatch (expected: {max_expected_counts[class_name]}, actual: {count})" + ) + else: + print(f"Warning: Unexpected {class_name} count (actual: {count})") + raise RuntimeError(f"Unexpected {class_name} count (actual: {count})") + + @pytest.hookimpl(trylast=True) def pytest_sessionfinish(session: pytest.Session, exitstatus: int): close_all_devices() @@ -83,3 +144,21 @@ def pytest_runtest_setup(item: Any) -> None: pytest.skip( f"Skipping non-device test (target devices: {', '.join(target_device_names)})" ) + + +@pytest.hookimpl(wrapper=True) +def pytest_pyfunc_call(pyfuncitem: pytest.Function): + + leaks_mem = pyfuncitem.get_closest_marker("memory_leak") != None + + if not leaks_mem: + save_live_objects() + + # If the outcome is an exception, will raise the exception. + res = yield + + if not leaks_mem: + compare_and_save_live_objects() + + # Return result + return res diff --git a/slangpy/tests/slangpy_tests/test_modules.py b/slangpy/tests/slangpy_tests/test_modules.py index d7439614f..2a539103e 100644 --- a/slangpy/tests/slangpy_tests/test_modules.py +++ b/slangpy/tests/slangpy_tests/test_modules.py @@ -114,6 +114,7 @@ def test_call_mutable_func(device_type: DeviceType): assert np.allclose(data[:2], [0.05, 0.1]) +@pytest.mark.memory_leak("Leaks modules") @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_read_back_with_global_func(device_type: DeviceType): m = load_test_module(device_type) diff --git a/src/sgl/core/object.cpp b/src/sgl/core/object.cpp index 8518e877a..7b531201a 100644 --- a/src/sgl/core/object.cpp +++ b/src/sgl/core/object.cpp @@ -117,26 +117,37 @@ PyObject* Object::self_py() const noexcept #if SGL_ENABLE_OBJECT_TRACKING -void Object::report_live_objects() +std::string LiveObjectInfo::to_string() +{ + return fmt::format( + "address={}, self_py={}, ref_count={}, class_name={}", + fmt::ptr(object), + self_py ? fmt::ptr(self_py) : "null", + ref_count, + class_name + ); +} + +std::vector Object::report_live_objects(bool log_to_tty) { std::lock_guard lock(s_tracked_objects_mutex); + std::vector res; if (!s_tracked_objects.empty()) { - fmt::println("Found {} live objects!", s_tracked_objects.size()); + if (log_to_tty) + fmt::println("Found {} live objects!", s_tracked_objects.size()); for (const Object* object : s_tracked_objects) { uint64_t ref_count = object->ref_count(); PyObject* self_py = object->self_py(); if (self_py) ref_count = object_ref_cnt_py(self_py); - fmt::println( - "Live object: {} self_py={} ref_count={} class_name=\"{}\"", - fmt::ptr(object), - self_py ? fmt::ptr(self_py) : "null", - ref_count, - object->class_name() - ); + LiveObjectInfo info{object, ref_count, self_py, object->class_name()}; + if (log_to_tty) + fmt::println("Live object: {}", info.to_string()); + res.push_back(info); object->report_refs(); } } + return res; } void Object::report_refs() const diff --git a/src/sgl/core/object.h b/src/sgl/core/object.h index 64e2296b6..9ff4ad8c7 100644 --- a/src/sgl/core/object.h +++ b/src/sgl/core/object.h @@ -22,7 +22,7 @@ static_assert(sizeof(Py_ssize_t_) == sizeof(size_t)); /// When enabled, each object derived from Object will have its /// lifetime tracked. This is useful for debugging memory leaks. #ifndef SGL_ENABLE_OBJECT_TRACKING -#define SGL_ENABLE_OBJECT_TRACKING 0 +#define SGL_ENABLE_OBJECT_TRACKING 1 #endif /// Enable/disable reference tracking. @@ -48,6 +48,19 @@ static constexpr bool SGL_TRACK_ALL_REFS{false}; namespace sgl { +class Object; + +#if SGL_ENABLE_OBJECT_TRACKING +struct LiveObjectInfo { + const Object* object; + uint64_t ref_count; + PyObject* self_py; + const char* class_name; + + std::string to_string(); +}; +#endif + /** * \brief Object base class with intrusive reference counting * @@ -143,7 +156,7 @@ class SGL_API Object { #if SGL_ENABLE_OBJECT_TRACKING /// Reports current set of live objects. - static void report_live_objects(); + static std::vector report_live_objects(bool log_to_tty = true); /// Report references of this object. void report_refs() const; diff --git a/src/slangpy_ext/core/object.cpp b/src/slangpy_ext/core/object.cpp index ab5ab8d74..9e0940bd5 100644 --- a/src/slangpy_ext/core/object.cpp +++ b/src/slangpy_ext/core/object.cpp @@ -32,7 +32,27 @@ SGL_PY_EXPORT(core_object) "Base class for all reference counted objects." ) #if SGL_ENABLE_OBJECT_TRACKING - .def_static("report_live_objects", &Object::report_live_objects) + .def_static( + "report_live_objects", + [](bool log_to_tty = true) + { + // We want to avoid creating new live objects by reporting objects, so instead of + // creating bindings for LiveObjectInfo, convert each to a dictionary. + auto live_objects = Object::report_live_objects(log_to_tty); + nb::list result; + for (const auto& info : live_objects) { + nb::dict obj_dict; + obj_dict["object"] = reinterpret_cast(info.object); + obj_dict["ref_count"] = info.ref_count; + obj_dict["self_py"] = reinterpret_cast(info.self_py); + obj_dict["class_name"] = info.class_name; + result.append(obj_dict); + } + return result; + }, + "log_to_tty"_a = true, + "Returns a list of dictionaries containing information about live objects" + ) #endif .def("__repr__", &Object::to_string); } From 6321330cf261ec962d4d3c24ef0977e7b67c65fa Mon Sep 17 00:00:00 2001 From: Chris Cummings Date: Fri, 5 Sep 2025 10:21:32 +0100 Subject: [PATCH 7/8] PR fixes --- src/sgl/device/types.h | 4 ++-- src/slangpy_ext/utils/slangpy.cpp | 8 ++++---- src/slangpy_ext/utils/slangpy.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/sgl/device/types.h b/src/sgl/device/types.h index e16b6150f..15613ce84 100644 --- a/src/sgl/device/types.h +++ b/src/sgl/device/types.h @@ -800,13 +800,13 @@ struct HeapReport { { return fmt::format( "HeapReport(\n" - " name = \"{}\",\n" + " label = \"{}\",\n" " num_pages = {},\n" " total_allocated = {},\n" " total_mem_usage = {},\n" " num_allocations = {}\n" ")", - name, + label, num_pages, total_allocated, total_mem_usage, diff --git a/src/slangpy_ext/utils/slangpy.cpp b/src/slangpy_ext/utils/slangpy.cpp index fc3be62e5..490d4276e 100644 --- a/src/slangpy_ext/utils/slangpy.cpp +++ b/src/slangpy_ext/utils/slangpy.cpp @@ -33,9 +33,9 @@ struct GcHelper { void traverse(slangpy::NativeCallRuntimeOptions*, GcVisitor& visitor) { visitor("uniforms"); - visitor("this_ref"); + visitor("_native_this"); } - void clear(slangpy::NativeCallRuntimeOptions* opts) { opts->clear(); } + void clear(slangpy::NativeCallRuntimeOptions* opts) { opts->garbage_collect(); } }; } // namespace sgl @@ -1282,10 +1282,10 @@ SGL_PY_EXPORT(utils_slangpy) D_NA(NativeCallRuntimeOptions, uniforms) ) .def_prop_rw( - "this_ref", + "_native_this", &NativeCallRuntimeOptions::get_this, &NativeCallRuntimeOptions::set_this, - D_NA(NativeCallRuntimeOptions, this_ref) + D_NA(NativeCallRuntimeOptions, _native_this) ) .def_prop_rw( "cuda_stream", diff --git a/src/slangpy_ext/utils/slangpy.h b/src/slangpy_ext/utils/slangpy.h index e2702e563..0fecbda3c 100644 --- a/src/slangpy_ext/utils/slangpy.h +++ b/src/slangpy_ext/utils/slangpy.h @@ -602,7 +602,7 @@ class NativeCallRuntimeOptions : Object { void set_cuda_stream(NativeHandle cuda_stream) { m_cuda_stream = cuda_stream; } /// Clear internal data for garbage collection - void clear() + void garbage_collect() { m_uniforms.clear(); m_this = nb::none(); From 22af5d0244154391ad7602e90c279ecc79ad3bc0 Mon Sep 17 00:00:00 2001 From: Chris Cummings Date: Fri, 5 Sep 2025 12:00:43 +0100 Subject: [PATCH 8/8] Mark lots of things as leaking, clean up tracking system --- slangpy/testing/helpers.py | 86 ++++++++++++------- slangpy/testing/plugin.py | 24 ++++-- slangpy/tests/device/test_lifetimes.py | 4 + slangpy/tests/device/test_print.py | 1 + slangpy/tests/device/test_reflection.py | 4 + slangpy/tests/device/test_torch_interop.py | 2 + slangpy/tests/slangpy_tests/test_modules.py | 5 +- .../tests/slangpy_tests/test_packed_arg.py | 4 + slangpy/tests/slangpy_tests/test_textures.py | 8 ++ .../tests/slangpy_tests/test_torchbuffers.py | 4 + src/sgl/core/object.h | 2 +- 11 files changed, 107 insertions(+), 37 deletions(-) diff --git a/slangpy/testing/helpers.py b/slangpy/testing/helpers.py index c39fc9bb2..d1b32d9ee 100644 --- a/slangpy/testing/helpers.py +++ b/slangpy/testing/helpers.py @@ -7,7 +7,6 @@ from typing import Any, Optional, Sequence, Union, cast from os import PathLike import inspect -import objgraph import pytest import numpy as np @@ -46,6 +45,9 @@ else: raise RuntimeError("Unsupported platform") +# If live object tracking is supported, enable leak tracking +LEAK_TRACKING_ENABLED = hasattr(spy.Object, "report_live_objects") + # Called from pytest plugin if 'device-types' argument is provided def set_device_types(device_types_str: Optional[str]) -> None: @@ -86,34 +88,55 @@ def set_device_types(device_types_str: Optional[str]) -> None: TRACKED_LIVE_OBJECTS: Optional[list[Any]] = None -IGNORE_LIVE_OBJECT_TYPES = ["NativeSlangType", "TypeLayoutReflection", "TypeReflection"] +# Types to ignore when checking for leaked objects +# - The reflection types are created and cached per device when buffers are loaded, so are hard +# to identify as actual leaks. +# - CoopVec is created on demand within the device when the coopvec api is used, and so will appear +# as a leak for cached devices. +IGNORE_LIVE_OBJECT_TYPES = ["NativeSlangType", "TypeLayoutReflection", "TypeReflection", "CoopVec"] def save_live_objects(): - print("Saving live objects") - global TRACKED_LIVE_OBJECTS - TRACKED_LIVE_OBJECTS = spy.Object.report_live_objects(True) - # objgraph.show_growth(limit=1) - - -def compare_and_save_live_objects(): - while gc.collect() > 0: - pass - # objgraph.show_growth() - # objgraph.show_backrefs(objgraph.by_type('ArrayType')[0], max_depth=4, filename='backref.svg') - global TRACKED_LIVE_OBJECTS - new = spy.Object.report_live_objects(True) - if TRACKED_LIVE_OBJECTS: - errors = [] - current_by_address = {x["object"]: x for x in TRACKED_LIVE_OBJECTS} - for obj in new: - if obj["object"] not in current_by_address: - if not obj["class_name"] in IGNORE_LIVE_OBJECT_TYPES: - errors.append(obj) - if len(errors) > 0: - msg = "\n".join([f" {e}" for e in errors]) - raise RuntimeError(f"Leaked objects detected:\n{msg}") - TRACKED_LIVE_OBJECTS = new + if LEAK_TRACKING_ENABLED: + global TRACKED_LIVE_OBJECTS + TRACKED_LIVE_OBJECTS = spy.Object.report_live_objects(True) + + +def compare_and_save_live_objects(allowed_leaks: Optional[dict[str, int]] = None): + if LEAK_TRACKING_ENABLED: + while gc.collect() > 0: + pass + + # Make a copy of allowed_leaks so we don't modify the original dict + allowed_leaks = allowed_leaks.copy() if allowed_leaks else {} + + # Get current live objects and compare to previous captured list + global TRACKED_LIVE_OBJECTS + new = spy.Object.report_live_objects(True) + if TRACKED_LIVE_OBJECTS: + errors = [] + + # Build a lookup by address for fast comparison + current_by_address = {x["object"]: x for x in TRACKED_LIVE_OBJECTS} + + # Find any new objects, and build list of errors + for obj in new: + if obj["object"] not in current_by_address: + cn = obj["class_name"] + if not cn in IGNORE_LIVE_OBJECT_TYPES: + if cn in allowed_leaks: + if allowed_leaks[cn] > 0: + allowed_leaks[cn] -= 1 + continue + errors.append(obj) + + # If any errors, raise runtime error with all of them in + if len(errors) > 0: + msg = "\n".join([f" {e}" for e in errors]) + raise RuntimeError(f"Leaked objects detected:\n{msg}") + + # Store updated live objects list + TRACKED_LIVE_OBJECTS = new # Always dump stuff when testing @@ -268,11 +291,14 @@ def get_device( ) if use_cache: + # Cache device DEVICE_CACHE[cache_key] = device - get_lookup_module( - device - ) # Init the slangpy loopup cache up front to make tracking more robust - save_live_objects() # Update live object tracking if caching device as it won't get freed + + # When leak tracking, init the slangpy loopup cache up front and save live + # objects so that we don't report cached device resources as leaks. + if LEAK_TRACKING_ENABLED: + get_lookup_module(device) + save_live_objects() return device diff --git a/slangpy/testing/plugin.py b/slangpy/testing/plugin.py index 233891242..2281f48ec 100644 --- a/slangpy/testing/plugin.py +++ b/slangpy/testing/plugin.py @@ -16,6 +16,7 @@ DEVICE_CACHE, save_live_objects, compare_and_save_live_objects, + LEAK_TRACKING_ENABLED, ) @@ -149,16 +150,29 @@ def pytest_runtest_setup(item: Any) -> None: @pytest.hookimpl(wrapper=True) def pytest_pyfunc_call(pyfuncitem: pytest.Function): - leaks_mem = pyfuncitem.get_closest_marker("memory_leak") != None + if LEAK_TRACKING_ENABLED: + # Check if leak tests enabled, and optionally read list of allowed leaks + leak_check = True + allowed_leaks = None + leaks_mem_marker = pyfuncitem.get_closest_marker("memory_leak") + if leaks_mem_marker != None: + if hasattr(leaks_mem_marker, "kwargs"): + allowed_leaks = leaks_mem_marker.kwargs.get("details", None) + leak_check = allowed_leaks != None - if not leaks_mem: - save_live_objects() + # If checks enabled, save current live objects. + if leak_check: + save_live_objects() # If the outcome is an exception, will raise the exception. res = yield - if not leaks_mem: - compare_and_save_live_objects() + if LEAK_TRACKING_ENABLED: + # If checks enabled, immediately close any left over devices, then + # check for leaked objects. + if leak_check: + close_leaked_devices() + compare_and_save_live_objects(allowed_leaks) # Return result return res diff --git a/slangpy/tests/device/test_lifetimes.py b/slangpy/tests/device/test_lifetimes.py index 4ffa76dd5..f1de2312b 100644 --- a/slangpy/tests/device/test_lifetimes.py +++ b/slangpy/tests/device/test_lifetimes.py @@ -6,6 +6,7 @@ from slangpy.testing import helpers +@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1}) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_create_and_destroy_device_via_del(device_type: spy.DeviceType): device = helpers.get_device(device_type, use_cache=False) @@ -13,6 +14,7 @@ def test_create_and_destroy_device_via_del(device_type: spy.DeviceType): del device +@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1}) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_create_and_destroy_device_via_none(device_type: spy.DeviceType): device = helpers.get_device(device_type, use_cache=False) @@ -20,6 +22,7 @@ def test_create_and_destroy_device_via_none(device_type: spy.DeviceType): device = None +@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1}) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_load_module_and_cleanup_in_order(device_type: spy.DeviceType): device = helpers.get_device(device_type, use_cache=False) @@ -39,6 +42,7 @@ def test_load_module_and_cleanup_in_order(device_type: spy.DeviceType): device = None +@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1}) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_load_module_and_cleanup_in_reverse_order(device_type: spy.DeviceType): device = helpers.get_device(device_type, use_cache=False) diff --git a/slangpy/tests/device/test_print.py b/slangpy/tests/device/test_print.py index 95b7b02f2..e46d63104 100644 --- a/slangpy/tests/device/test_print.py +++ b/slangpy/tests/device/test_print.py @@ -7,6 +7,7 @@ from slangpy.testing import helpers +@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1}) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_print(device_type: spy.DeviceType): device = spy.Device(type=device_type, enable_print=True, label=f"print-{device_type.name}") diff --git a/slangpy/tests/device/test_reflection.py b/slangpy/tests/device/test_reflection.py index 63ebd1ab5..47fefb303 100644 --- a/slangpy/tests/device/test_reflection.py +++ b/slangpy/tests/device/test_reflection.py @@ -1157,6 +1157,10 @@ def test_is_sub_type(test_id: str, device_type: spy.DeviceType): assert module.layout.is_sub_type(t, i) +@pytest.mark.memory_leak( + "Leaks a module", + details={"SlangModule": 1, "SlangModuleData": 2, "SlangSessionData": 1, "ProgramLayout": 1}, +) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_hot_reload_invalid(test_id: str, device_type: spy.DeviceType): device = helpers.get_device(type=device_type) diff --git a/slangpy/tests/device/test_torch_interop.py b/slangpy/tests/device/test_torch_interop.py index 414bddecc..6018abdf2 100644 --- a/slangpy/tests/device/test_torch_interop.py +++ b/slangpy/tests/device/test_torch_interop.py @@ -8,6 +8,7 @@ from slangpy.testing import helpers +@pytest.mark.memory_leak("Leaks a whole device!") @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_buffer_to_torch(device_type: spy.DeviceType): if device_type == spy.DeviceType.cuda: @@ -50,6 +51,7 @@ def test_buffer_to_torch(device_type: spy.DeviceType): ) +@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1}) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_torch_interop(device_type: spy.DeviceType): if device_type == spy.DeviceType.cuda: diff --git a/slangpy/tests/slangpy_tests/test_modules.py b/slangpy/tests/slangpy_tests/test_modules.py index 2a539103e..f80f06bd6 100644 --- a/slangpy/tests/slangpy_tests/test_modules.py +++ b/slangpy/tests/slangpy_tests/test_modules.py @@ -114,7 +114,10 @@ def test_call_mutable_func(device_type: DeviceType): assert np.allclose(data[:2], [0.05, 0.1]) -@pytest.mark.memory_leak("Leaks modules") +@pytest.mark.memory_leak( + "Leaks modules, probably issue with looking up types by name", + details={"ShaderProgram": 1, "ShaderProgramData": 1, "SlangModuleData": 2, "SlangModule": 2}, +) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_read_back_with_global_func(device_type: DeviceType): m = load_test_module(device_type) diff --git a/slangpy/tests/slangpy_tests/test_packed_arg.py b/slangpy/tests/slangpy_tests/test_packed_arg.py index 0cb6ca598..990a5361e 100644 --- a/slangpy/tests/slangpy_tests/test_packed_arg.py +++ b/slangpy/tests/slangpy_tests/test_packed_arg.py @@ -7,6 +7,7 @@ from slangpy.testing import helpers +@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1}) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_simple_int_call(device_type: DeviceType): @@ -28,6 +29,7 @@ def test_simple_int_call(device_type: DeviceType): assert result == 42 +@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1}) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_simple_struct_call(device_type: DeviceType): @@ -53,6 +55,7 @@ def test_simple_struct_call(device_type: DeviceType): assert result == 42 +@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1}) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_vectorize_struct_array(device_type: DeviceType): @@ -88,6 +91,7 @@ def test_vectorize_struct_array(device_type: DeviceType): assert np.array_equal(results, np.array([2, 3, 4, 5], dtype=np.int32)) +@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1}) @pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES) def test_vectorize_struct_with_tensor_array(device_type: DeviceType): if device_type == DeviceType.metal: diff --git a/slangpy/tests/slangpy_tests/test_textures.py b/slangpy/tests/slangpy_tests/test_textures.py index bc8da4692..84bdfc514 100644 --- a/slangpy/tests/slangpy_tests/test_textures.py +++ b/slangpy/tests/slangpy_tests/test_textures.py @@ -502,6 +502,10 @@ def texture_return_value_impl( assert np.allclose(result_np, data.squeeze()) +@pytest.mark.memory_leak( + "Leaks modules, probably issue with looking up types by name", + details={"ShaderProgram": 1, "ShaderProgramData": 1, "SlangModuleData": 2, "SlangModule": 2}, +) @pytest.mark.parametrize( "texel_name", ["uint8_t", "uint16_t", "int8_t", "int16_t", "float", "half", "uint"] ) @@ -516,6 +520,10 @@ def test_texture_return_value(device_type: DeviceType, texel_name: str, dims: in # This case checks for when the return type is the string "texture". # This checks a subset of the "test_texture_return_value" parameters. +@pytest.mark.memory_leak( + "Leaks modules, probably issue with looking up types by name", + details={"ShaderProgram": 1, "ShaderProgramData": 1, "SlangModuleData": 2, "SlangModule": 2}, +) @pytest.mark.parametrize("texel_name", ["float"]) @pytest.mark.parametrize("dims", [1, 2, 3]) @pytest.mark.parametrize("channels", [4]) diff --git a/slangpy/tests/slangpy_tests/test_torchbuffers.py b/slangpy/tests/slangpy_tests/test_torchbuffers.py index bd1e90bbd..c48c02738 100644 --- a/slangpy/tests/slangpy_tests/test_torchbuffers.py +++ b/slangpy/tests/slangpy_tests/test_torchbuffers.py @@ -163,6 +163,7 @@ def run_tensor_race_condition_tests( # Pytest for our most common default cuda-interop case, in which we've configured pytorch # and slangpy to share the same context and stream. +@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1}) @pytest.mark.parametrize("device_type", [spy.DeviceType.cuda]) def test_shared_context_and_stream(device_type: spy.DeviceType): assert ( @@ -174,12 +175,14 @@ def test_shared_context_and_stream(device_type: spy.DeviceType): # Pytest for none-shared context case, which appears to avoid race conditions through some level # of synchronization in the default streams of separate contexts. For now this has shown not # to cause race conditions, so testing for that behaviour. +@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1}) @pytest.mark.parametrize("device_type", [spy.DeviceType.cuda]) def test_non_shared_context(device_type: spy.DeviceType): assert run_tensor_race_condition_tests(share_context=False) == False # Pytest for known race condition case, where we use a custom stream in torch but not sharing it with slangpy. +@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1}) @pytest.mark.parametrize("device_type", [spy.DeviceType.cuda]) def test_custom_stream_no_share(device_type: spy.DeviceType): pytest.skip("Race condition doesn't reproduce reliably on CI machines of varying specs") @@ -190,6 +193,7 @@ def test_custom_stream_no_share(device_type: spy.DeviceType): # Pytest that removes the race condition by sharing the custom stream +@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1}) @pytest.mark.parametrize("device_type", [spy.DeviceType.cuda]) def test_custom_stream_share(device_type: spy.DeviceType): assert ( diff --git a/src/sgl/core/object.h b/src/sgl/core/object.h index 9ff4ad8c7..bec4512de 100644 --- a/src/sgl/core/object.h +++ b/src/sgl/core/object.h @@ -22,7 +22,7 @@ static_assert(sizeof(Py_ssize_t_) == sizeof(size_t)); /// When enabled, each object derived from Object will have its /// lifetime tracked. This is useful for debugging memory leaks. #ifndef SGL_ENABLE_OBJECT_TRACKING -#define SGL_ENABLE_OBJECT_TRACKING 1 +#define SGL_ENABLE_OBJECT_TRACKING 0 #endif /// Enable/disable reference tracking.