Skip to content
Open
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,6 @@ reportInvalidTypeForm = "warning"

[tool.pytest.ini_options]
pythonpath = ["."]
markers = [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we move this definition to the slangpy pytest plugin somehow?

"memory_leak: Marks test as known to leak objects"
]
70 changes: 69 additions & 1 deletion slangpy/testing/helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import gc
import hashlib
import sys
from pathlib import Path
Expand All @@ -26,7 +27,7 @@
LogLevel,
NativeHandle,
)
from slangpy.types.buffer import NDBuffer
from slangpy.types.buffer import NDBuffer, get_lookup_module
from slangpy.core.function import Function

# Global variables for device isolation. If SELECTED_DEVICE_TYPES is None, no restriction.
Expand All @@ -44,6 +45,9 @@
else:
raise RuntimeError("Unsupported platform")

# If live object tracking is supported, enable leak tracking
LEAK_TRACKING_ENABLED = hasattr(spy.Object, "report_live_objects")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we move all the leak checking specifics into a new leak_tracking.py module?



# Called from pytest plugin if 'device-types' argument is provided
def set_device_types(device_types_str: Optional[str]) -> None:
Expand Down Expand Up @@ -82,6 +86,59 @@ def set_device_types(device_types_str: Optional[str]) -> None:
USED_TORCH_DEVICES: bool = False
METAL_PARAMETER_BLOCK_SUPPORT: Optional[bool] = None

TRACKED_LIVE_OBJECTS: Optional[list[Any]] = None

# Types to ignore when checking for leaked objects
# - The reflection types are created and cached per device when buffers are loaded, so are hard
# to identify as actual leaks.
# - CoopVec is created on demand within the device when the coopvec api is used, and so will appear
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# - CoopVec is created on demand within the device when the coopvec api is used, and so will appear
# - The reflection types are created and cached per device when buffers are loaded, so are hard
# to identify as actual leaks.
# - CoopVec is created on demand within the device when the coopvec API is used, and so will appear
# as a leak for cached devices.
IGNORE_LIVE_OBJECT_TYPES = ["NativeSlangType", "TypeLayoutReflection", "TypeReflection", "CoopVec"]

# as a leak for cached devices.
IGNORE_LIVE_OBJECT_TYPES = ["NativeSlangType", "TypeLayoutReflection", "TypeReflection", "CoopVec"]


def save_live_objects():
    """
    Record the current live-object report as the leak-tracking baseline.

    Stores the result of ``spy.Object.report_live_objects(True)`` in the
    module-level ``TRACKED_LIVE_OBJECTS``. Does nothing when
    ``LEAK_TRACKING_ENABLED`` is false.
    """
    global TRACKED_LIVE_OBJECTS

    # Nothing to snapshot when live object reporting is unavailable.
    if not LEAK_TRACKING_ENABLED:
        return

    TRACKED_LIVE_OBJECTS = spy.Object.report_live_objects(True)


def compare_and_save_live_objects(allowed_leaks: Optional[dict[str, int]] = None):
    """
    Compare the current live objects with the saved baseline, then re-baseline.

    Every object in the new report whose ``"object"`` entry was not present in
    ``TRACKED_LIVE_OBJECTS`` counts as a leak, unless its ``"class_name"`` is in
    ``IGNORE_LIVE_OBJECT_TYPES`` or is still covered by ``allowed_leaks``.
    Does nothing when ``LEAK_TRACKING_ENABLED`` is false.

    :param allowed_leaks: Optional map of class name to the number of new
        objects of that class that are tolerated. The caller's dict is never
        modified.
    :raises RuntimeError: If any new object is neither ignored nor allowed.
        The baseline is left untouched in that case.
    """
    global TRACKED_LIVE_OBJECTS

    if not LEAK_TRACKING_ENABLED:
        return

    # Keep collecting until a pass finds nothing more to free, so garbage that
    # only becomes unreachable after an earlier pass is not reported as a leak.
    while gc.collect() > 0:
        pass

    # Work on a private copy so decrementing the budget never touches the
    # caller's dict.
    remaining = allowed_leaks.copy() if allowed_leaks else {}

    new = spy.Object.report_live_objects(True)

    # Compare against None rather than truthiness: an empty snapshot is still a
    # valid baseline, and every object reported now is new relative to it.
    if TRACKED_LIVE_OBJECTS is not None:
        # Only membership is needed, so a set of object identifiers suffices.
        known = {entry["object"] for entry in TRACKED_LIVE_OBJECTS}

        errors = []
        for obj in new:
            if obj["object"] in known:
                continue
            cn = obj["class_name"]
            if cn in IGNORE_LIVE_OBJECT_TYPES:
                continue
            # Consume one unit of the per-class allowance if any is left.
            if remaining.get(cn, 0) > 0:
                remaining[cn] -= 1
                continue
            errors.append(obj)

        # Report all leaks in a single error.
        if errors:
            msg = "\n".join([f" {e}" for e in errors])
            raise RuntimeError(f"Leaked objects detected:\n{msg}")

    # Store updated live objects list as the baseline for the next comparison.
    TRACKED_LIVE_OBJECTS = new


# Always dump stuff when testing
spy.set_dump_generated_shaders(True)
# spy.set_dump_slang_intermediates(True)
Expand All @@ -104,6 +161,9 @@ def close_all_devices():

torch.cuda.synchronize()

# Clear device cache
DEVICE_CACHE.clear()

# Close all devices that were created during the tests.
for device in Device.get_created_devices():
print(f"Closing device on shutdown {device.desc.label}")
Expand Down Expand Up @@ -231,7 +291,15 @@ def get_device(
)

if use_cache:
# Cache device
DEVICE_CACHE[cache_key] = device

# When leak tracking, init the slangpy lookup cache up front and save live
# objects so that we don't report cached device resources as leaks.
if LEAK_TRACKING_ENABLED:
get_lookup_module(device)
save_live_objects()

return device


Expand Down
93 changes: 93 additions & 0 deletions slangpy/testing/plugin.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

import gc
import pytest
import inspect
from typing import Any
Expand All @@ -12,6 +13,10 @@
should_skip_test_for_device,
should_skip_non_device_test,
SELECTED_DEVICE_TYPES,
DEVICE_CACHE,
save_live_objects,
compare_and_save_live_objects,
LEAK_TRACKING_ENABLED,
)


Expand All @@ -38,6 +43,63 @@ def pytest_sessionstart(session: pytest.Session):
set_device_types(device_types_option)


def check_live_objects():
    """
    Sanity-check the per-class counts of live objects against the device cache.

    Counts the entries of ``spy.Object.report_live_objects(False)`` by
    ``"class_name"``. Classes with a known per-device bound only print a
    warning when the bound is exceeded; classes that are neither bounded nor
    ignored print a warning and then raise.

    :raises RuntimeError: If a live object has a class that is neither ignored
        nor listed in the expected-count table.
    """
    for _ in range(3):
        gc.collect()

    live_objects = spy.Object.report_live_objects(False)
    device_count = len(DEVICE_CACHE)

    # Estimate how many of these global types can exist based on number of cached devices.
    # Most are 1-to-1, however slangpy can load an extra module per device for type lookups,
    # which also results in the potential creation of a program layout per device.
    limits = dict.fromkeys(
        (
            "Logger",
            "Device",
            "HotReload",
            "SlangSession",
            "SlangSessionData",
            "Fence",
            "FileSystemWatcher",
            "ProgramLayout",
            "CoopVec",
        ),
        device_count,
    )
    limits["SlangModule"] = device_count * 2
    limits["SlangModuleData"] = device_count * 2

    # Loggers are known to persist, and the type info is not strictly bounded, as
    # type infos used by buffers in slangpy are cached per device.
    ignored = (
        "Logger",
        "LoggerOutput",
        "TypeReflection",
        "TypeLayoutReflection",
        "NativeSlangType",
    )

    # Tally live objects per class name, in first-seen order.
    tally = {}
    for entry in live_objects:
        name = entry["class_name"]
        tally[name] = tally.get(name, 0) + 1

    for name, seen in tally.items():
        if name in ignored:
            continue

        limit = limits.get(name)
        if limit is None:
            # A class with no known bound should not be alive at all here.
            print(f"Warning: Unexpected {name} count (actual: {seen})")
            raise RuntimeError(f"Unexpected {name} count (actual: {seen})")

        if seen > limit:
            print(f"Warning: {name} count mismatch (expected: {limit}, actual: {seen})")


@pytest.hookimpl(trylast=True)
def pytest_sessionfinish(session: pytest.Session, exitstatus: int):
close_all_devices()
Expand Down Expand Up @@ -83,3 +145,34 @@ def pytest_runtest_setup(item: Any) -> None:
pytest.skip(
f"Skipping non-device test (target devices: {', '.join(target_device_names)})"
)


@pytest.hookimpl(wrapper=True)
def pytest_pyfunc_call(pyfuncitem: pytest.Function):
    """
    Wrap each test function call with a live-object leak check.

    When ``LEAK_TRACKING_ENABLED`` is true, the live objects are saved before
    the test runs and compared afterwards. The ``memory_leak`` marker controls
    this per test:

    - no marker: the check runs with no allowed leaks;
    - marker with a ``details`` keyword argument: the check runs and
      ``details`` is passed on as the allowed leaks;
    - marker without ``details``: the check is skipped for that test.

    :param pyfuncitem: The test function item being called.
    :return: The result of the wrapped hook call.
    """
    # Bind both names up front so the code after the yield never depends on
    # the earlier branch having run.
    leak_check = False
    allowed_leaks = None

    if LEAK_TRACKING_ENABLED:
        # Check if leak tests enabled, and optionally read list of allowed leaks
        leak_check = True
        leaks_mem_marker = pyfuncitem.get_closest_marker("memory_leak")
        if leaks_mem_marker is not None:
            if hasattr(leaks_mem_marker, "kwargs"):
                allowed_leaks = leaks_mem_marker.kwargs.get("details", None)
            leak_check = allowed_leaks is not None

        # If checks enabled, save current live objects.
        if leak_check:
            save_live_objects()

    # If the outcome is an exception, will raise the exception.
    res = yield

    # If checks enabled, immediately close any left over devices, then
    # check for leaked objects.
    if leak_check:
        close_leaked_devices()
        compare_and_save_live_objects(allowed_leaks)

    # Return result
    return res
4 changes: 4 additions & 0 deletions slangpy/tests/device/test_lifetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,23 @@
from slangpy.testing import helpers


@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_create_and_destroy_device_via_del(device_type: spy.DeviceType):
    """Create an uncached device and drop the local reference with ``del``."""
    # use_cache=False so the helper does not store this device in its cache.
    device = helpers.get_device(device_type, use_cache=False)
    assert device is not None
    del device


@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_create_and_destroy_device_via_none(device_type: spy.DeviceType):
    """Create an uncached device and drop the local reference by rebinding to None."""
    # use_cache=False so the helper does not store this device in its cache.
    device = helpers.get_device(device_type, use_cache=False)
    assert device is not None
    device = None


@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_load_module_and_cleanup_in_order(device_type: spy.DeviceType):
device = helpers.get_device(device_type, use_cache=False)
Expand All @@ -39,6 +42,7 @@ def test_load_module_and_cleanup_in_order(device_type: spy.DeviceType):
device = None


@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_load_module_and_cleanup_in_reverse_order(device_type: spy.DeviceType):
device = helpers.get_device(device_type, use_cache=False)
Expand Down
1 change: 1 addition & 0 deletions slangpy/tests/device/test_print.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from slangpy.testing import helpers


@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_print(device_type: spy.DeviceType):
device = spy.Device(type=device_type, enable_print=True, label=f"print-{device_type.name}")
Expand Down
4 changes: 4 additions & 0 deletions slangpy/tests/device/test_reflection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,10 @@ def test_is_sub_type(test_id: str, device_type: spy.DeviceType):
assert module.layout.is_sub_type(t, i)


@pytest.mark.memory_leak(
"Leaks a module",
details={"SlangModule": 1, "SlangModuleData": 2, "SlangSessionData": 1, "ProgramLayout": 1},
)
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_hot_reload_invalid(test_id: str, device_type: spy.DeviceType):
device = helpers.get_device(type=device_type)
Expand Down
2 changes: 2 additions & 0 deletions slangpy/tests/device/test_torch_interop.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from slangpy.testing import helpers


@pytest.mark.memory_leak("Leaks a whole device!")
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_buffer_to_torch(device_type: spy.DeviceType):
if device_type == spy.DeviceType.cuda:
Expand Down Expand Up @@ -50,6 +51,7 @@ def test_buffer_to_torch(device_type: spy.DeviceType):
)


@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_torch_interop(device_type: spy.DeviceType):
if device_type == spy.DeviceType.cuda:
Expand Down
4 changes: 4 additions & 0 deletions slangpy/tests/slangpy_tests/test_modules.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def test_call_mutable_func(device_type: DeviceType):
assert np.allclose(data[:2], [0.05, 0.1])


@pytest.mark.memory_leak(
"Leaks modules, probably issue with looking up types by name",
details={"ShaderProgram": 1, "ShaderProgramData": 1, "SlangModuleData": 2, "SlangModule": 2},
)
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_read_back_with_global_func(device_type: DeviceType):
m = load_test_module(device_type)
Expand Down
4 changes: 4 additions & 0 deletions slangpy/tests/slangpy_tests/test_packed_arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from slangpy.testing import helpers


@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1})
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_simple_int_call(device_type: DeviceType):

Expand All @@ -28,6 +29,7 @@ def test_simple_int_call(device_type: DeviceType):
assert result == 42


@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1})
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_simple_struct_call(device_type: DeviceType):

Expand All @@ -53,6 +55,7 @@ def test_simple_struct_call(device_type: DeviceType):
assert result == 42


@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1})
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_vectorize_struct_array(device_type: DeviceType):

Expand Down Expand Up @@ -88,6 +91,7 @@ def test_vectorize_struct_array(device_type: DeviceType):
assert np.array_equal(results, np.array([2, 3, 4, 5], dtype=np.int32))


@pytest.mark.memory_leak("Leaks call data cache", details={"NativeCallDataCache": 1})
@pytest.mark.parametrize("device_type", helpers.DEFAULT_DEVICE_TYPES)
def test_vectorize_struct_with_tensor_array(device_type: DeviceType):
if device_type == DeviceType.metal:
Expand Down
8 changes: 8 additions & 0 deletions slangpy/tests/slangpy_tests/test_textures.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,10 @@ def texture_return_value_impl(
assert np.allclose(result_np, data.squeeze())


@pytest.mark.memory_leak(
"Leaks modules, probably issue with looking up types by name",
details={"ShaderProgram": 1, "ShaderProgramData": 1, "SlangModuleData": 2, "SlangModule": 2},
)
@pytest.mark.parametrize(
"texel_name", ["uint8_t", "uint16_t", "int8_t", "int16_t", "float", "half", "uint"]
)
Expand All @@ -516,6 +520,10 @@ def test_texture_return_value(device_type: DeviceType, texel_name: str, dims: in

# This case checks for when the return type is the string "texture".
# This checks a subset of the "test_texture_return_value" parameters.
@pytest.mark.memory_leak(
"Leaks modules, probably issue with looking up types by name",
details={"ShaderProgram": 1, "ShaderProgramData": 1, "SlangModuleData": 2, "SlangModule": 2},
)
@pytest.mark.parametrize("texel_name", ["float"])
@pytest.mark.parametrize("dims", [1, 2, 3])
@pytest.mark.parametrize("channels", [4])
Expand Down
4 changes: 4 additions & 0 deletions slangpy/tests/slangpy_tests/test_torchbuffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ def run_tensor_race_condition_tests(

# Pytest for our most common default cuda-interop case, in which we've configured pytorch
# and slangpy to share the same context and stream.
@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
@pytest.mark.parametrize("device_type", [spy.DeviceType.cuda])
def test_shared_context_and_stream(device_type: spy.DeviceType):
assert (
Expand All @@ -174,12 +175,14 @@ def test_shared_context_and_stream(device_type: spy.DeviceType):
# Pytest for non-shared context case, which appears to avoid race conditions through some level
# of synchronization in the default streams of separate contexts. For now this has shown not
# to cause race conditions, so testing for that behaviour.
@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
@pytest.mark.parametrize("device_type", [spy.DeviceType.cuda])
def test_non_shared_context(device_type: spy.DeviceType):
    """Expect the race-condition helper to return False when the context is not shared."""
    assert run_tensor_race_condition_tests(share_context=False) == False


# Pytest for known race condition case, where we use a custom stream in torch but not sharing it with slangpy.
@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
@pytest.mark.parametrize("device_type", [spy.DeviceType.cuda])
def test_custom_stream_no_share(device_type: spy.DeviceType):
pytest.skip("Race condition doesn't reproduce reliably on CI machines of varying specs")
Expand All @@ -190,6 +193,7 @@ def test_custom_stream_no_share(device_type: spy.DeviceType):


# Pytest that removes the race condition by sharing the custom stream
@pytest.mark.memory_leak("Leaks logger", details={"Logger": 1})
@pytest.mark.parametrize("device_type", [spy.DeviceType.cuda])
def test_custom_stream_share(device_type: spy.DeviceType):
assert (
Expand Down
Loading
Loading