17 changes: 17 additions & 0 deletions CMakeLists.txt
@@ -52,6 +52,9 @@ option(SGL_BUILD_STANDALONE "Build without external dependencies" ON)
# This allows building CUDA support without having the CUDA SDK installed.
option(SGL_USE_DYNAMIC_CUDA "Load CUDA driver API dynamically" ON)

# Enable NVTX profiling annotations
option(SGL_ENABLE_NVTX "Enable NVTX profiling annotations" ON)

# Disable asserts.
# By default, asserts are enabled in both debug and release builds.
# This option can be used to override the default behavior.
@@ -119,6 +122,20 @@ if(NOT SGL_USE_DYNAMIC_CUDA)
find_package(CUDAToolkit REQUIRED)
endif()

# NVTX
# -----------------------------------------------------------------------------

if(SGL_ENABLE_NVTX)
# Look up the CUDA Toolkit without REQUIRED so the fallback below is reachable.
find_package(CUDAToolkit)
if(CUDAToolkit_FOUND)
add_compile_definitions(SGL_ENABLE_NVTX=1)
message(STATUS "NVTX support enabled")
else()
message(WARNING "NVTX requested but CUDA Toolkit not found")
set(SGL_ENABLE_NVTX OFF)
endif()
endif()

# -----------------------------------------------------------------------------
# Global setup
# -----------------------------------------------------------------------------
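The hunk above only defines the SGL_ENABLE_NVTX preprocessor flag; it does not show how the nvtx3 headers or the NVTX library reach the targets that include <nvtx3/nvToolsExt.h>. A minimal sketch of one way to wire that up, assuming the CUDA::nvtx3 imported target from FindCUDAToolkit (CMake 3.25+; older releases expose CUDA::nvToolsExt instead) and a placeholder target name sgl:

# Sketch only, not part of the PR: attach the NVTX include/link requirements
# to the library target. "sgl" is a placeholder target name.
if(SGL_ENABLE_NVTX)
    if(TARGET CUDA::nvtx3)
        target_link_libraries(sgl PRIVATE CUDA::nvtx3)
    elseif(TARGET CUDA::nvToolsExt)
        target_link_libraries(sgl PRIVATE CUDA::nvToolsExt)
    endif()
endif()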
58 changes: 58 additions & 0 deletions src/slangpy_ext/utils/slangpy.cpp
@@ -21,6 +21,10 @@

#include <fmt/format.h>

#ifdef SGL_ENABLE_NVTX
#include <nvtx3/nvToolsExt.h>
#endif

namespace sgl {
extern void write_shader_cursor(ShaderCursor& cursor, nb::object value);
extern nb::ndarray<nb::numpy> buffer_to_numpy(Buffer* self);
@@ -429,12 +433,22 @@ nb::object NativeCallData::exec(
nb::kwargs kwargs
)
{
#ifdef SGL_ENABLE_NVTX
nvtxRangePush("slangpy_exec_unpack_args");
#endif
// Unpack args and kwargs.
nb::list unpacked_args = unpack_args(args);
nb::dict unpacked_kwargs = unpack_kwargs(kwargs);
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
nvtxRangePush("slangpy_exec_calc_shape");
#endif

// Calculate call shape.
Shape call_shape = m_runtime->calculate_call_shape(m_call_dimensionality, unpacked_args, unpacked_kwargs, this);
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
#endif
m_last_call_shape = call_shape;

// Setup context.
@@ -656,23 +670,51 @@ nb::object NativeCallData::exec(
}
};

#ifdef SGL_ENABLE_NVTX
nvtxRangePush("slangpy_exec_create_encoder");
#endif
// Create temporary command encoder if none is provided.
ref<CommandEncoder> temp_command_encoder;
if (command_encoder == nullptr) {
temp_command_encoder = m_device->create_command_encoder();
command_encoder = temp_command_encoder.get();
}
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
#endif

bool is_ray_tracing = opts->is_ray_tracing();

if (!is_ray_tracing) {
#ifdef SGL_ENABLE_NVTX
nvtxRangePush("slangpy_exec_begin_compute_pass");
#endif
ref<ComputePassEncoder> pass_encoder = command_encoder->begin_compute_pass();
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
nvtxRangePush("slangpy_exec_bind_pipeline");
#endif
ComputePipeline* pipeline = dynamic_cast<ComputePipeline*>(m_pipeline.get());
SGL_ASSERT(pipeline != nullptr);
ShaderCursor cursor(pass_encoder->bind_pipeline(pipeline));
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
nvtxRangePush("slangpy_exec_bind_call_data");
#endif
bind_call_data(cursor);
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
nvtxRangePush("slangpy_exec_dispatch");
#endif
pass_encoder->dispatch(uint3(total_threads, 1, 1));
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
nvtxRangePush("slangpy_exec_end_pass");
#endif
pass_encoder->end();
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
#endif
} else {
ref<RayTracingPassEncoder> pass_encoder = command_encoder->begin_ray_tracing_pass();
RayTracingPipeline* pipeline = dynamic_cast<RayTracingPipeline*>(m_pipeline.get());
@@ -683,17 +725,26 @@ nb::object NativeCallData::exec(
pass_encoder->end();
}

#ifdef SGL_ENABLE_NVTX
nvtxRangePush("slangpy_exec_submit_cmdbuf");
#endif
// If we created a temporary command encoder, we need to submit it.
if (temp_command_encoder) {
m_device->submit_command_buffer(temp_command_encoder->finish(), CommandQueueType::graphics, cuda_stream);
command_encoder = nullptr;
}
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
#endif

// If command_encoder is not null, return early.
if (command_encoder != nullptr) {
return nanobind::none();
}

#ifdef SGL_ENABLE_NVTX
nvtxRangePush("slangpy_exec_readback");
#endif
// Read call data post dispatch.
// m_runtime->read_call_data_post_dispatch(context, call_data, unpacked_args, unpacked_kwargs);
for (auto val : read_back) {
@@ -703,6 +754,10 @@ nb::object NativeCallData::exec(
auto rb_data = t[2];
bvr->python_type()->read_calldata(context, bvr.get(), rb_val, rb_data);
}
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
nvtxRangePush("slangpy_exec_pack_results");
#endif

// Pack updated 'this' values back.
for (size_t i = 0; i < args.size(); ++i) {
@@ -711,6 +766,9 @@ nb::object NativeCallData::exec(
for (auto [k, v] : kwargs) {
pack_arg(nb::cast<nb::object>(v), unpacked_kwargs[k]);
}
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
#endif

// Handle return value based on call mode.
if (m_call_mode == CallMode::prim) {
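Every range in this file is opened and closed by a matching pair of #ifdef SGL_ENABLE_NVTX blocks, which is easy to unbalance once a code path returns early or throws. A small RAII helper would keep each push paired with exactly one pop; a minimal sketch, not part of the PR (the ScopedNvtxRange name is hypothetical):

// Sketch: RAII wrapper around nvtxRangePush/nvtxRangePop. The range is popped in
// the destructor, so it stays balanced on early return or exception, and the class
// collapses to a no-op when SGL_ENABLE_NVTX is not defined.
#ifdef SGL_ENABLE_NVTX
#include <nvtx3/nvToolsExt.h>
#endif

class ScopedNvtxRange {
public:
    explicit ScopedNvtxRange(const char* name)
    {
#ifdef SGL_ENABLE_NVTX
        nvtxRangePush(name);
#else
        (void)name;
#endif
    }
    ~ScopedNvtxRange()
    {
#ifdef SGL_ENABLE_NVTX
        nvtxRangePop();
#endif
    }
    ScopedNvtxRange(const ScopedNvtxRange&) = delete;
    ScopedNvtxRange& operator=(const ScopedNvtxRange&) = delete;
};

// Usage, e.g. at the top of NativeCallData::exec:
//     ScopedNvtxRange range("slangpy_exec_unpack_args");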
47 changes: 43 additions & 4 deletions src/slangpy_ext/utils/slangpyfunction.cpp
@@ -5,6 +5,10 @@
#include "utils/slangpyfunction.h"
#include <fmt/format.h>

#ifdef SGL_ENABLE_NVTX
#include <nvtx3/nvToolsExt.h>
#endif

namespace sgl {

template<>
@@ -47,6 +51,10 @@ ref<NativeCallData> NativeFunctionNode::build_call_data(NativeCallDataCache* cac

nb::object NativeFunctionNode::call(NativeCallDataCache* cache, nb::args args, nb::kwargs kwargs)
{
#ifdef SGL_ENABLE_NVTX
nvtxRangePush("slangpy_native_call");
#endif

auto options = make_ref<NativeCallRuntimeOptions>();
gather_runtime_options(options);

@@ -55,26 +63,57 @@ nb::object NativeFunctionNode::call(NativeCallDataCache* cache, nb::args args, n
args = nb::cast<nb::args>(nb::make_tuple(options->get_this()) + args);
}

#ifdef SGL_ENABLE_NVTX
nvtxRangePush("slangpy_signature_build");
#endif

auto builder = make_ref<SignatureBuilder>();
read_signature(builder);
cache->get_args_signature(builder, args, kwargs);

std::string sig = builder->str();
ref<NativeCallData> call_data = cache->find_call_data(sig);

#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
#endif

nb::object result;
if (call_data) {
#ifdef SGL_ENABLE_NVTX
nvtxRangePush("slangpy_call_cached");
#endif
if (call_data->is_torch_integration())
- return call_data->_py_torch_call(this, options, args, kwargs);
+ result = call_data->_py_torch_call(this, options, args, kwargs);
else
- return call_data->call(options, args, kwargs);
+ result = call_data->call(options, args, kwargs);
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
#endif
} else {
#ifdef SGL_ENABLE_NVTX
nvtxRangePush("slangpy_call_generate");
#endif
ref<NativeCallData> new_call_data = generate_call_data(args, kwargs);
cache->add_call_data(sig, new_call_data);
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
nvtxRangePush("slangpy_call_new");
#endif
if (new_call_data->is_torch_integration())
- return new_call_data->_py_torch_call(this, options, args, kwargs);
+ result = new_call_data->_py_torch_call(this, options, args, kwargs);
else
- return new_call_data->call(options, args, kwargs);
+ result = new_call_data->call(options, args, kwargs);
#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
#endif
}

#ifdef SGL_ENABLE_NVTX
nvtxRangePop();
#endif

return result;
}

void NativeFunctionNode::append_to(
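With this many flat range names in play, it can help to give a top-level range a color so it stands out on an Nsight Systems timeline. NVTX supports this through nvtxRangePushEx and nvtxEventAttributes_t; a minimal sketch, not part of the PR (push_colored_range and the ARGB value are illustrative):

// Sketch: push a named, colored NVTX range using the plain NVTX3 C API.
#ifdef SGL_ENABLE_NVTX
#include <nvtx3/nvToolsExt.h>
#include <cstdint>

static void push_colored_range(const char* name, uint32_t argb)
{
    nvtxEventAttributes_t attr = {};
    attr.version = NVTX_VERSION;
    attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
    attr.colorType = NVTX_COLOR_ARGB;
    attr.color = argb;
    attr.messageType = NVTX_MESSAGE_TYPE_ASCII;
    attr.message.ascii = name;
    nvtxRangePushEx(&attr);
}
#endif

// Usage: push_colored_range("slangpy_native_call", 0xFF76B900); /* ... */ nvtxRangePop();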