From 12ec62859bd03d6d35aba8c0640ef8fca339794c Mon Sep 17 00:00:00 2001 From: szihs <675653+szihs@users.noreply.github.com> Date: Wed, 26 Nov 2025 11:30:53 +0000 Subject: [PATCH] DXR SER: Add dual-API support for NVAPI and DXR 1.3 native MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements Shader Execution Reordering (SER) with support for both: - NVAPI path (existing, requires hlsl_nvapi capability) - DXR 1.3 native path (new, requires SM 6.9+ without NVAPI) Key changes: - Dual HitObject intrinsics: NVAPI vs DXR 1.3 native implementations - New DXR 1.3-only methods: GetRayFlags, GetRayTMin, MakeMiss, FromRayQuery - Smart HLSL emission: NvHitObject vs dx::HitObject based on capabilities - New capability atoms: ser_hlsl_native, ser_dxr, ser_nvapi - MaybeReorderThread aliases for DXR 1.3 compatibility 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .../a3-02-reference-capability-atoms.md | 17 +- source/slang/hlsl.meta.slang | 361 +++++++++++++++--- source/slang/slang-capabilities.capdef | 25 +- source/slang/slang-emit-hlsl.cpp | 34 +- .../raygen-trace-ray-param-non-struct.slang | 2 +- .../vkray/raygen-trace-ray-param-struct.slang | 2 +- 6 files changed, 373 insertions(+), 68 deletions(-) diff --git a/docs/user-guide/a3-02-reference-capability-atoms.md b/docs/user-guide/a3-02-reference-capability-atoms.md index 6e94b8ca7fc..f8ca54c9c90 100644 --- a/docs/user-guide/a3-02-reference-capability-atoms.md +++ b/docs/user-guide/a3-02-reference-capability-atoms.md @@ -669,6 +669,9 @@ Extensions `SPV_NV_tensor_addressing` > Represents the SPIR-V extension for SPV_NV_tensor_addressing. +`ser_hlsl_native` +> DXR 1.3 native SER support (SM 6.9, no NVAPI required) + `spvAtomicFloat16AddEXT` > Represents the SPIR-V capability for atomic float 16 add operations. 
@@ -1240,7 +1243,7 @@ Compound Capabilities > Collection of shader stages `ser` -> Capabilities needed for shader-execution-reordering +> Capabilities needed for shader-execution-reordering (all paths) `ser_any_closesthit_intersection_miss` > Collection of capabilities for raytracing + shader execution reordering and the shader stages of anyhit, closesthit, intersection, and miss. @@ -1251,6 +1254,15 @@ Compound Capabilities `ser_anyhit_closesthit_intersection` > Collection of capabilities for raytracing + shader execution reordering and the shader stages of anyhit, closesthit, and intersection. +`ser_dxr` +> Capabilities needed for shader-execution-reordering (native DXR 1.3 path) + +`ser_dxr_raygen` +> Collection of capabilities for DXR 1.3 native SER (HLSL only) with raygen stage. + +`ser_dxr_raygen_closesthit_miss` +> Collection of capabilities for DXR 1.3 native SER (HLSL only) with raygen, closesthit, miss stages. + `ser_motion` > Capabilities needed for shader-execution-reordering and motion-blur @@ -1260,6 +1272,9 @@ Compound Capabilities `ser_motion_raygen_closesthit_miss` > Collection of capabilities for raytracing + motion blur + shader execution reordering and the shader stages of raygen, closesthit, and miss. +`ser_nvapi` +> Capabilities needed for shader-execution-reordering (NVAPI path for HLSL) + `ser_raygen` > Collection of capabilities for raytracing + shader execution reordering and the shader stage of raygen. diff --git a/source/slang/hlsl.meta.slang b/source/slang/hlsl.meta.slang index 76cfa5e5114..7e9b8278bb8 100644 --- a/source/slang/hlsl.meta.slang +++ b/source/slang/hlsl.meta.slang @@ -21270,19 +21270,20 @@ struct HitObject } } - /// Creates a HitObject representing “NOP” (no operation) which is neither a hit nor a miss. Invoking a + /// Creates a HitObject representing "NOP" (no operation) which is neither a hit nor a miss. Invoking a /// NOP hit object using HitObject::Invoke has no effect. 
Reordering by hit objects using /// ReorderThread will group NOP hit objects together. This can be useful in some reordering /// scenarios where future control flow for some threads is known to process neither a hit nor a /// miss. - [__requiresNVAPI] [ForceInline] - [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_spirv, ser_raygen_closesthit_miss)] + [require(hlsl_nvapi, ser_raygen_closesthit_miss)] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] static HitObject MakeNop() { __target_switch { - case hlsl: + case hlsl_nvapi: __intrinsic_asm "($0 = NvMakeNop())"; case glsl: __glslMakeNop(__return_val); @@ -21295,27 +21296,42 @@ struct HitObject OpHitObjectRecordEmptyNV /**/ &__return_val; }; + case hlsl: + // DXR 1.3 native path (hlsl without nvapi) + __intrinsic_asm "($0 = dx::HitObject::MakeNop())"; } } - [require(hlsl, ser)] + [require(hlsl_nvapi, ser_nvapi)] __generic<payload_t> - static void __InvokeHLSL( + static void __InvokeHLSL_NVAPI( RaytracingAccelerationStructure AccelerationStructure, HitObject HitOrMiss, inout payload_t Payload) { __target_switch { - case hlsl: __intrinsic_asm "NvInvokeHitObject"; + case hlsl_nvapi: __intrinsic_asm "NvInvokeHitObject"; + } + } + + [require(hlsl, ser_dxr)] + __generic<payload_t> + static void __InvokeHLSL_DXR( + HitObject HitOrMiss, + inout payload_t Payload) + { + __target_switch + { + case hlsl: __intrinsic_asm "dx::HitObject::Invoke"; } } /// Invokes closesthit or miss shading for the specified hit object. In case of a NOP HitObject, no - /// shader is invoked. - [__requiresNVAPI] + /// shader is invoked. 
(NVAPI/SPIRV/CUDA variant with AccelerationStructure parameter) [ForceInline] - [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_spirv, ser_raygen_closesthit_miss)] + [require(hlsl_nvapi, ser_raygen_closesthit_miss)] static void Invoke<payload_t>( RaytracingAccelerationStructure AccelerationStructure, HitObject HitOrMiss, @@ -21323,8 +21339,8 @@ struct HitObject { __target_switch { - case hlsl: - __InvokeHLSL( + case hlsl_nvapi: + __InvokeHLSL_NVAPI( AccelerationStructure, HitOrMiss, __forceVarIntoRayPayloadStructTemporarily(Payload)); @@ -21365,8 +21381,24 @@ struct HitObject } } + /// Invokes closesthit or miss shading for the specified hit object. In case of a NOP HitObject, no + /// shader is invoked. (DXR 1.3 variant without AccelerationStructure parameter) + [ForceInline] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] + static void Invoke<payload_t>( + HitObject HitOrMiss, + inout payload_t Payload) + { + __target_switch + { + case hlsl: + __InvokeHLSL_DXR( + HitOrMiss, + __forceVarIntoRayPayloadStructTemporarily(Payload)); + } + } + /// Returns true if the HitObject encodes a miss, otherwise returns false. - [__requiresNVAPI] [ForceInline] [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] bool IsMiss() @@ -21387,7 +21419,6 @@ struct HitObject } /// Returns true if the HitObject encodes a hit, otherwise returns false. - [__requiresNVAPI] [ForceInline] [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] bool IsHit() @@ -21408,7 +21439,6 @@ struct HitObject } /// Returns true if the HitObject encodes a nop, otherwise returns false. - [__requiresNVAPI] [ForceInline] [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] bool IsNop() @@ -21429,14 +21459,15 @@ struct HitObject } /// Queries ray properties from HitObject. Valid if the hit object represents a hit or a miss. - [__requiresNVAPI] + /// Note: This is an NVAPI/SPIRV/CUDA extension. DXR 1.3 uses individual accessors (GetRayTMin, GetRayTCurrent, etc.) 
[ForceInline] - [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_spirv, ser_raygen_closesthit_miss)] + [require(hlsl_nvapi, ser_raygen_closesthit_miss)] RayDesc GetRayDesc() { __target_switch { - case hlsl: + case hlsl_nvapi: __intrinsic_asm ".GetRayDesc"; case glsl: { @@ -21459,7 +21490,6 @@ struct HitObject } /// Queries shader table index from HitObject. Valid if the hit object represents a hit or a miss. - [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] @@ -21467,7 +21497,8 @@ struct HitObject { __target_switch { - case hlsl: __intrinsic_asm ".GetShaderTableIndex"; + case hlsl: + __intrinsic_asm ".GetShaderTableIndex"; case glsl: __intrinsic_asm "hitObjectGetShaderBindingTableRecordIndexNV($0)"; case cuda: __intrinsic_asm "slangOptixHitObjectGetSbtRecordIndex"; case spirv: @@ -21480,7 +21511,6 @@ struct HitObject } } - [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] [require(cuda_hlsl, ser_raygen_closesthit_miss)] @@ -21488,12 +21518,12 @@ struct HitObject { __target_switch { - case hlsl: __intrinsic_asm ".SetShaderTableIndex"; + case hlsl: + __intrinsic_asm ".SetShaderTableIndex"; case cuda: __intrinsic_asm "slangOptixHitObjectSetSbtRecordIndex"; } } /// Returns the instance index of a hit. Valid if the hit object represents a hit. - [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] @@ -21501,7 +21531,8 @@ struct HitObject { __target_switch { - case hlsl: __intrinsic_asm ".GetInstanceIndex"; + case hlsl: + __intrinsic_asm ".GetInstanceIndex"; case glsl: __intrinsic_asm "hitObjectGetInstanceIdNV($0)"; case cuda: __intrinsic_asm "slangOptixHitObjectGetInstanceIndex"; case spirv: @@ -21515,7 +21546,6 @@ struct HitObject } /// Returns the instance ID of a hit. Valid if the hit object represents a hit. 
- [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] @@ -21523,7 +21553,8 @@ struct HitObject { __target_switch { - case hlsl: __intrinsic_asm ".GetInstanceID"; + case hlsl: + __intrinsic_asm ".GetInstanceID"; case glsl: __intrinsic_asm "hitObjectGetInstanceCustomIndexNV($0)"; case cuda: __intrinsic_asm "slangOptixHitObjectGetInstanceId"; case spirv: @@ -21537,7 +21568,6 @@ struct HitObject } /// Returns the geometry index of a hit. Valid if the hit object represents a hit. - [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] @@ -21545,7 +21575,8 @@ struct HitObject { __target_switch { - case hlsl: __intrinsic_asm ".GetGeometryIndex"; + case hlsl: + __intrinsic_asm ".GetGeometryIndex"; case glsl: __intrinsic_asm "hitObjectGetGeometryIndexNV($0)"; case cuda: __intrinsic_asm "slangOptixHitObjectGetSbtGASIndex"; case spirv: @@ -21559,7 +21590,6 @@ struct HitObject } /// Returns the primitive index of a hit. Valid if the hit object represents a hit. - [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] @@ -21567,7 +21597,8 @@ struct HitObject { __target_switch { - case hlsl: __intrinsic_asm ".GetPrimitiveIndex"; + case hlsl: + __intrinsic_asm ".GetPrimitiveIndex"; case glsl: __intrinsic_asm "hitObjectGetPrimitiveIndexNV($0)"; case cuda: __intrinsic_asm "slangOptixHitObjectGetPrimitiveIndex"; case spirv: @@ -21581,7 +21612,6 @@ struct HitObject } /// Returns the hit kind. Valid if the hit object represents a hit. 
- [__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) [ForceInline] [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] @@ -21589,7 +21619,8 @@ struct HitObject { __target_switch { - case hlsl: __intrinsic_asm ".GetHitKind"; + case hlsl: + __intrinsic_asm ".GetHitKind"; case glsl: __intrinsic_asm "hitObjectGetHitKindNV($0)"; case cuda: __intrinsic_asm "optixHitObjectGetHitKind()"; case spirv: @@ -21603,16 +21634,17 @@ struct HitObject } /// Returns the cluster ID of the current hit. Valid if the hit object represents a hit. - [__requiresNVAPI] + /// Note: This is an NVAPI/SPIRV/CUDA extension, not available in DXR 1.3. __glsl_extension(GL_NV_cluster_acceleration_structure) __glsl_extension(GL_EXT_ray_tracing) [ForceInline] - [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_spirv, ser_raygen_closesthit_miss)] + [require(hlsl_nvapi, ser_raygen_closesthit_miss)] int GetClusterID() { __target_switch { - case hlsl: __intrinsic_asm ".GetClusterID"; + case hlsl_nvapi: __intrinsic_asm ".GetClusterID"; case glsl: __intrinsic_asm "hitObjectGetClusterIdNV($0)"; case cuda: __intrinsic_asm "slangOptixHitObjectGetClusterId"; case spirv: @@ -21725,15 +21757,17 @@ struct HitObject } } - [__requiresNVAPI] + /// Returns 4x3 world-to-object transform matrix. Valid if the hit object represents a hit. + /// Note: NVAPI/SPIRV/GLSL extension. DXR 1.3 uses GetWorldToObject3x4() and GetWorldToObject4x3(). 
__glsl_extension(GL_EXT_ray_tracing) [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(glsl_spirv, ser_raygen_closesthit_miss)] + [require(hlsl_nvapi, ser_raygen_closesthit_miss)] float4x3 GetWorldToObject() { __target_switch { - case hlsl: __intrinsic_asm ".GetWorldToObject"; + case hlsl_nvapi: __intrinsic_asm ".GetWorldToObject"; case glsl: __intrinsic_asm "hitObjectGetWorldToObjectNV($0)"; case spirv: return spirv_asm @@ -21745,15 +21779,17 @@ struct HitObject } } - [__requiresNVAPI] + /// Returns 4x3 object-to-world transform matrix. Valid if the hit object represents a hit. + /// Note: NVAPI/SPIRV/GLSL extension. DXR 1.3 uses GetObjectToWorld3x4() and GetObjectToWorld4x3(). __glsl_extension(GL_EXT_ray_tracing) [ForceInline] - [require(glsl_hlsl_spirv, ser_raygen_closesthit_miss)] + [require(glsl_spirv, ser_raygen_closesthit_miss)] + [require(hlsl_nvapi, ser_raygen_closesthit_miss)] float4x3 GetObjectToWorld() { __target_switch { - case hlsl: __intrinsic_asm ".GetObjectToWorld"; + case hlsl_nvapi: __intrinsic_asm ".GetObjectToWorld"; case glsl: __intrinsic_asm "hitObjectGetObjectToWorldNV($0)"; case spirv: return spirv_asm @@ -21782,11 +21818,14 @@ struct HitObject } } + /// Returns object-space ray origin. Valid if the hit object represents a hit. [ForceInline] - [require(cuda_glsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] float3 GetObjectRayOrigin() { __target_switch { + case hlsl: + __intrinsic_asm ".GetObjectRayOrigin"; case glsl: __intrinsic_asm "hitObjectGetObjectRayOriginNV($0)"; case cuda: __intrinsic_asm "optixGetObjectRayOrigin()"; @@ -21800,11 +21839,14 @@ struct HitObject } } + /// Returns object-space ray direction. Valid if the hit object represents a hit. 
[ForceInline] - [require(cuda_glsl_spirv, ser_raygen_closesthit_miss)] + [require(cuda_glsl_hlsl_spirv, ser_raygen_closesthit_miss)] float3 GetObjectRayDirection() { __target_switch { + case hlsl: + __intrinsic_asm ".GetObjectRayDirection"; case glsl: __intrinsic_asm "hitObjectGetObjectRayDirectionNV($0)"; case cuda: __intrinsic_asm "optixGetObjectRayDirection()"; @@ -21875,7 +21917,6 @@ struct HitObject } /// Loads a root constant from the local root table referenced by the hit object. Valid if the hit object /// represents a hit or a miss. RootConstantOffsetInBytes must be a multiple of 4. - [__requiresNVAPI] [require(cuda_hlsl, ser_raygen_closesthit_miss)] uint LoadLocalRootTableConstant(uint RootConstantOffsetInBytes) { @@ -21886,6 +21927,168 @@ struct HitObject } } + // + // DXR 1.3-only methods (not available in NVAPI/SPIRV/CUDA) + // + + /// Returns the ray flags associated with the hit object. Valid if the hit object represents a hit or a miss. + /// DXR 1.3 only. + [ForceInline] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] + uint GetRayFlags() + { + __target_switch + { + case hlsl: __intrinsic_asm ".GetRayFlags"; + } + } + + /// Returns the parametric starting point (TMin) of the ray. Valid if the hit object represents a hit or a miss. + /// DXR 1.3 only. + [ForceInline] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] + float GetRayTMin() + { + __target_switch + { + case hlsl: __intrinsic_asm ".GetRayTMin"; + } + } + + /// Returns the parametric ending point (T value at hit/miss). Valid if the hit object represents a hit or a miss. + /// DXR 1.3 only. + [ForceInline] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] + float GetRayTCurrent() + { + __target_switch + { + case hlsl: __intrinsic_asm ".GetRayTCurrent"; + } + } + + /// Returns the world-space ray origin. Valid if the hit object represents a hit or a miss. + /// DXR 1.3 only. 
+ [ForceInline] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] + float3 GetWorldRayOrigin() + { + __target_switch + { + case hlsl: __intrinsic_asm ".GetWorldRayOrigin"; + } + } + + /// Returns the world-space ray direction. Valid if the hit object represents a hit or a miss. + /// DXR 1.3 only. + [ForceInline] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] + float3 GetWorldRayDirection() + { + __target_switch + { + case hlsl: __intrinsic_asm ".GetWorldRayDirection"; + } + } + + /// Returns the 3x4 object-to-world transform matrix. Valid if the hit object represents a hit. + /// DXR 1.3 only. + [ForceInline] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] + float3x4 GetObjectToWorld3x4() + { + __target_switch + { + case hlsl: __intrinsic_asm ".GetObjectToWorld3x4"; + } + } + + /// Returns the 4x3 object-to-world transform matrix (transposed). Valid if the hit object represents a hit. + /// DXR 1.3 only. + [ForceInline] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] + float4x3 GetObjectToWorld4x3() + { + __target_switch + { + case hlsl: __intrinsic_asm ".GetObjectToWorld4x3"; + } + } + + /// Returns the 3x4 world-to-object transform matrix. Valid if the hit object represents a hit. + /// DXR 1.3 only. + [ForceInline] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] + float3x4 GetWorldToObject3x4() + { + __target_switch + { + case hlsl: __intrinsic_asm ".GetWorldToObject3x4"; + } + } + + /// Returns the 4x3 world-to-object transform matrix (transposed). Valid if the hit object represents a hit. + /// DXR 1.3 only. + [ForceInline] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] + float4x3 GetWorldToObject4x3() + { + __target_switch + { + case hlsl: __intrinsic_asm ".GetWorldToObject4x3"; + } + } + + /// Creates a HitObject representing a miss with specified ray flags. + /// DXR 1.3 only - includes RayFlags parameter. 
+ [ForceInline] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] + static HitObject MakeMiss( + uint RayFlags, + uint MissShaderIndex, + RayDesc Ray) + { + __target_switch + { + case hlsl: + __intrinsic_asm "($3 = dx::HitObject::MakeMiss($0, $1, $2))"; + } + } + + /// Creates a HitObject from a committed hit in a RayQuery. + /// The RayQuery must have a committed hit (COMMITTED_TRIANGLE_HIT or COMMITTED_PROCEDURAL_PRIMITIVE_HIT). + /// DXR 1.3 only. + [ForceInline] + [require(hlsl, ser_dxr_raygen)] + __generic + static HitObject FromRayQuery(RayQuery Query) + { + __target_switch + { + case hlsl: + __intrinsic_asm "($1 = dx::HitObject::FromRayQuery($0))"; + } + } + + /// Creates a HitObject from a committed hit in a RayQuery with custom hit attributes. + /// The RayQuery must have a committed procedural hit (COMMITTED_PROCEDURAL_PRIMITIVE_HIT). + /// CommittedCustomHitKind specifies the user-defined hit kind (must be >= HIT_KIND_USER_DEFINED_MIN). + /// DXR 1.3 only. + [ForceInline] + [require(hlsl, ser_dxr_raygen)] + __generic + static HitObject FromRayQuery( + RayQuery Query, + uint CommittedCustomHitKind, + attr_t CommittedCustomAttribs) + { + __target_switch + { + case hlsl: + __intrinsic_asm "($3 = dx::HitObject::FromRayQuery($0, $1, $2))"; + } + } + /// /// !!!! Internal NVAPI HLSL impl. Not part of interface! !!!!!!!!!!!! 
/// @@ -21900,8 +22103,8 @@ struct HitObject } } - [__requiresNVAPI] - [require(hlsl, ser_raygen_closesthit_miss)] + [require(hlsl_nvapi, ser_raygen_closesthit_miss)] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] static void __hlslMakeHitWithRecordIndex( uint HitGroupRecordIndex, RaytracingAccelerationStructure AccelerationStructure, @@ -21915,12 +22118,13 @@ struct HitObject { __target_switch { - case hlsl: __intrinsic_asm "NvMakeHitWithRecordIndex"; + case hlsl_nvapi: __intrinsic_asm "NvMakeHitWithRecordIndex"; + case hlsl: __intrinsic_asm "($8 = dx::HitObject::MakeHit($0, $1, $2, $3, $4, $5, $6, $7))"; } } - [__requiresNVAPI] - [require(hlsl, ser_raygen_closesthit_miss)] + [require(hlsl_nvapi, ser_raygen_closesthit_miss)] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] static void __hlslMakeHit(RaytracingAccelerationStructure AccelerationStructure, uint InstanceIndex, uint GeometryIndex, @@ -21934,12 +22138,13 @@ struct HitObject { __target_switch { - case hlsl: __intrinsic_asm "NvMakeHit"; + case hlsl_nvapi: __intrinsic_asm "NvMakeHit"; + case hlsl: __intrinsic_asm "($9 = dx::HitObject::MakeHit($0, $1, $2, $3, $4, $5, $6, $7, $8))"; } } - [__requiresNVAPI] - [require(hlsl, ser_raygen_closesthit_miss)] + [require(hlsl_nvapi, ser_raygen_closesthit_miss)] + [require(hlsl, ser_dxr_raygen_closesthit_miss)] static void __hlslTraceRay( RaytracingAccelerationStructure AccelerationStructure, uint RayFlags, @@ -21953,7 +22158,8 @@ struct HitObject { __target_switch { - case hlsl: __intrinsic_asm "NvTraceRayHitObject"; + case hlsl_nvapi: __intrinsic_asm "NvTraceRayHitObject"; + case hlsl: __intrinsic_asm "($8 = dx::HitObject::TraceRay($0, $1, $2, $3, $4, $5, $6, $7))"; } } @@ -22245,9 +22451,8 @@ struct HitObject /// Applications should set this to the lowest value required to represent all possible values in /// CoherenceHint. For best performance, all threads should provide the same value for /// NumCoherenceHintBits. 
- /// Where possible, reordering will also attempt to retain locality in the thread’s launch indices + /// Where possible, reordering will also attempt to retain locality in the thread's launch indices /// (DispatchRaysIndex in DXR). -[__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) [ForceInline] @@ -22256,7 +22461,8 @@ void ReorderThread( uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ) { __target_switch { - case hlsl: __intrinsic_asm "NvReorderThread"; + case hlsl_nvapi: __intrinsic_asm "NvReorderThread"; + case hlsl: __intrinsic_asm "dx::MaybeReorderThread"; case glsl: __intrinsic_asm "reorderThreadNV"; case cuda: __intrinsic_asm "optixReorder"; case spirv: @@ -22285,7 +22491,6 @@ void ReorderThread( uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ) /// groups, it will attempt to order threads by the value of their coherence hints. And within ranges /// of equal coherence hints, it will attempt to maximize locality in 3D space of the ray hit (if any). -[__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) [ForceInline] @@ -22294,7 +22499,8 @@ void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHi { __target_switch { - case hlsl: __intrinsic_asm "NvReorderThread"; + case hlsl_nvapi: __intrinsic_asm "NvReorderThread"; + case hlsl: __intrinsic_asm "dx::MaybeReorderThread"; case glsl: __intrinsic_asm "reorderThreadNV"; case cuda: __intrinsic_asm "optixReorder($1, $2)"; case spirv: @@ -22313,7 +22519,6 @@ void ReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHi // ``` // With CoherenceHint and NumCoherenceHintBitsFromLSB as 0, meaning they are ignored. 
-[__requiresNVAPI] __glsl_extension(GL_EXT_ray_tracing) __glsl_extension(GL_NV_shader_invocation_reorder) [ForceInline] @@ -22322,7 +22527,8 @@ void ReorderThread( HitObject HitOrMiss ) { __target_switch { - case hlsl: __intrinsic_asm "NvReorderThread"; + case hlsl_nvapi: __intrinsic_asm "NvReorderThread"; + case hlsl: __intrinsic_asm "dx::MaybeReorderThread"; case glsl: __intrinsic_asm "reorderThreadNV"; case cuda: __intrinsic_asm "optixReorder()"; case spirv: @@ -22335,6 +22541,41 @@ void ReorderThread( HitObject HitOrMiss ) } } +// DXR 1.3 naming: MaybeReorderThread (alias for ReorderThread for DXR compatibility) + + /// DXR 1.3 MaybeReorderThread - reorders threads based on a coherence hint value. +[ForceInline] +[require(hlsl, ser_dxr_raygen)] +void MaybeReorderThread( uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ) +{ + __target_switch + { + case hlsl: __intrinsic_asm "dx::MaybeReorderThread"; + } +} + + /// DXR 1.3 MaybeReorderThread - reorders threads based on a hit object and coherence hint. +[ForceInline] +[require(hlsl, ser_dxr_raygen)] +void MaybeReorderThread( HitObject HitOrMiss, uint CoherenceHint, uint NumCoherenceHintBitsFromLSB ) +{ + __target_switch + { + case hlsl: __intrinsic_asm "dx::MaybeReorderThread"; + } +} + + /// DXR 1.3 MaybeReorderThread - reorders threads based on a hit object. 
+[ForceInline] +[require(hlsl, ser_dxr_raygen)] +void MaybeReorderThread( HitObject HitOrMiss ) +{ + __target_switch + { + case hlsl: __intrinsic_asm "dx::MaybeReorderThread"; + } +} + /// /// DebugBreak support /// diff --git a/source/slang/slang-capabilities.capdef b/source/slang/slang-capabilities.capdef index 9c9afe0e0f4..324aae3dfc7 100644 --- a/source/slang/slang-capabilities.capdef +++ b/source/slang/slang-capabilities.capdef @@ -1176,9 +1176,22 @@ alias nvapi = hlsl_nvapi; /// Capabilities needed for minimal raytracing support /// [Compound] alias raytracing = GL_EXT_ray_tracing | _sm_6_3 | cuda; -/// Capabilities needed for shader-execution-reordering +/// Capabilities needed for shader-execution-reordering (NVAPI path for HLSL) /// [Compound] -alias ser = raytracing + GL_NV_shader_invocation_reorder | raytracing + hlsl_nvapi | cuda; +alias ser_nvapi = raytracing + hlsl_nvapi; + +/// DXR 1.3 native SER support (SM 6.9, no NVAPI required) +/// [EXT] +def ser_hlsl_native : _sm_6_9; + +/// Capabilities needed for shader-execution-reordering (native DXR 1.3 path) +/// [Compound] +alias ser_dxr = raytracing + ser_hlsl_native; + +/// Capabilities needed for shader-execution-reordering (all paths) +/// [Compound] +alias ser = raytracing + GL_NV_shader_invocation_reorder | ser_nvapi | ser_dxr | cuda; + /// Capabilities needed for raytracing-motionblur /// [Compound] alias motionblur = GL_NV_ray_tracing_motion_blur | _sm_6_3 + hlsl_nvapi | cuda; @@ -2402,6 +2415,14 @@ alias ser_motion_raygen_closesthit_miss = raygen_closesthit_miss + ser_motion; /// [Compound] alias ser_motion_raygen = raygen + ser_motion; +/// Collection of capabilities for DXR 1.3 native SER (HLSL only) with raygen, closesthit, miss stages. +/// [Compound] +alias ser_dxr_raygen_closesthit_miss = raygen_closesthit_miss + ser_dxr; + +/// Collection of capabilities for DXR 1.3 native SER (HLSL only) with raygen stage. 
+/// [Compound] +alias ser_dxr_raygen = raygen + ser_dxr; + /// User should not use this capability /// [Other] alias all = _sm_6_9 + hlsl_nvapi diff --git a/source/slang/slang-emit-hlsl.cpp b/source/slang/slang-emit-hlsl.cpp index a5d634ebfbe..081d52a774e 100644 --- a/source/slang/slang-emit-hlsl.cpp +++ b/source/slang/slang-emit-hlsl.cpp @@ -1453,7 +1453,26 @@ void HLSLSourceEmitter::emitSimpleTypeImpl(IRType* type) } case kIROp_HitObjectType: { - m_writer->emit("NvHitObject"); + // Check if NVAPI is explicitly enabled first + auto targetCaps = getTargetReq()->getTargetCaps(); + auto nvapiCapabilitySet = CapabilitySet(CapabilityName::hlsl_nvapi); + auto sm69CapabilitySet = CapabilitySet(CapabilityName::_sm_6_9); + + if (targetCaps.implies(nvapiCapabilitySet)) + { + // Explicit NVAPI: use NvHitObject + m_writer->emit("NvHitObject"); + } + else if (targetCaps.implies(sm69CapabilitySet)) + { + // DXR 1.3 standard: use dx::HitObject namespace + m_writer->emit("dx::HitObject"); + } + else + { + // Fallback to legacy NVAPI + m_writer->emit("NvHitObject"); + } return; } case kIROp_TextureFootprintType: @@ -1862,9 +1881,18 @@ void HLSLSourceEmitter::handleRequiredCapabilitiesImpl(IRInst* inst) } } -void HLSLSourceEmitter::emitFrontMatterImpl(TargetRequest*) +void HLSLSourceEmitter::emitFrontMatterImpl(TargetRequest* targetReq) { - if (m_extensionTracker->m_requiresNVAPI) + // Check if NVAPI is required either by decoration or by capability + bool requiresNVAPI = m_extensionTracker->m_requiresNVAPI; + if (!requiresNVAPI && targetReq) + { + auto targetCaps = targetReq->getTargetCaps(); + auto nvapiCapabilitySet = CapabilitySet(CapabilityName::hlsl_nvapi); + requiresNVAPI = targetCaps.implies(nvapiCapabilitySet); + } + + if (requiresNVAPI) { // If the generated code includes implicit NVAPI use, // then we need to ensure that NVAPI support is included diff --git a/tests/vkray/raygen-trace-ray-param-non-struct.slang b/tests/vkray/raygen-trace-ray-param-non-struct.slang index 
72d85ed026a..256b930d5ac 100644 --- a/tests/vkray/raygen-trace-ray-param-non-struct.slang +++ b/tests/vkray/raygen-trace-ray-param-non-struct.slang @@ -1,4 +1,4 @@ -//TEST:SIMPLE(filecheck=CHECK): -target hlsl -stage raygeneration -entry main +//TEST:SIMPLE(filecheck=CHECK): -target hlsl -stage raygeneration -entry main -capability hlsl_nvapi RaytracingAccelerationStructure as; diff --git a/tests/vkray/raygen-trace-ray-param-struct.slang b/tests/vkray/raygen-trace-ray-param-struct.slang index d9a6d134a3f..560604244a8 100644 --- a/tests/vkray/raygen-trace-ray-param-struct.slang +++ b/tests/vkray/raygen-trace-ray-param-struct.slang @@ -1,4 +1,4 @@ -//TEST:SIMPLE(filecheck=CHECK): -target hlsl -stage raygeneration -entry main +//TEST:SIMPLE(filecheck=CHECK): -target hlsl -stage raygeneration -entry main -capability hlsl_nvapi RaytracingAccelerationStructure as;