diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e9bc2d0f63..e1309c8f283 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -86,6 +86,7 @@ By @Vecvec in [#7913](https://github.com/gfx-rs/wgpu/pull/7913). #### DX12 - Allow disabling waiting for latency waitable object. By @marcpabst in [#7400](https://github.com/gfx-rs/wgpu/pull/7400) +- Add mesh shader support, including to the example. By @SupaMaggie70Incorporated in [#8110](https://github.com/gfx-rs/wgpu/pull/8110) ### Bug Fixes diff --git a/examples/features/src/framework.rs b/examples/features/src/framework.rs index 7a3017848fa..67c859052c0 100644 --- a/examples/features/src/framework.rs +++ b/examples/features/src/framework.rs @@ -32,6 +32,10 @@ pub trait Example: 'static + Sized { wgpu::Limits::downlevel_webgl2_defaults() // These downlevel limits will allow the code to run on all possible hardware } + fn supported_backends() -> wgpu::Backends { + wgpu::Backends::all() + } + fn init( config: &wgpu::SurfaceConfiguration, adapter: &wgpu::Adapter, @@ -268,9 +272,10 @@ impl ExampleContext { async fn init_async(surface: &mut SurfaceWrapper, window: Arc) -> Self { log::info!("Initializing wgpu..."); - let instance = wgpu::Instance::new(&wgpu::InstanceDescriptor::from_env_or_default()); + let mut instance_descriptor = wgpu::InstanceDescriptor::from_env_or_default(); + instance_descriptor.backends &= E::supported_backends(); + let instance = wgpu::Instance::new(&instance_descriptor); surface.pre_adapter(&instance, window); - let adapter = get_adapter_with_capabilities_or_from_env( &instance, &E::required_features(), diff --git a/examples/features/src/mesh_shader/mod.rs b/examples/features/src/mesh_shader/mod.rs index 956722a661d..b5706a9c5bb 100644 --- a/examples/features/src/mesh_shader/mod.rs +++ b/examples/features/src/mesh_shader/mod.rs @@ -20,7 +20,6 @@ fn compile_glsl( .spawn() .expect("Failed to call glslc"); cmd.stdin.as_ref().unwrap().write_all(data).unwrap(); - println!("{shader_stage}"); let output = 
cmd.wait_with_output().expect("Error waiting for glslc"); assert!(output.status.success()); unsafe { @@ -32,6 +31,39 @@ fn compile_glsl( )) } } +fn compile_hlsl(device: &wgpu::Device, entry: &str, stage_str: &str) -> wgpu::ShaderModule { + let out_path = format!( + "{}/src/mesh_shader/shader.{stage_str}.cso", + env!("CARGO_MANIFEST_DIR") + ); + let cmd = std::process::Command::new("dxc") + .args([ + "-T", + &format!("{stage_str}_6_5"), + "-E", + entry, + &format!("{}/src/mesh_shader/shader.hlsl", env!("CARGO_MANIFEST_DIR")), + "-Fo", + &out_path, + ]) + .output() + .unwrap(); + if !cmd.status.success() { + panic!("DXC failed:\n{}", String::from_utf8(cmd.stderr).unwrap()); + } + let file = std::fs::read(&out_path).unwrap(); + std::fs::remove_file(out_path).unwrap(); + unsafe { + device.create_shader_module_passthrough(wgpu::ShaderModuleDescriptorPassthrough::Dxil( + wgpu::ShaderModuleDescriptorDxil { + entry_point: entry.to_owned(), + label: None, + source: &file, + num_workgroups: (0, 0, 0), + }, + )) + } +} pub struct Example { pipeline: wgpu::RenderPipeline, @@ -43,16 +75,27 @@ impl crate::framework::Example for Example { device: &wgpu::Device, _queue: &wgpu::Queue, ) -> Self { + let features = device.features(); + let (ts, ms, fs) = if features.contains(wgpu::Features::SPIRV_SHADER_PASSTHROUGH) { + ( + compile_glsl(device, include_bytes!("shader.task"), "task"), + compile_glsl(device, include_bytes!("shader.mesh"), "mesh"), + compile_glsl(device, include_bytes!("shader.frag"), "frag"), + ) + } else if features.contains(wgpu::Features::HLSL_DXIL_SHADER_PASSTHROUGH) { + ( + compile_hlsl(device, "Task", "as"), + compile_hlsl(device, "Mesh", "ms"), + compile_hlsl(device, "Frag", "ps"), + ) + } else { + panic!("Device must support SPIRV_SHADER_PASSTHROUGH or HLSL_DXIL_SHADER_PASSTHROUGH"); + }; let pipeline_layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: None, bind_group_layouts: &[], push_constant_ranges: &[], }); - let (ts, ms, fs) = 
( - compile_glsl(device, include_bytes!("shader.task"), "task"), - compile_glsl(device, include_bytes!("shader.mesh"), "mesh"), - compile_glsl(device, include_bytes!("shader.frag"), "frag"), - ); let pipeline = device.create_mesh_pipeline(&wgpu::MeshPipelineDescriptor { label: None, layout: Some(&pipeline_layout), @@ -119,11 +162,19 @@ impl crate::framework::Example for Example { Default::default() } fn required_features() -> wgpu::Features { - wgpu::Features::EXPERIMENTAL_MESH_SHADER | wgpu::Features::SPIRV_SHADER_PASSTHROUGH + wgpu::Features::EXPERIMENTAL_MESH_SHADER } fn required_limits() -> wgpu::Limits { wgpu::Limits::defaults().using_recommended_minimum_mesh_shader_values() } + fn optional_features() -> wgpu::Features { + wgpu::Features::SPIRV_SHADER_PASSTHROUGH | wgpu::Features::HLSL_DXIL_SHADER_PASSTHROUGH + } + // This is because the passthrough features are optional despite at least one + // being required + fn supported_backends() -> wgpu::Backends { + wgpu::Backends::VULKAN | wgpu::Backends::DX12 + } fn resize( &mut self, _config: &wgpu::SurfaceConfiguration, diff --git a/examples/features/src/mesh_shader/shader.hlsl b/examples/features/src/mesh_shader/shader.hlsl new file mode 100644 index 00000000000..e70961f2d71 --- /dev/null +++ b/examples/features/src/mesh_shader/shader.hlsl @@ -0,0 +1,53 @@ +struct OutVertex { + float4 Position : SV_POSITION; + float4 Color: COLOR; +}; +struct OutPrimitive { + float4 ColorMask : COLOR_MASK : PRIMITIVE; + bool CullPrimitive: SV_CullPrimitive; +}; +struct InVertex { + float4 Color: COLOR; +}; +struct InPrimitive { + float4 ColorMask : COLOR_MASK : PRIMITIVE; +}; +struct PayloadData { + float4 ColorMask; + bool Visible; +}; + + +static const float4 positions[3] = {float4(0., 1.0, 0., 1.0), float4(-1.0, -1.0, 0., 1.0), float4(1.0, -1.0, 0., 1.0)}; +static const float4 colors[3] = {float4(0., 1., 0., 1.), float4(0., 0., 1., 1.), float4(1., 0., 0., 1.)}; + +groupshared PayloadData outPayload; + +[numthreads(1, 1, 1)] 
+void Task() { + outPayload.ColorMask = float4(1.0, 1.0, 0.0, 1.0); + outPayload.Visible = true; + DispatchMesh(3, 1, 1, outPayload); +} + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void Mesh(out indices uint3 triangles[1], out vertices OutVertex vertices[3], out primitives OutPrimitive primitives[1], in payload PayloadData payload) { + SetMeshOutputCounts(3, 1); + + vertices[0].Position = positions[0]; + vertices[1].Position = positions[1]; + vertices[2].Position = positions[2]; + + vertices[0].Color = colors[0] * payload.ColorMask; + vertices[1].Color = colors[1] * payload.ColorMask; + vertices[2].Color = colors[2] * payload.ColorMask; + + triangles[0] = uint3(0, 1, 2); + primitives[0].ColorMask = float4(1.0, 0.0, 0.0, 1.0); + primitives[0].CullPrimitive = !payload.Visible; +} + +float4 Frag(InVertex vertex, InPrimitive primitive) : SV_Target { + return vertex.Color * primitive.ColorMask; +} diff --git a/examples/features/src/mesh_shader/shader.mesh b/examples/features/src/mesh_shader/shader.mesh index 7d350e8ce77..c2579670d30 100644 --- a/examples/features/src/mesh_shader/shader.mesh +++ b/examples/features/src/mesh_shader/shader.mesh @@ -20,18 +20,18 @@ vertexOutput[]; layout(location = 1) perprimitiveEXT out PrimitiveOutput { vec4 colorMask; } primitiveOutput[]; -shared uint sharedData; - layout(triangles, max_vertices = 3, max_primitives = 1) out; void main() { - sharedData = 5; SetMeshOutputsEXT(3, 1); + gl_MeshVerticesEXT[0].gl_Position = positions[0]; gl_MeshVerticesEXT[1].gl_Position = positions[1]; gl_MeshVerticesEXT[2].gl_Position = positions[2]; + vertexOutput[0].color = colors[0] * payloadData.colorMask; vertexOutput[1].color = colors[1] * payloadData.colorMask; vertexOutput[2].color = colors[2] * payloadData.colorMask; + gl_PrimitiveTriangleIndicesEXT[gl_LocalInvocationIndex] = uvec3(0, 1, 2); primitiveOutput[0].colorMask = vec4(1.0, 0.0, 1.0, 1.0); gl_MeshPrimitivesEXT[0].gl_CullPrimitiveEXT = !payloadData.visible; diff --git 
a/examples/features/src/mesh_shader/shader.task b/examples/features/src/mesh_shader/shader.task index 6c766bc83ae..04cdef2d5c2 100644 --- a/examples/features/src/mesh_shader/shader.task +++ b/examples/features/src/mesh_shader/shader.task @@ -1,7 +1,7 @@ #version 450 #extension GL_EXT_mesh_shader : require -layout(local_size_x = 4, local_size_y = 1, local_size_z = 1) in; +layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; struct TaskPayload { vec4 colorMask; diff --git a/naga/src/back/hlsl/mod.rs b/naga/src/back/hlsl/mod.rs index 8df06cf1323..c7747eb3fac 100644 --- a/naga/src/back/hlsl/mod.rs +++ b/naga/src/back/hlsl/mod.rs @@ -283,7 +283,8 @@ impl crate::ShaderStage { Self::Vertex => "vs", Self::Fragment => "ps", Self::Compute => "cs", - Self::Task | Self::Mesh => unreachable!(), + Self::Task => "as", + Self::Mesh => "ms", } } } diff --git a/tests/tests/wgpu-gpu/mesh_shader/basic.hlsl b/tests/tests/wgpu-gpu/mesh_shader/basic.hlsl new file mode 100644 index 00000000000..89a7276d3c0 --- /dev/null +++ b/tests/tests/wgpu-gpu/mesh_shader/basic.hlsl @@ -0,0 +1,41 @@ +struct OutVertex { + float4 Position : SV_POSITION; + float4 Color: COLOR; +}; +struct InVertex { + float4 Color: COLOR; +}; + + +static const float4 positions[3] = {float4(0., 1.0, 0., 1.0), float4(-1.0, -1.0, 0., 1.0), float4(1.0, -1.0, 0., 1.0)}; +static const float4 colors[3] = {float4(0., 1., 0., 1.), float4(0., 0., 1., 1.), float4(1., 0., 0., 1.)}; + +struct EmptyPayload { + uint _nullField; +}; +groupshared EmptyPayload _emptyPayload; + +[numthreads(4, 1, 1)] +void Task() { + DispatchMesh(1, 1, 1, _emptyPayload); +} + +[outputtopology("triangle")] +[numthreads(1, 1, 1)] +void Mesh(out indices uint3 triangles[1], out vertices OutVertex vertices[3], in payload EmptyPayload _emptyPayload) { + SetMeshOutputCounts(3, 1); + + vertices[0].Position = positions[0]; + vertices[1].Position = positions[1]; + vertices[2].Position = positions[2]; + + vertices[0].Color = colors[0]; + vertices[1].Color 
= colors[1]; + vertices[2].Color = colors[2]; + + triangles[0] = uint3(0, 1, 2); +} + +float4 Frag(InVertex vertex) : SV_Target { + return vertex.Color; +} diff --git a/tests/tests/wgpu-gpu/mesh_shader/mod.rs b/tests/tests/wgpu-gpu/mesh_shader/mod.rs index 8a0bdf4b80f..e32668280f3 100644 --- a/tests/tests/wgpu-gpu/mesh_shader/mod.rs +++ b/tests/tests/wgpu-gpu/mesh_shader/mod.rs @@ -1,3 +1,4 @@ +use nanorand::Rng; use std::{io::Write, process::Stdio}; use wgpu::util::DeviceExt; @@ -6,15 +7,30 @@ use wgpu_test::{ }; pub fn all_tests(tests: &mut Vec) { - tests.extend([ - MESH_PIPELINE_BASIC_MESH, - MESH_PIPELINE_BASIC_TASK_MESH, - MESH_PIPELINE_BASIC_MESH_FRAG, - MESH_PIPELINE_BASIC_TASK_MESH_FRAG, - MESH_DRAW_INDIRECT, - MESH_MULTI_DRAW_INDIRECT, - MESH_MULTI_DRAW_INDIRECT_COUNT, - ]); + { + use vulkan::*; + tests.extend([ + MESH_PIPELINE_BASIC_MESH, + MESH_PIPELINE_BASIC_TASK_MESH, + MESH_PIPELINE_BASIC_MESH_FRAG, + MESH_PIPELINE_BASIC_TASK_MESH_FRAG, + MESH_DRAW_INDIRECT, + MESH_MULTI_DRAW_INDIRECT, + MESH_MULTI_DRAW_INDIRECT_COUNT, + ]); + } + { + use dx12::*; + tests.extend([ + MESH_PIPELINE_BASIC_MESH, + MESH_PIPELINE_BASIC_TASK_MESH, + MESH_PIPELINE_BASIC_MESH_FRAG, + MESH_PIPELINE_BASIC_TASK_MESH_FRAG, + MESH_DRAW_INDIRECT, + MESH_MULTI_DRAW_INDIRECT, + MESH_MULTI_DRAW_INDIRECT_COUNT, + ]); + } } // Same as in mesh shader example @@ -37,7 +53,6 @@ fn compile_glsl( .spawn() .expect("Failed to call glslc"); cmd.stdin.as_ref().unwrap().write_all(data).unwrap(); - println!("{shader_stage}"); let output = cmd.wait_with_output().expect("Error waiting for glslc"); assert!(output.status.success()); unsafe { @@ -50,6 +65,71 @@ fn compile_glsl( } } +fn compile_hlsl(device: &wgpu::Device, entry: &str, stage_str: &str) -> wgpu::ShaderModule { + // Each test needs its own files + let rand: u32 = nanorand::tls_rng().generate(); + let out_path = format!( + "{}/tests/wgpu-gpu/mesh_shader/{rand}.{stage_str}.cso", + env!("CARGO_MANIFEST_DIR") + ); + let cmd = 
std::process::Command::new("dxc") + .args([ + "-T", + &format!("{stage_str}_6_5"), + "-E", + entry, + &format!( + "{}/tests/wgpu-gpu/mesh_shader/basic.hlsl", + env!("CARGO_MANIFEST_DIR") + ), + "-Fo", + &out_path, + ]) + .output() + .unwrap(); + if !cmd.status.success() { + panic!("DXC failed:\n{}", String::from_utf8(cmd.stderr).unwrap()); + } + let file = std::fs::read(&out_path).unwrap(); + std::fs::remove_file(out_path).unwrap(); + unsafe { + device.create_shader_module_passthrough(wgpu::ShaderModuleDescriptorPassthrough::Dxil( + wgpu::ShaderModuleDescriptorDxil { + entry_point: entry.to_owned(), + label: None, + source: &file, + num_workgroups: (0, 0, 0), + }, + )) + } +} + +fn get_shaders( + device: &wgpu::Device, +) -> (wgpu::ShaderModule, wgpu::ShaderModule, wgpu::ShaderModule) { + if device + .features() + .contains(wgpu::Features::SPIRV_SHADER_PASSTHROUGH) + { + ( + compile_glsl(device, include_bytes!("basic.task"), "task"), + compile_glsl(device, include_bytes!("basic.mesh"), "mesh"), + compile_glsl(device, include_bytes!("basic.frag"), "frag"), + ) + } else if device + .features() + .contains(wgpu::Features::HLSL_DXIL_SHADER_PASSTHROUGH) + { + ( + compile_hlsl(device, "Task", "as"), + compile_hlsl(device, "Mesh", "ms"), + compile_hlsl(device, "Frag", "ps"), + ) + } else { + unreachable!() + } +} + fn create_depth( device: &wgpu::Device, ) -> (wgpu::Texture, wgpu::TextureView, wgpu::DepthStencilState) { @@ -79,18 +159,12 @@ fn create_depth( (depth_texture, depth_view, state) } -fn mesh_pipeline_build( - ctx: &TestingContext, - task: Option<&[u8]>, - mesh: &[u8], - frag: Option<&[u8]>, - draw: bool, -) { +fn mesh_pipeline_build(ctx: &TestingContext, use_task: bool, use_frag: bool, draw: bool) { let device = &ctx.device; let (_depth_image, depth_view, depth_state) = create_depth(device); - let task = task.map(|t| compile_glsl(device, t, "task")); - let mesh = compile_glsl(device, mesh, "mesh"); - let frag = frag.map(|f| compile_glsl(device, f, "frag")); + 
let (task, mesh, frag) = get_shaders(device); + let task = if use_task { Some(task) } else { None }; + let frag = if use_frag { Some(frag) } else { None }; let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: None, bind_group_layouts: &[], @@ -162,9 +236,7 @@ pub enum DrawType { fn mesh_draw(ctx: &TestingContext, draw_type: DrawType) { let device = &ctx.device; let (_depth_image, depth_view, depth_state) = create_depth(device); - let task = compile_glsl(device, BASIC_TASK, "task"); - let mesh = compile_glsl(device, BASIC_MESH, "mesh"); - let frag = compile_glsl(device, NO_WRITE_FRAG, "frag"); + let (task, mesh, frag) = get_shaders(device); let layout = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { label: None, bind_group_layouts: &[], @@ -256,18 +328,16 @@ fn mesh_draw(ctx: &TestingContext, draw_type: DrawType) { ctx.device.poll(wgpu::PollType::Wait).unwrap(); } -const BASIC_TASK: &[u8] = include_bytes!("basic.task"); -const BASIC_MESH: &[u8] = include_bytes!("basic.mesh"); -//const BASIC_FRAG: &[u8] = include_bytes!("basic.frag.spv"); -const NO_WRITE_FRAG: &[u8] = include_bytes!("no-write.frag"); - -fn default_gpu_test_config(draw_type: DrawType) -> GpuTestConfiguration { +fn default_gpu_test_config( + draw_type: DrawType, + passthrough_feature: wgpu::Features, +) -> GpuTestConfiguration { GpuTestConfiguration::new().parameters( TestParameters::default() .test_features_limits() .features( wgpu::Features::EXPERIMENTAL_MESH_SHADER - | wgpu::Features::SPIRV_SHADER_PASSTHROUGH + | passthrough_feature | match draw_type { DrawType::Standard | DrawType::Indirect => wgpu::Features::empty(), DrawType::MultiIndirect => wgpu::Features::MULTI_DRAW_INDIRECT, @@ -279,46 +349,52 @@ fn default_gpu_test_config(draw_type: DrawType) -> GpuTestConfiguration { } // Mesh pipeline configs -#[gpu_test] -static MESH_PIPELINE_BASIC_MESH: GpuTestConfiguration = default_gpu_test_config(DrawType::Standard) - .run_sync(|ctx| { - 
mesh_pipeline_build(&ctx, None, BASIC_MESH, None, true); - }); -#[gpu_test] -static MESH_PIPELINE_BASIC_TASK_MESH: GpuTestConfiguration = - default_gpu_test_config(DrawType::Standard).run_sync(|ctx| { - mesh_pipeline_build(&ctx, Some(BASIC_TASK), BASIC_MESH, None, true); - }); -#[gpu_test] -static MESH_PIPELINE_BASIC_MESH_FRAG: GpuTestConfiguration = - default_gpu_test_config(DrawType::Standard).run_sync(|ctx| { - mesh_pipeline_build(&ctx, None, BASIC_MESH, Some(NO_WRITE_FRAG), true); - }); -#[gpu_test] -static MESH_PIPELINE_BASIC_TASK_MESH_FRAG: GpuTestConfiguration = - default_gpu_test_config(DrawType::Standard).run_sync(|ctx| { - mesh_pipeline_build( - &ctx, - Some(BASIC_TASK), - BASIC_MESH, - Some(NO_WRITE_FRAG), - true, - ); - }); +macro_rules! make_tests { + ($feature:expr) => { + #[gpu_test] + pub static MESH_PIPELINE_BASIC_MESH: GpuTestConfiguration = + default_gpu_test_config(DrawType::Standard, $feature).run_sync(|ctx| { + mesh_pipeline_build(&ctx, false, false, true); + }); + #[gpu_test] + pub static MESH_PIPELINE_BASIC_TASK_MESH: GpuTestConfiguration = + default_gpu_test_config(DrawType::Standard, $feature).run_sync(|ctx| { + mesh_pipeline_build(&ctx, true, false, true); + }); + #[gpu_test] + pub static MESH_PIPELINE_BASIC_MESH_FRAG: GpuTestConfiguration = + default_gpu_test_config(DrawType::Standard, $feature).run_sync(|ctx| { + mesh_pipeline_build(&ctx, false, true, true); + }); + #[gpu_test] + pub static MESH_PIPELINE_BASIC_TASK_MESH_FRAG: GpuTestConfiguration = + default_gpu_test_config(DrawType::Standard, $feature).run_sync(|ctx| { + mesh_pipeline_build(&ctx, true, true, true); + }); -// Mesh draw -#[gpu_test] -static MESH_DRAW_INDIRECT: GpuTestConfiguration = default_gpu_test_config(DrawType::Indirect) - .run_sync(|ctx| { - mesh_draw(&ctx, DrawType::Indirect); - }); -#[gpu_test] -static MESH_MULTI_DRAW_INDIRECT: GpuTestConfiguration = - default_gpu_test_config(DrawType::MultiIndirect).run_sync(|ctx| { - mesh_draw(&ctx, DrawType::MultiIndirect); - 
}); -#[gpu_test] -static MESH_MULTI_DRAW_INDIRECT_COUNT: GpuTestConfiguration = - default_gpu_test_config(DrawType::MultiIndirectCount).run_sync(|ctx| { - mesh_draw(&ctx, DrawType::MultiIndirectCount); - }); + // Mesh draw + #[gpu_test] + pub static MESH_DRAW_INDIRECT: GpuTestConfiguration = + default_gpu_test_config(DrawType::Indirect, $feature).run_sync(|ctx| { + mesh_draw(&ctx, DrawType::Indirect); + }); + #[gpu_test] + pub static MESH_MULTI_DRAW_INDIRECT: GpuTestConfiguration = + default_gpu_test_config(DrawType::MultiIndirect, $feature).run_sync(|ctx| { + mesh_draw(&ctx, DrawType::MultiIndirect); + }); + #[gpu_test] + pub static MESH_MULTI_DRAW_INDIRECT_COUNT: GpuTestConfiguration = + default_gpu_test_config(DrawType::MultiIndirectCount, $feature).run_sync(|ctx| { + mesh_draw(&ctx, DrawType::MultiIndirectCount); + }); + }; +} +mod vulkan { + use super::*; + make_tests!(wgpu::Features::SPIRV_SHADER_PASSTHROUGH); +} +mod dx12 { + use super::*; + make_tests!(wgpu::Features::HLSL_DXIL_SHADER_PASSTHROUGH); +} diff --git a/tests/tests/wgpu-gpu/mesh_shader/no-write.frag b/tests/tests/wgpu-gpu/mesh_shader/no-write.frag deleted file mode 100644 index d0512bb0fad..00000000000 --- a/tests/tests/wgpu-gpu/mesh_shader/no-write.frag +++ /dev/null @@ -1,7 +0,0 @@ -#version 450 -#extension GL_EXT_mesh_shader : require - -in VertexInput { layout(location = 0) vec4 color; } -vertexInput; - -void main() {} \ No newline at end of file diff --git a/typos.toml b/typos.toml index d93e1f8ecdc..25155801124 100644 --- a/typos.toml +++ b/typos.toml @@ -8,6 +8,9 @@ extend-exclude = [ 'docs/big-picture.xml', # This test has weird pattern-derived variable names. 'naga/tests/in/wgsl/abstract-types-builtins.wgsl', + # These use "-Fo" as an argument to DXC + 'examples/features/src/mesh_shader/mod.rs', + 'tests/tests/wgpu-gpu/mesh_shader/mod.rs', ] # Corrections take the form of a key/value pair. 
The key is the incorrect word diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index 1f57fa69010..6d3818722a5 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -528,6 +528,22 @@ impl super::Adapter { wgt::Features::SHADER_INT64_ATOMIC_ALL_OPS | wgt::Features::SHADER_INT64_ATOMIC_MIN_MAX, atomic_int64_on_typed_resource_supported, ); + let mesh_shader_supported = { + let mut features7 = Direct3D12::D3D12_FEATURE_DATA_D3D12_OPTIONS7::default(); + unsafe { + device.CheckFeatureSupport( + Direct3D12::D3D12_FEATURE_D3D12_OPTIONS7, + <*mut _>::cast(&mut features7), + size_of_val(&features7) as u32, + ) + } + .is_ok() + && features7.MeshShaderTier != Direct3D12::D3D12_MESH_SHADER_TIER_NOT_SUPPORTED + }; + features.set( + wgt::Features::EXPERIMENTAL_MESH_SHADER, + mesh_shader_supported, + ); // TODO: Determine if IPresentationManager is supported let presentation_timer = auxil::dxgi::time::PresentationTimer::new_dxgi(); @@ -647,10 +663,15 @@ impl super::Adapter { max_buffer_size: i32::MAX as u64, max_non_sampler_bindings: 1_000_000, - max_task_workgroup_total_count: 0, - max_task_workgroups_per_dimension: 0, + // Source: https://microsoft.github.io/DirectX-Specs/d3d/MeshShader.html#dispatchmesh-api + max_task_workgroup_total_count: 2u32.pow(22), + // Technically it says "64k" but I highly doubt they want 65536 for compute and exactly 64,000 for task workgroups + max_task_workgroups_per_dimension: + Direct3D12::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION, + // Multiview not supported by WGPU yet max_mesh_multiview_count: 0, - max_mesh_output_layers: 0, + // This seems to be right, and I can't find anything to suggest it would be less than the 2048 provided here + max_mesh_output_layers: Direct3D12::D3D12_REQ_TEXTURE2D_ARRAY_AXIS_DIMENSION, max_blas_primitive_count: if supports_ray_tracing { 1 << 29 // 2^29 diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index edfea952ed2..2454f82ef88 100644 
--- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1228,11 +1228,16 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn draw_mesh_tasks( &mut self, - _group_count_x: u32, - _group_count_y: u32, - _group_count_z: u32, + group_count_x: u32, + group_count_y: u32, + group_count_z: u32, ) { - unreachable!() + self.prepare_dispatch([group_count_x, group_count_y, group_count_z]); + let cmd_list6: Direct3D12::ID3D12GraphicsCommandList6 = + self.list.as_ref().unwrap().cast().unwrap(); + unsafe { + cmd_list6.DispatchMesh(group_count_x, group_count_y, group_count_z); + } } unsafe fn draw_indirect( &mut self, @@ -1314,11 +1319,36 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn draw_mesh_tasks_indirect( &mut self, - _buffer: &::Buffer, - _offset: wgt::BufferAddress, - _draw_count: u32, + buffer: &::Buffer, + offset: wgt::BufferAddress, + draw_count: u32, ) { - unreachable!() + if self + .pass + .layout + .special_constants + .as_ref() + .and_then(|sc| sc.indirect_cmd_signatures.as_ref()) + .is_some() + { + self.update_root_elements(); + } else { + self.prepare_dispatch([0; 3]); + } + + let cmd_list6: Direct3D12::ID3D12GraphicsCommandList6 = + self.list.as_ref().unwrap().cast().unwrap(); + let cmd_signature = &self + .pass + .layout + .special_constants + .as_ref() + .and_then(|sc| sc.indirect_cmd_signatures.as_ref()) + .unwrap_or_else(|| &self.shared.cmd_signatures) + .draw_mesh; + unsafe { + cmd_list6.ExecuteIndirect(cmd_signature, draw_count, &buffer.resource, offset, None, 0); + } } unsafe fn draw_indirect_count( &mut self, @@ -1362,13 +1392,25 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn draw_mesh_tasks_indirect_count( &mut self, - _buffer: &::Buffer, - _offset: wgt::BufferAddress, - _count_buffer: &::Buffer, - _count_offset: wgt::BufferAddress, - _max_count: u32, + buffer: &::Buffer, + offset: wgt::BufferAddress, + count_buffer: &::Buffer, + count_offset: wgt::BufferAddress, + 
max_count: u32, ) { - unreachable!() + self.prepare_dispatch([0; 3]); + let cmd_list6: Direct3D12::ID3D12GraphicsCommandList6 = + self.list.as_ref().unwrap().cast().unwrap(); + unsafe { + cmd_list6.ExecuteIndirect( + &self.shared.cmd_signatures.draw_mesh, + max_count, + &buffer.resource, + offset, + &count_buffer.resource, + count_offset, + ); + } } // compute diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index d62fc5c6751..eef2f59a926 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -139,6 +139,16 @@ impl super::Device { }], 0, )?, + draw_mesh: Self::create_command_signature( + &raw, + None, + size_of::(), + &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH, + ..Default::default() + }], + 0, + )?, dispatch: Self::create_command_signature( &raw, None, @@ -1392,6 +1402,19 @@ impl crate::Device for super::Device { ], 0, )?, + draw_mesh: Self::create_command_signature( + &self.raw, + Some(&raw), + special_constant_buffer_args_len + size_of::(), + &[ + constant_indirect_argument_desc, + Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_MESH, + ..Default::default() + }, + ], + 0, + )?, dispatch: Self::create_command_signature( &self.raw, Some(&raw), @@ -1824,60 +1847,10 @@ impl crate::Device for super::Device { super::PipelineCache, >, ) -> Result { + let mut shader_stages = wgt::ShaderStages::empty(); + let root_signature = + unsafe { borrow_optional_interface_temporarily(&desc.layout.shared.signature) }; let (topology_class, topology) = conv::map_topology(desc.primitive.topology); - let mut shader_stages = wgt::ShaderStages::VERTEX; - - let (vertex_stage_desc, vertex_buffers_desc) = match &desc.vertex_processor { - crate::VertexProcessor::Standard { - vertex_buffers, - vertex_stage, - } => (vertex_stage, *vertex_buffers), - crate::VertexProcessor::Mesh { .. 
} => unreachable!(), - }; - - let blob_vs = self.load_shader( - vertex_stage_desc, - desc.layout, - naga::ShaderStage::Vertex, - desc.fragment_stage.as_ref(), - )?; - let blob_fs = match desc.fragment_stage { - Some(ref stage) => { - shader_stages |= wgt::ShaderStages::FRAGMENT; - Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment, None)?) - } - None => None, - }; - - let mut vertex_strides = [None; crate::MAX_VERTEX_BUFFERS]; - let mut input_element_descs = Vec::new(); - for (i, (stride, vbuf)) in vertex_strides - .iter_mut() - .zip(vertex_buffers_desc) - .enumerate() - { - *stride = NonZeroU32::new(vbuf.array_stride as u32); - let (slot_class, step_rate) = match vbuf.step_mode { - wgt::VertexStepMode::Vertex => { - (Direct3D12::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0) - } - wgt::VertexStepMode::Instance => { - (Direct3D12::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1) - } - }; - for attribute in vbuf.attributes { - input_element_descs.push(Direct3D12::D3D12_INPUT_ELEMENT_DESC { - SemanticName: windows::core::PCSTR(NAGA_LOCATION_SEMANTIC.as_ptr()), - SemanticIndex: attribute.shader_location, - Format: auxil::dxgi::conv::map_vertex_format(attribute.format), - InputSlot: i as u32, - AlignedByteOffset: attribute.offset as u32, - InputSlotClass: slot_class, - InstanceDataStepRate: step_rate, - }); - } - } - let mut rtv_formats = [Dxgi::Common::DXGI_FORMAT_UNKNOWN; Direct3D12::D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT as usize]; for (rtv_format, ct) in rtv_formats.iter_mut().zip(desc.color_targets) { @@ -1892,7 +1865,7 @@ impl crate::Device for super::Device { .map(|ds| ds.bias) .unwrap_or_default(); - let raw_rasterizer = Direct3D12::D3D12_RASTERIZER_DESC { + let rasterizer_state = Direct3D12::D3D12_RASTERIZER_DESC { FillMode: conv::map_polygon_mode(desc.primitive.polygon_mode), CullMode: match desc.primitive.cull_mode { None => Direct3D12::D3D12_CULL_MODE_NONE, @@ -1916,80 +1889,193 @@ impl crate::Device for super::Device { 
Direct3D12::D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF }, }; + let blob_fs = match desc.fragment_stage { + Some(ref stage) => { + shader_stages |= wgt::ShaderStages::FRAGMENT; + Some(self.load_shader(stage, desc.layout, naga::ShaderStage::Fragment, None)?) + } + None => None, + }; + let pixel_shader = match &blob_fs { + Some(shader) => shader.create_native_shader(), + None => Direct3D12::D3D12_SHADER_BYTECODE::default(), + }; + let mut vertex_strides = [None; crate::MAX_VERTEX_BUFFERS]; + let stream_output = Direct3D12::D3D12_STREAM_OUTPUT_DESC { + pSODeclaration: ptr::null(), + NumEntries: 0, + pBufferStrides: ptr::null(), + NumStrides: 0, + RasterizedStream: 0, + }; + let blend_state = Direct3D12::D3D12_BLEND_DESC { + AlphaToCoverageEnable: Foundation::BOOL::from( + desc.multisample.alpha_to_coverage_enabled, + ), + IndependentBlendEnable: true.into(), + RenderTarget: conv::map_render_targets(desc.color_targets), + }; + let depth_stencil_state = match desc.depth_stencil { + Some(ref ds) => conv::map_depth_stencil(ds), + None => Default::default(), + }; + let dsv_format = desc + .depth_stencil + .as_ref() + .map_or(Dxgi::Common::DXGI_FORMAT_UNKNOWN, |ds| { + auxil::dxgi::conv::map_texture_format(ds.format) + }); + let sample_desc = Dxgi::Common::DXGI_SAMPLE_DESC { + Count: desc.multisample.count, + Quality: 0, + }; + let cached_pso = Direct3D12::D3D12_CACHED_PIPELINE_STATE { + pCachedBlob: ptr::null(), + CachedBlobSizeInBytes: 0, + }; + let flags = Direct3D12::D3D12_PIPELINE_STATE_FLAG_NONE; - let raw_desc = Direct3D12::D3D12_GRAPHICS_PIPELINE_STATE_DESC { - pRootSignature: unsafe { - borrow_optional_interface_temporarily(&desc.layout.shared.signature) - }, - VS: blob_vs.create_native_shader(), - PS: match &blob_fs { - Some(shader) => shader.create_native_shader(), - None => Direct3D12::D3D12_SHADER_BYTECODE::default(), - }, - GS: Direct3D12::D3D12_SHADER_BYTECODE::default(), - DS: Direct3D12::D3D12_SHADER_BYTECODE::default(), - HS: 
Direct3D12::D3D12_SHADER_BYTECODE::default(), - StreamOutput: Direct3D12::D3D12_STREAM_OUTPUT_DESC { - pSODeclaration: ptr::null(), - NumEntries: 0, - pBufferStrides: ptr::null(), - NumStrides: 0, - RasterizedStream: 0, - }, - BlendState: Direct3D12::D3D12_BLEND_DESC { - AlphaToCoverageEnable: Foundation::BOOL::from( - desc.multisample.alpha_to_coverage_enabled, - ), - IndependentBlendEnable: true.into(), - RenderTarget: conv::map_render_targets(desc.color_targets), - }, - SampleMask: desc.multisample.mask as u32, - RasterizerState: raw_rasterizer, - DepthStencilState: match desc.depth_stencil { - Some(ref ds) => conv::map_depth_stencil(ds), - None => Default::default(), - }, - InputLayout: Direct3D12::D3D12_INPUT_LAYOUT_DESC { - pInputElementDescs: if input_element_descs.is_empty() { - ptr::null() - } else { - input_element_descs.as_ptr() - }, - NumElements: input_element_descs.len() as u32, - }, - IBStripCutValue: match desc.primitive.strip_index_format { - Some(wgt::IndexFormat::Uint16) => { - Direct3D12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF + let raw: Direct3D12::ID3D12PipelineState = match &desc.vertex_processor { + &crate::VertexProcessor::Standard { + vertex_buffers, + ref vertex_stage, + } => { + shader_stages |= wgt::ShaderStages::VERTEX; + let blob_vs = self.load_shader( + vertex_stage, + desc.layout, + naga::ShaderStage::Vertex, + desc.fragment_stage.as_ref(), + )?; + + let mut input_element_descs = Vec::new(); + for (i, (stride, vbuf)) in vertex_strides.iter_mut().zip(vertex_buffers).enumerate() + { + *stride = NonZeroU32::new(vbuf.array_stride as u32); + let (slot_class, step_rate) = match vbuf.step_mode { + wgt::VertexStepMode::Vertex => { + (Direct3D12::D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0) + } + wgt::VertexStepMode::Instance => { + (Direct3D12::D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA, 1) + } + }; + for attribute in vbuf.attributes { + input_element_descs.push(Direct3D12::D3D12_INPUT_ELEMENT_DESC { + SemanticName: 
windows::core::PCSTR(NAGA_LOCATION_SEMANTIC.as_ptr()), + SemanticIndex: attribute.shader_location, + Format: auxil::dxgi::conv::map_vertex_format(attribute.format), + InputSlot: i as u32, + AlignedByteOffset: attribute.offset as u32, + InputSlotClass: slot_class, + InstanceDataStepRate: step_rate, + }); + } + } + let raw_desc = Direct3D12::D3D12_GRAPHICS_PIPELINE_STATE_DESC { + pRootSignature: root_signature, + VS: blob_vs.create_native_shader(), + PS: pixel_shader, + GS: Direct3D12::D3D12_SHADER_BYTECODE::default(), + DS: Direct3D12::D3D12_SHADER_BYTECODE::default(), + HS: Direct3D12::D3D12_SHADER_BYTECODE::default(), + StreamOutput: stream_output, + BlendState: blend_state, + SampleMask: desc.multisample.mask as u32, + RasterizerState: rasterizer_state, + DepthStencilState: depth_stencil_state, + InputLayout: Direct3D12::D3D12_INPUT_LAYOUT_DESC { + pInputElementDescs: if input_element_descs.is_empty() { + ptr::null() + } else { + input_element_descs.as_ptr() + }, + NumElements: input_element_descs.len() as u32, + }, + IBStripCutValue: match desc.primitive.strip_index_format { + Some(wgt::IndexFormat::Uint16) => { + Direct3D12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF + } + Some(wgt::IndexFormat::Uint32) => { + Direct3D12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF + } + None => Direct3D12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, + }, + PrimitiveTopologyType: topology_class, + NumRenderTargets: desc.color_targets.len() as u32, + RTVFormats: rtv_formats, + DSVFormat: dsv_format, + SampleDesc: sample_desc, + NodeMask: 0, + CachedPSO: cached_pso, + Flags: flags, + }; + unsafe { + profiling::scope!("ID3D12Device::CreateGraphicsPipelineState"); + self.raw.CreateGraphicsPipelineState(&raw_desc) } - Some(wgt::IndexFormat::Uint32) => { - Direct3D12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF + } + crate::VertexProcessor::Mesh { + task_stage, + mesh_stage, + } => { + let blob_ts = if let Some(ts) = task_stage { + shader_stages |= wgt::ShaderStages::TASK; + 
Some(self.load_shader( + ts, + desc.layout, + naga::ShaderStage::Task, + desc.fragment_stage.as_ref(), + )?) + } else { + None + }; + let task_shader = if let Some(ts) = &blob_ts { + ts.create_native_shader() + } else { + Default::default() + }; + shader_stages |= wgt::ShaderStages::MESH; + let blob_ms = self.load_shader( + mesh_stage, + desc.layout, + naga::ShaderStage::Mesh, + desc.fragment_stage.as_ref(), + )?; + let desc = super::MeshShaderPipelineStateStream { + root_signature: root_signature + .as_ref() + .map(|a| a.as_raw().cast()) + .unwrap_or(ptr::null_mut()), + task_shader, + pixel_shader, + mesh_shader: blob_ms.create_native_shader(), + blend_state, + sample_mask: desc.multisample.mask as u32, + rasterizer_state, + depth_stencil_state, + primitive_topology_type: topology_class, + rtv_formats: Direct3D12::D3D12_RT_FORMAT_ARRAY { + RTFormats: rtv_formats, + NumRenderTargets: desc.color_targets.len() as u32, + }, + dsv_format, + sample_desc, + node_mask: 0, + cached_pso, + flags, + }; + let mut raw_desc = unsafe { desc.to_bytes() }; + let stream_desc = Direct3D12::D3D12_PIPELINE_STATE_STREAM_DESC { + SizeInBytes: raw_desc.len(), + pPipelineStateSubobjectStream: raw_desc.as_mut_ptr().cast(), + }; + let device: Direct3D12::ID3D12Device2 = self.raw.cast().unwrap(); + unsafe { + profiling::scope!("ID3D12Device2::CreatePipelineState"); + device.CreatePipelineState(&stream_desc) } - None => Direct3D12::D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED, - }, - PrimitiveTopologyType: topology_class, - NumRenderTargets: desc.color_targets.len() as u32, - RTVFormats: rtv_formats, - DSVFormat: desc - .depth_stencil - .as_ref() - .map_or(Dxgi::Common::DXGI_FORMAT_UNKNOWN, |ds| { - auxil::dxgi::conv::map_texture_format(ds.format) - }), - SampleDesc: Dxgi::Common::DXGI_SAMPLE_DESC { - Count: desc.multisample.count, - Quality: 0, - }, - NodeMask: 0, - CachedPSO: Direct3D12::D3D12_CACHED_PIPELINE_STATE { - pCachedBlob: ptr::null(), - CachedBlobSizeInBytes: 0, - }, - Flags: 
Direct3D12::D3D12_PIPELINE_STATE_FLAG_NONE, - }; - - let raw: Direct3D12::ID3D12PipelineState = { - profiling::scope!("ID3D12Device::CreateGraphicsPipelineState"); - unsafe { self.raw.CreateGraphicsPipelineState(&raw_desc) } + } } .map_err(|err| crate::PipelineError::Linkage(shader_stages, err.to_string()))?; diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index c15f3b81e4e..a1b37799286 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -650,6 +650,7 @@ struct Idler { struct CommandSignatures { draw: Direct3D12::ID3D12CommandSignature, draw_indexed: Direct3D12::ID3D12CommandSignature, + draw_mesh: Direct3D12::ID3D12CommandSignature, dispatch: Direct3D12::ID3D12CommandSignature, } @@ -1558,3 +1559,114 @@ pub enum ShaderModuleSource { DxilPassthrough(DxilPassthroughShader), HlslPassthrough(HlslPassthroughShader), } + +#[repr(C)] +#[derive(Debug)] +struct MeshShaderPipelineStateStream { + root_signature: *mut Direct3D12::ID3D12RootSignature, + task_shader: Direct3D12::D3D12_SHADER_BYTECODE, + mesh_shader: Direct3D12::D3D12_SHADER_BYTECODE, + pixel_shader: Direct3D12::D3D12_SHADER_BYTECODE, + blend_state: Direct3D12::D3D12_BLEND_DESC, + sample_mask: u32, + rasterizer_state: Direct3D12::D3D12_RASTERIZER_DESC, + depth_stencil_state: Direct3D12::D3D12_DEPTH_STENCIL_DESC, + primitive_topology_type: Direct3D12::D3D12_PRIMITIVE_TOPOLOGY_TYPE, + rtv_formats: Direct3D12::D3D12_RT_FORMAT_ARRAY, + dsv_format: Dxgi::Common::DXGI_FORMAT, + sample_desc: Dxgi::Common::DXGI_SAMPLE_DESC, + node_mask: u32, + cached_pso: Direct3D12::D3D12_CACHED_PIPELINE_STATE, + flags: Direct3D12::D3D12_PIPELINE_STATE_FLAGS, +} +impl MeshShaderPipelineStateStream { + /// # Safety + /// Self must outlive the bytes (I think) + pub unsafe fn to_bytes(&self) -> Vec { + use Direct3D12::*; + let mut bytes = Vec::new(); + + macro_rules! 
push_subobject { + ($subobject_type:expr, $data:expr) => {{ + // Ensure 8-byte alignment for the subobject start + let alignment = 8; + let padding = (alignment - (bytes.len() % alignment)) % alignment; + bytes.extend(core::iter::repeat(0).take(padding)); + + // Append the type tag (u32) + let tag: u32 = $subobject_type.0 as u32; + bytes.extend_from_slice(&tag.to_ne_bytes()); + + // Align the data + let obj_align = align_of_val(&$data); + let data_padding = (obj_align - (bytes.len() % obj_align)) % obj_align; + bytes.extend(core::iter::repeat(0).take(data_padding)); + + // Append the data itself + #[allow(clippy::ptr_as_ptr, trivial_casts)] + let data_ptr = &$data as *const _ as *const u8; + let data_size = size_of_val(&$data); + let slice = unsafe { core::slice::from_raw_parts(data_ptr, data_size) }; + bytes.extend_from_slice(slice); + }}; + } + push_subobject!( + D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ROOT_SIGNATURE, + self.root_signature + ); + if !self.task_shader.pShaderBytecode.is_null() { + push_subobject!(D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_AS, self.task_shader); + } + push_subobject!(D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_MS, self.mesh_shader); + if !self.pixel_shader.pShaderBytecode.is_null() { + push_subobject!(D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PS, self.pixel_shader); + } + push_subobject!(D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_BLEND, self.blend_state); + push_subobject!( + D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_MASK, + self.sample_mask + ); + push_subobject!( + D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RASTERIZER, + self.rasterizer_state + ); + push_subobject!( + D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL, + self.depth_stencil_state + ); + push_subobject!( + D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_PRIMITIVE_TOPOLOGY, + self.primitive_topology_type + ); + if self.rtv_formats.NumRenderTargets != 0 { + push_subobject!( + D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_RENDER_TARGET_FORMATS, + self.rtv_formats + ); + } + if self.dsv_format != 
Dxgi::Common::DXGI_FORMAT_UNKNOWN { + push_subobject!( + D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_DEPTH_STENCIL_FORMAT, + self.dsv_format + ); + } + push_subobject!( + D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_SAMPLE_DESC, + self.sample_desc + ); + if self.node_mask != 0 { + push_subobject!( + D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_NODE_MASK, + self.node_mask + ); + } + if !self.cached_pso.pCachedBlob.is_null() { + push_subobject!( + D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_CACHED_PSO, + self.cached_pso + ); + } + push_subobject!(D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_FLAGS, self.flags); + bytes + } +}