From 5ca1eda3a83e5a2098ffc7a506a15c268bdbed20 Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Sun, 22 Mar 2026 00:01:28 +0800 Subject: [PATCH 01/17] =?UTF-8?q?feat(scene):=20=E5=9C=A8GPrimitive?= =?UTF-8?q?=E4=B8=AD=E6=B7=BB=E5=8A=A0=E4=BA=86aabb?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- source/runtime/render/scene/CpuScene.cpp | 4 ++++ .../render/shaderheaders/shared/scene/SharedSceneStruct.h | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/source/runtime/render/scene/CpuScene.cpp b/source/runtime/render/scene/CpuScene.cpp index 018cdc78d..16bf80eb2 100644 --- a/source/runtime/render/scene/CpuScene.cpp +++ b/source/runtime/render/scene/CpuScene.cpp @@ -235,6 +235,10 @@ void CpuScene::InitializeMeshes() { } else { g_primitive.index_start_idx = 0; // 默认值 } + + // AABB for frustum culling + g_primitive.aabb_min = c_primitive.aabb.min; + g_primitive.aabb_max = c_primitive.aabb.max; uint primitive_id = static_cast(m_primitive_buf.size()); m_primitive_buf.emplace_back(g_primitive); diff --git a/source/runtime/render/shaderheaders/shared/scene/SharedSceneStruct.h b/source/runtime/render/shaderheaders/shared/scene/SharedSceneStruct.h index 5abb9847f..460e19fd9 100644 --- a/source/runtime/render/shaderheaders/shared/scene/SharedSceneStruct.h +++ b/source/runtime/render/shaderheaders/shared/scene/SharedSceneStruct.h @@ -65,6 +65,12 @@ struct GPrimitive { uint packed_tangent_start_idx; // in element (uint) uint texcoord0_start_idx; // in element (float2) uint index_start_idx; // in uint(index buffer的元素是以uint为单位,而非uint3) + + // AABB for frustum culling (local space) + float3 aabb_min; + float _padding0; // padding to align to 16 bytes + float3 aabb_max; + uint _padding1; // padding to align to 16 bytes }; /** From 350838f7d6e02890cd46804bb61ea4c5e2c95c72 Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Sat, 28 Mar 2026 19:07:17 +0800 Subject: [PATCH 02/17] 
=?UTF-8?q?fix(scene):=20=E4=BF=AE=E5=A4=8D=E4=BA=86?= =?UTF-8?q?GPrimitive=E7=9A=84AABB=E6=B2=A1=E6=9C=89=E5=AF=B9=E9=BD=90?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../render/renderer/raster/RasterRenderer.cpp | 2 +- .../shaderheaders/shared/scene/SharedSceneStruct.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/source/runtime/render/renderer/raster/RasterRenderer.cpp b/source/runtime/render/renderer/raster/RasterRenderer.cpp index 4d6e8ae7c..f4dc7dd67 100644 --- a/source/runtime/render/renderer/raster/RasterRenderer.cpp +++ b/source/runtime/render/renderer/raster/RasterRenderer.cpp @@ -232,7 +232,7 @@ bool RasterRenderer::RunSingle(const SharedPtr editor_config, cons gfx_queue.Sync(); } - const auto& raster_config = editor_config->raster_config; + auto& raster_config = editor_config->raster_config; auto& camera = scene.GetMainCamera().camera; { diff --git a/source/runtime/render/shaderheaders/shared/scene/SharedSceneStruct.h b/source/runtime/render/shaderheaders/shared/scene/SharedSceneStruct.h index 460e19fd9..f881a7e2a 100644 --- a/source/runtime/render/shaderheaders/shared/scene/SharedSceneStruct.h +++ b/source/runtime/render/shaderheaders/shared/scene/SharedSceneStruct.h @@ -57,6 +57,12 @@ enum { * GPrimitive 和 CPrimitive 是一一对应的 */ struct GPrimitive { + // AABB for frustum culling (local space) + float3 aabb_min; + float padding0; // padding to align to 16 bytes + float3 aabb_max; + uint padding1; // padding to align to 16 bytes + uint material_idx; uint attribute_mask; @@ -65,12 +71,6 @@ struct GPrimitive { uint packed_tangent_start_idx; // in element (uint) uint texcoord0_start_idx; // in element (float2) uint index_start_idx; // in uint(index buffer的元素是以uint为单位,而非uint3) - - // AABB for frustum culling (local space) - float3 aabb_min; - float _padding0; // padding to align to 16 bytes - float3 aabb_max; - uint _padding1; // 
padding to align to 16 bytes }; /** From f0343e0e2d1b62330595cfffb44a3593a990e1fd Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Sat, 28 Mar 2026 20:10:45 +0800 Subject: [PATCH 03/17] =?UTF-8?q?feat(raster):=20=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E4=BA=86Instance=20Culling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../raster/culling/FrustumCull.comp.hlsl | 128 ++++++++++++++ source/editor/raster_ui/RasterUI.cpp | 27 +++ .../render/renderer/raster/CullingPass.h | 165 ++++++++++++++++++ .../render/renderer/raster/GeometryPass.h | 40 ++++- .../render/renderer/raster/RasterConfig.h | 14 ++ source/runtime/render/scene/CpuScene.cpp | 10 ++ source/runtime/render/scene/GpuScene.cpp | 12 ++ source/runtime/render/scene/GpuScene.h | 6 + source/runtime/render/scene/Scene.cpp | 5 + source/runtime/render/scene/Scene.h | 5 + source/runtime/render/scene/camera/Camera.cpp | 14 +- 11 files changed, 413 insertions(+), 13 deletions(-) create mode 100644 shaders/pipelines/raster/culling/FrustumCull.comp.hlsl create mode 100644 source/runtime/render/renderer/raster/CullingPass.h diff --git a/shaders/pipelines/raster/culling/FrustumCull.comp.hlsl b/shaders/pipelines/raster/culling/FrustumCull.comp.hlsl new file mode 100644 index 000000000..7db257c07 --- /dev/null +++ b/shaders/pipelines/raster/culling/FrustumCull.comp.hlsl @@ -0,0 +1,128 @@ +/** + * Frustum Culling Compute Shader + * + * 每个线程处理一个 Draw Command (对应一个 Primitive 的所有 Instances) + * 将不可见的 draw command 的 instance_cnt 设为 0 + */ + +#include "shared/scene/SharedSceneStruct.h" + +struct DrawIndexedCmdData { + uint index_cnt; + uint instance_cnt; + uint first_index; + uint vertex_offset; + uint first_instance; +}; + +// Culling 统计结构 +struct CullStatistics { + uint total_instances_before; // 剔除前的总 instance 数 + uint total_instances_after; // 剔除后的总 instance 数 + uint visible_draws; // 可见的 draw call 数量 + uint total_draws; // 总 draw call 数量 +}; + +struct CullParams { + 
float4 frustum_planes[6]; // World space frustum planes (nx, ny, nz, d) + uint draw_count; + uint _pad[3]; +}; + +[[vk::push_constant]] ConstantBuffer cull_params; + +[[vk::binding(0, 0)]] RWStructuredBuffer draw_commands; +[[vk::binding(1, 0)]] StructuredBuffer primitives; +[[vk::binding(2, 0)]] StructuredBuffer instances; +[[vk::binding(3, 0)]] RWStructuredBuffer statistics; // 统计结果 + +/** + * AABB-Frustum 测试 + * 使用分离轴定理(SAT)的简化版本 + * 如果 AABB 完全在某个平面的负半空间,则判定为不可见 + */ +bool AABBInsideFrustum(float3 aabb_min, float3 aabb_max, float4 planes[6]) { + float3 center = (aabb_min + aabb_max) * 0.5; + float3 extent = (aabb_max - aabb_min) * 0.5; + + [unroll] + for (int i = 0; i < 6; i++) { + float3 normal = planes[i].xyz; + float distance = planes[i].w; + + // 计算 AABB 在平面法线方向上的投影半径 + float radius = dot(extent, abs(normal)); + float center_dist = dot(center, normal) + distance; + + // 如果 center 到平面的距离小于 -radius,则 AABB 完全在平面外侧 + if (center_dist < -radius) { + return false; + } + } + return true; +} + +/** + * 将 local AABB 变换到 world space + */ +void TransformAABB(float4x4 transform, float3 local_min, float3 local_max, + out float3 out_min, out float3 out_max) { + float3 corners[8]; + corners[0] = local_min; + corners[1] = float3(local_max.x, local_min.y, local_min.z); + corners[2] = float3(local_min.x, local_max.y, local_min.z); + corners[3] = float3(local_max.x, local_max.y, local_min.z); + corners[4] = float3(local_min.x, local_min.y, local_max.z); + corners[5] = float3(local_max.x, local_min.y, local_max.z); + corners[6] = float3(local_min.x, local_max.y, local_max.z); + corners[7] = local_max; + + out_min = float3(1e30, 1e30, 1e30); + out_max = float3(-1e30, -1e30, -1e30); + + [unroll] + for (int i = 0; i < 8; i++) { + float3 world_pos = mul(transform, float4(corners[i], 1.0)).xyz; + out_min = min(out_min, world_pos); + out_max = max(out_max, world_pos); + } +} + +[numthreads(64, 1, 1)] +void main(uint tid : SV_DispatchThreadID) { + if (tid >= cull_params.draw_count) 
return; + + DrawIndexedCmdData cmd = draw_commands[tid]; + uint first_inst = cmd.first_instance; + uint inst_count = cmd.instance_cnt; + + // 获取 primitive 的 local AABB + Moer::GPrimitive prim = primitives[tid]; + + uint visible_count = 0; + + for (uint i = 0; i < inst_count; i++) { + Moer::GInstance inst = instances[first_inst + i]; + + // 将 local AABB 变换到 world space + float3 world_min, world_max; + TransformAABB(inst.world_transform, prim.aabb_min, prim.aabb_max, world_min, world_max); + + // 视锥测试 + if (AABBInsideFrustum(world_min, world_max, cull_params.frustum_planes)) { + visible_count++; + } + } + + // 写入剔除后的 instance 数量 + // 如果 visible_count == 0,GPU 会跳过这个 draw + draw_commands[tid].instance_cnt = visible_count; + + // 统计 + InterlockedAdd(statistics[0].total_draws, 1); + InterlockedAdd(statistics[0].total_instances_before, inst_count); + InterlockedAdd(statistics[0].total_instances_after, visible_count); + if (visible_count > 0) { + InterlockedAdd(statistics[0].visible_draws, 1); + } +} diff --git a/source/editor/raster_ui/RasterUI.cpp b/source/editor/raster_ui/RasterUI.cpp index e68cf9182..a85ed81af 100644 --- a/source/editor/raster_ui/RasterUI.cpp +++ b/source/editor/raster_ui/RasterUI.cpp @@ -58,6 +58,33 @@ void RasterUI::ShowConfig() { // MARK: Geometry & Culling if (ImGui::TreeNode("Geometry & Culling")) { + ImGui::Checkbox("Enable GPU Frustum Culling", &m_config.enable_frustum_culling); + + // Culling Statistics + if (m_config.enable_frustum_culling) { + ImGui::Separator(); + ImGui::Text("Culling Statistics:"); + ImGui::Indent(); + + if (m_config.culling_stats.total_instances_before == 0) { + ImGui::TextDisabled(" Waiting for data..."); + } else { + const auto& stats = m_config.culling_stats; + uint32_t culled = stats.total_instances_before - stats.total_instances_after; + float culled_pct = 100.0f * float(culled) / float(stats.total_instances_before); + ImGui::Text( + "Instances: %u / %u visible (%u culled, %.1f%%)", + stats.total_instances_after, + 
stats.total_instances_before, + culled, + culled_pct + ); + ImGui::ProgressBar(culled_pct / 100.0f, ImVec2(150, 0)); + } + ImGui::Unindent(); + ImGui::Separator(); + } + ImGui::Checkbox("Enable Alpha Test", &m_config.geometry_enable_alpha_test); ImGui::SliderFloat("Alpha Cutoff", &m_config.geometry_alpha_test_blend_pixel_cutoff, 0.0f, 1.0f); diff --git a/source/runtime/render/renderer/raster/CullingPass.h b/source/runtime/render/renderer/raster/CullingPass.h new file mode 100644 index 000000000..7ebbf3e84 --- /dev/null +++ b/source/runtime/render/renderer/raster/CullingPass.h @@ -0,0 +1,165 @@ +#pragma once + +/** + * GPU Frustum Culling Pass + * + * 使用 Compute Shader 对视锥外的物体进行剔除 + * 将不可见的 draw command 的 instance_cnt 设为 0 + */ + +#include "math/Function.h" +#include "misc/STL.h" +#include "rhi/RHICommon.h" +#include "rhi/RHIResource.h" +#include "scene/camera/Camera.h" +#include "shader/ShaderCommon.h" +#include "shader/ShaderPipeline.h" +#include "shader/ShaderResourceManager.h" +#include "shaderheaders/shared/raster/geometry_pass/ShaderParameters.h" + +#include "RasterConfig.h" +#include "RasterResource.h" + +namespace Moer::Render::Raster { + +/** + * Culling 统计结构(与 Shader 对应) + */ +struct CullStatistics { + uint total_instances_before; // 剔除前的总 instance 数 + uint total_instances_after; // 剔除后的总 instance 数 + uint visible_draws; // 可见的 draw call 数量 + uint total_draws; // 总 draw call 数量 +}; + +/** + * Frustum Culling 参数 + */ +struct FrustumCullParams { + float4 frustum_planes[6]; // World space frustum planes + uint draw_count; + uint _pad[3]; +}; + +/** + * Frustum Culling Pipeline + */ +class FrustumCullPipeline : public ComputePipeline { +public: + DEFINE_COMPUTE_PIPELINE_CLASS(FrustumCullPipeline); + + // 注意:参数名必须与 shader 中的变量名一致 + // UAV: draw_commands (可写的间接绘制命令缓冲区) + DEFINE_SHADER_BUFFER(draw_commands); + // SRV: primitives (只读的图元数据) + DEFINE_SHADER_BUFFER(primitives); + // SRV: instances (只读的实例数据) + DEFINE_SHADER_BUFFER(instances); + // UAV: statistics 
(统计结果) + DEFINE_SHADER_BUFFER(statistics); + // Push constant: cull_params + DEFINE_SHADER_CONSTANT_STRUCT(FrustumCullParams, cull_params); + + DEFINE_SHADER_ARGS(draw_commands, primitives, instances, statistics, cull_params); +}; + +/** + * Frustum Culling Pass + */ +class CullingPass { +public: + // 裁剪统计信息 + struct CullStatistics { + uint32_t total_instances_before = 0; // 裁剪前的总实例数 + uint32_t total_instances_after = 0; // 裁剪后的总实例数 + uint32_t visible_draws = 0; // 可见的 Draw 数量 + uint32_t total_draws = 0; // 总 Draw 数量 + }; + +public: + CullingPass(RasterContext& context) { + auto& device = RenderDevice::Get(); + + // 创建 Compute PSO + m_pso = ShaderManager::Get().Compute( + "pipelines/raster/culling/FrustumCull.comp.hlsl" + ); + + // 创建统计 buffer (GPU 端) + m_statistics_buf = + device.CreateBuffer("CullingStatistics", 1, EBufferUsageFlags::UNORDERED_ACCESS); + } + + /** + * 执行视锥剔除 + * + * @param context RasterContext + * @param camera 相机,用于获取视锥体 + * @param gpu_scene_res GPU Scene 资源 + * @param out_stats 输出统计结果(可选,为 nullptr 则不读取) + */ + void Process( + RasterContext& context, + const Camera& camera, + const GpuScene::Res& gpu_scene_res, + CullStatistics* out_stats = nullptr + ) { + uint draw_count = gpu_scene_res.draw_cmd_buf.buf->GetNumElement(); + + // 0. 输出上一帧的统计信息(延迟一帧 readback) + if (out_stats) { + *out_stats = m_readback_stats; + } + + // 1. 清零统计 buffer + context.cmd_list.ClearResource(m_statistics_buf->GetView(), 0u); + + // 2. 获取视锥体平面(world space) + FrustumCullParams params; + params.draw_count = draw_count; + camera.GetPlanes(params.frustum_planes); + + // 3. Barrier: 转换 draw_cmd_buf 从 INDIRECT_ARGUMENT -> UNORDERED_ACCESS + context.cmd_list.Barriers( + EQueueType::Graphics, + EQueueType::Graphics, + EPassType::Compute, + WriteBuffer{gpu_scene_res.draw_cmd_buf.buf->GetView(), EBufferState::UNORDERED_ACCESS} + ); + + // 4. 
Dispatch Compute Shader + uint dispatch_count = (params.draw_count + 63) / 64; + + context.cmd_list + .Compute( + m_pso, + gpu_scene_res.draw_cmd_buf.buf->GetView(), // UAV: draw_commands + gpu_scene_res.primitive_buf.buf->GetView(), // SRV: primitives + gpu_scene_res.instance_buf.buf->GetView(), // SRV: instances + m_statistics_buf->GetView(), // UAV: statistics + params // Push constant: cull_params + ) + .Dispatch(uint3(dispatch_count, 1, 1), "FrustumCulling"); + + // 5. Barrier: 转换回 INDIRECT 用于 DrawIndirect + context.cmd_list.Barriers( + EQueueType::Graphics, + EQueueType::Graphics, + EPassType::Graphics, + WriteBuffer{gpu_scene_res.draw_cmd_buf.buf->GetView(), EBufferState::INDIRECT} + ); + + // 6. Readback 统计数据(下一帧才可读) + context.cmd_list.CopyFrom( + m_statistics_buf->GetView(), + std::span(reinterpret_cast(&m_readback_stats), sizeof(CullStatistics)) + ); + } + +private: + FrustumCullPipeline m_pso; + BufferRef m_statistics_buf; // GPU 端统计 buffer + CullStatistics m_readback_stats{}; // CPU 端 readback 结果(延迟一帧) +}; + +} // namespace Moer::Render::Raster diff --git a/source/runtime/render/renderer/raster/GeometryPass.h b/source/runtime/render/renderer/raster/GeometryPass.h index 086480e72..2af537319 100644 --- a/source/runtime/render/renderer/raster/GeometryPass.h +++ b/source/runtime/render/renderer/raster/GeometryPass.h @@ -11,6 +11,7 @@ #include "shader/ShaderResourceManager.h" #include "shaderheaders/shared/raster/geometry_pass/ShaderParameters.h" +#include "CullingPass.h" #include "RasterConfig.h" #include "RasterResource.h" @@ -29,7 +30,7 @@ class GeometryPassPipeline : public RasterPipeline { class GeometryPass { public: - GeometryPass(RasterContext& context) { + GeometryPass(RasterContext& context) : m_culling_pass(context), m_culling_last_enabled(false) { // 1. PSO @@ -70,14 +71,37 @@ class GeometryPass { ); } - void Process(RasterContext& context, const RasterConfig& ui_config, const Camera& camera) { - - // 1. 
Params - + void Process(RasterContext& context, RasterConfig& ui_config, const Camera& camera) { + const auto& gpu_scene_res = context.scene.gpu_scene_res(); + + // 1. Frustum Culling (if enabled) + if (ui_config.enable_frustum_culling) { + // 每帧恢复原始 draw commands(因为 culling 会修改 instance_cnt) + context.scene.RestoreDrawCommands(context.cmd_list); + + CullingPass::CullStatistics stats; + m_culling_pass.Process(context, camera, gpu_scene_res, &stats); + + // 更新 UI 统计信息 + ui_config.culling_stats.total_instances_before = stats.total_instances_before; + ui_config.culling_stats.total_instances_after = stats.total_instances_after; + ui_config.culling_stats.visible_draws = stats.visible_draws; + ui_config.culling_stats.total_draws = stats.total_draws; + m_culling_last_enabled = true; + } else { + // 重置统计 + ui_config.culling_stats = RasterConfig::CullingStats{}; + // 恢复原始 draw commands(如果之前启用了 culling) + if (m_culling_last_enabled) { + context.scene.RestoreDrawCommands(context.cmd_list); + m_culling_last_enabled = false; + } + } + + // 2. Params GeometryPassBindlessParam param; param.world2clip = Transpose(camera.GetViewProjectionMatrix()); - const auto& gpu_scene_res = context.scene.gpu_scene_res(); param.instance_buf_hdl = gpu_scene_res.instance_buf.hdl; param.primitive_buf_hdl = gpu_scene_res.primitive_buf.hdl; param.position_buf_hdl = gpu_scene_res.position_buf.hdl; @@ -89,7 +113,7 @@ class GeometryPass { param.enable_alpha_test = ui_config.geometry_enable_alpha_test ? 1 : 0; param.alpha_test_blend_pixel_cutoff = ui_config.geometry_alpha_test_blend_pixel_cutoff; - // 2. Draw + // 3. 
Draw auto rect2d = context.textures.position.GetRect2D(); assert( rect2d == context.textures.vbuffer.GetRect2D() && rect2d == context.textures.normal.GetRect2D() && @@ -116,6 +140,8 @@ class GeometryPass { private: GeometryPassPipeline m_pso; + CullingPass m_culling_pass; + bool m_culling_last_enabled; }; } // namespace Moer::Render::Raster \ No newline at end of file diff --git a/source/runtime/render/renderer/raster/RasterConfig.h b/source/runtime/render/renderer/raster/RasterConfig.h index aec08d3a3..95c999201 100644 --- a/source/runtime/render/renderer/raster/RasterConfig.h +++ b/source/runtime/render/renderer/raster/RasterConfig.h @@ -113,6 +113,20 @@ struct RasterConfig { bool geometry_enable_alpha_test = true; float geometry_alpha_test_blend_pixel_cutoff = 0.5f; // 当AlphaMode为BLEND时,低于该值的像素会被丢弃 + bool enable_frustum_culling = true; // GPU视锥剔除 + + // MARK: Culling Statistics (只读,由GPU更新) + struct CullingStats { + uint total_instances_before = 0; + uint total_instances_after = 0; + uint visible_draws = 0; + uint total_draws = 0; + + float GetCullingRatio() const { + if (total_instances_before == 0) return 0.0f; + return 1.0f - (float)total_instances_after / (float)total_instances_before; + } + } culling_stats; // MARK: Shading EShadingMode shading_mode = EShadingMode::DEFAULT_PBR; diff --git a/source/runtime/render/scene/CpuScene.cpp b/source/runtime/render/scene/CpuScene.cpp index 16bf80eb2..f55bfd2b3 100644 --- a/source/runtime/render/scene/CpuScene.cpp +++ b/source/runtime/render/scene/CpuScene.cpp @@ -240,6 +240,16 @@ void CpuScene::InitializeMeshes() { g_primitive.aabb_min = c_primitive.aabb.min; g_primitive.aabb_max = c_primitive.aabb.max; + // 验证 AABB 有效性 + if (!c_primitive.aabb.IsValid()) { + LOG_WARNING( + "Primitive {} has invalid AABB: min=({},{},{}), max=({},{},{})", + m_primitive_buf.size(), + g_primitive.aabb_min.x, g_primitive.aabb_min.y, g_primitive.aabb_min.z, + g_primitive.aabb_max.x, g_primitive.aabb_max.y, g_primitive.aabb_max.z + ); + } + 
uint primitive_id = static_cast(m_primitive_buf.size()); m_primitive_buf.emplace_back(g_primitive); m_map_primitive_entity_to_id[entity] = primitive_id; // build index cache diff --git a/source/runtime/render/scene/GpuScene.cpp b/source/runtime/render/scene/GpuScene.cpp index 656d2b23a..d53cd6c24 100644 --- a/source/runtime/render/scene/GpuScene.cpp +++ b/source/runtime/render/scene/GpuScene.cpp @@ -521,4 +521,16 @@ void GpuScene::UpdateRaytracingScene(CommandList& cmd_list) { cmd_list.UpdateRaytracingScene(m_res.rt_scene); } +void GpuScene::RestoreDrawCommands(CommandList& cmd_list) { + // 从 CPU 数据重新上传 draw_cmd_buf,恢复原始 instance_cnt + cmd_list.CopyFrom( + std::span( + (byte*)m_cpu_scene.m_draw_cmd_buf.data(), + m_cpu_scene.m_draw_cmd_buf.size() * sizeof(Render::DrawIndexedCmdData) + ), + m_res.draw_cmd_buf.buf->GetView(), + "RestoreDrawCommands" + ); +} + } // namespace Moer::Render \ No newline at end of file diff --git a/source/runtime/render/scene/GpuScene.h b/source/runtime/render/scene/GpuScene.h index 42454ca47..97d411d67 100644 --- a/source/runtime/render/scene/GpuScene.h +++ b/source/runtime/render/scene/GpuScene.h @@ -99,6 +99,12 @@ class RENDER_API GpuScene { return m_res.rt_scene; } + /** + * 恢复 Draw Commands 到原始状态(从 CPU 数据重新上传) + * 用于 GPU Culling 关闭时恢复原始 instance_cnt + */ + void RestoreDrawCommands(CommandList& cmd_list); + private: ecs::LogicalScene& m_logical_scene; CpuScene& m_cpu_scene; diff --git a/source/runtime/render/scene/Scene.cpp b/source/runtime/render/scene/Scene.cpp index f73364320..e546fd12c 100644 --- a/source/runtime/render/scene/Scene.cpp +++ b/source/runtime/render/scene/Scene.cpp @@ -125,6 +125,11 @@ const CpuScene& Scene::GetCpuScene() const { return cpu_scene(); } +void Scene::RestoreDrawCommands(Render::CommandList& cmd_list) { + assert(m_gpu_scene && "Scene is not ready"); + m_gpu_scene->RestoreDrawCommands(cmd_list); +} + Render::BindlessArrayRef Scene::bindless_array() { if (!m_bindless_array) { m_bindless_array = 
Render::RenderDevice::Get().CreateBindlessArray(); diff --git a/source/runtime/render/scene/Scene.h b/source/runtime/render/scene/Scene.h index 71b42ed55..157de2340 100644 --- a/source/runtime/render/scene/Scene.h +++ b/source/runtime/render/scene/Scene.h @@ -128,6 +128,11 @@ class RENDER_API Scene { const Render::GpuScene::Res& gpu_scene_res() const; const Render::GpuScene::Res& GetGpuSceneRes() const; + /** + * 恢复 Draw Commands 到原始状态(用于关闭 GPU Culling 时) + */ + void RestoreDrawCommands(Render::CommandList& cmd_list); + const CpuScene& cpu_scene() const; const CpuScene& GetCpuScene() const; diff --git a/source/runtime/render/scene/camera/Camera.cpp b/source/runtime/render/scene/camera/Camera.cpp index 5b9affac4..36c0e02da 100644 --- a/source/runtime/render/scene/camera/Camera.cpp +++ b/source/runtime/render/scene/camera/Camera.cpp @@ -405,12 +405,12 @@ void Camera::UpdateViewProjectionMatrix() { } void Camera::UpdatePlanesAndFrustum() { - // FIXME: m_planes是怎么算的?没看懂,不知道是否有错 - auto vp = GetProjectionMatrix(); + // Gribb/Hartmann method: 从 ViewProjection 矩阵提取 world space 视锥体平面 + auto vp = GetViewProjectionMatrix(); m_planes[0] = vp.r3 + vp.r0; //left m_planes[1] = vp.r3 - vp.r0; //right - m_planes[2] = vp.r3 + vp.r1; //top - m_planes[3] = vp.r3 - vp.r1; //bottom + m_planes[2] = vp.r3 + vp.r1; //bottom + m_planes[3] = vp.r3 - vp.r1; //top m_planes[4] = vp.r2; //near m_planes[5] = vp.r3 - vp.r2; //far @@ -544,8 +544,10 @@ void Camera::Tick(const SharedPtr config) { // unreal style camera control if (WindowInput::Get().key_button_switch_state[KeyButtons::F]) { pure_rotate(); - } else if (WindowInput::Get().mouse_button_state[MouseButtons::Left] && - WindowInput::Get().mouse_button_state[MouseButtons::Right]) { + } else if ( + WindowInput::Get().mouse_button_state[MouseButtons::Left] && + WindowInput::Get().mouse_button_state[MouseButtons::Right] + ) { pure_move(); } else if (WindowInput::Get().mouse_button_state[MouseButtons::Middle]) { pure_move(); From 
eabec07d7152517daa2e1575a2fbabb2795e58a5 Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Sun, 29 Mar 2026 12:30:37 +0800 Subject: [PATCH 04/17] =?UTF-8?q?feat(raster):=20ShadowMaskPass=E6=80=A7?= =?UTF-8?q?=E8=83=BD=E4=BC=98=E5=8C=96=EF=BC=8C=E4=BC=98=E5=8C=96CSM=20Ble?= =?UTF-8?q?nd=E6=97=B6=E9=9D=9E=E9=87=8D=E5=90=88=E9=83=A8=E5=88=86?= =?UTF-8?q?=E7=9A=84=E5=83=8F=E7=B4=A0=E7=9A=84SM=E9=87=87=E6=A0=B7?= =?UTF-8?q?=E6=AC=A1=E6=95=B0=E7=94=B12=E5=88=B01=EF=BC=88ShadowMaskPass?= =?UTF-8?q?=206.7ms->5ms=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../raster/deferred/lighting/shadows/PCSS.hlsli | 4 ++-- .../deferred/lighting/shadows/Shadows.hlsli | 16 +++++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli b/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli index 552f83ca0..6a873cbee 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli +++ b/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli @@ -26,11 +26,11 @@ #define PCSS_ENABLE_POST_PCF_SHARPENING 1 // Blocker search samples -#define PCSS_SEARCH_BITS 5 +#define PCSS_SEARCH_BITS 4 #define PCSS_SEARCH_SAMPLES (1 << PCSS_SEARCH_BITS) // Shadow filtering samples -#define PCSS_SAMPLE_BITS 5 +#define PCSS_SAMPLE_BITS 4 #define PCSS_SAMPLES (1 << PCSS_SAMPLE_BITS) //必须先定义宏,再包含头文件 diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli b/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli index 8a101a23c..072cc5a65 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli +++ b/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli @@ -116,17 +116,19 @@ float calculate_csm_shadow( float3 main_light_dir = lighting_data.main_light_direction; if (lighting_data.is_csm_blend_enabled == 1) { + float cascade_blend_ratio = get_cascade_blend_ratio(lighting_data, 
world_pos, cascade_index); float shadow_current = get_single_shadow(lighting_data, world_pos, cascade_index, screen_uv, normal, main_light_dir); - float shadow_next = - (cascade_index + 1 < lighting_data.shadow_csm_num_of_cascades) ? + + // Only sample the next cascade when actually in the blend region + if (cascade_blend_ratio > 0.0 && cascade_index + 1 < lighting_data.shadow_csm_num_of_cascades) { + float shadow_next = get_single_shadow( lighting_data, world_pos, cascade_index + 1, screen_uv, normal, main_light_dir - ) : - 1.0; - - float cascade_blend_ratio = get_cascade_blend_ratio(lighting_data, world_pos, cascade_index); - return lerp(shadow_current, shadow_next, cascade_blend_ratio); + ); + return lerp(shadow_current, shadow_next, cascade_blend_ratio); + } + return shadow_current; } else { return get_single_shadow(lighting_data, world_pos, cascade_index, screen_uv, normal, main_light_dir); } From cde55a44eca85235594c62fd73b40d29bdb08df3 Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Sun, 29 Mar 2026 13:33:59 +0800 Subject: [PATCH 05/17] =?UTF-8?q?feat(raster):=20=E5=B0=86=E7=9F=A9?= =?UTF-8?q?=E9=98=B5=E5=8F=98=E6=8D=A2=E5=90=8D=E5=AD=97=E7=BB=9F=E4=B8=80?= =?UTF-8?q?=E4=B8=BAspace2space=E5=BD=A2=E5=BC=8F=EF=BC=9B=E5=8E=BB?= =?UTF-8?q?=E9=99=A4gbuffer=E4=B8=AD=E5=AD=98=E5=82=A8=E7=9A=84position?= =?UTF-8?q?=EF=BC=8C=E5=AE=9E=E7=8E=B0=E4=BA=86GeometryPass=2050%=E7=9A=84?= =?UTF-8?q?=E6=80=A7=E8=83=BD=E6=8F=90=E5=8D=87=EF=BC=882.3ms->1.2ms?= =?UTF-8?q?=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- shaders/pipelines/RasterCommon.hlsli | 19 ++++ .../pipelines/postprocess/aa/SmaaWrapper.hlsl | 7 +- .../postprocess/lighting_effects/Ao.hlsl | 7 +- .../lighting_effects/AoCommon.hlsl | 5 +- .../postprocess/lighting_effects/Rtao.hlsl | 8 +- .../postprocess/lighting_effects/Ssdo.hlsl | 28 +++--- .../postprocess/lighting_effects/Ssr.hlsl | 5 +- .../env_and_atmo/SkyboxPass.frag.hlsl | 12 +-- 
.../geometry/GeometryPassCommon.hlsli | 1 - .../deferred/geometry/GeometryPassPixel.hlsl | 3 - .../deferred/geometry/GeometryPassVertex.hlsl | 6 +- .../lighting/RasterLightingPass.frag.hlsl | 10 +-- .../deferred/lighting/shadows/CSM.hlsli | 4 +- .../lighting/shadows/ShadowMask.frag.hlsl | 6 +- .../deferred/lighting/shadows/Shadows.hlsli | 2 +- .../runtime/render/renderer/raster/AaPass.h | 4 +- .../runtime/render/renderer/raster/AoPass.h | 9 +- .../render/renderer/raster/GeometryPass.h | 6 +- .../render/renderer/raster/LightingPass.h | 1 - .../render/renderer/raster/RasterRenderer.cpp | 12 +-- .../render/renderer/raster/RasterResource.h | 2 +- .../renderer/raster/ShadowDepthPass.cpp | 4 +- .../render/renderer/raster/SkyboxPass.cpp | 4 +- .../runtime/render/renderer/raster/SsrPass.h | 4 +- .../env_and_atmo_pass/ShaderParameters.h | 2 +- .../raster/lighting_pass/ShaderParameters.h | 6 +- .../raster/post_process/ShaderParameters.h | 89 ++++++++++--------- 27 files changed, 135 insertions(+), 131 deletions(-) create mode 100644 shaders/pipelines/RasterCommon.hlsli diff --git a/shaders/pipelines/RasterCommon.hlsli b/shaders/pipelines/RasterCommon.hlsli new file mode 100644 index 000000000..4687afd91 --- /dev/null +++ b/shaders/pipelines/RasterCommon.hlsli @@ -0,0 +1,19 @@ +#ifndef MOER_RASTER_COMMON_HLSL +#define MOER_RASTER_COMMON_HLSL + +#include "core/common/Bindless.hlsl" +#include "core/common/Common.hlsl" + +float3 WorldPosFromDepth(float depth, float2 screen_uv, float4x4 clip2world) { + float4 clip = float4(screen_uv.x * 2.f - 1.f, 1.f - screen_uv.y * 2.f, depth, 1.0); + float4 world_w = mul(clip2world, clip); + float3 pos = world_w.xyz / world_w.w; + return pos; +} + +float3 WorldPosFromDepthTexture(uint depth_tex_hdl, float2 screen_uv, float4x4 clip2world) { + float depth = TextureHandle(depth_tex_hdl).Sample2D(screen_uv); + return WorldPosFromDepth(depth, screen_uv, clip2world); +} + +#endif \ No newline at end of file diff --git 
a/shaders/pipelines/postprocess/aa/SmaaWrapper.hlsl b/shaders/pipelines/postprocess/aa/SmaaWrapper.hlsl index 7a888763f..d81804469 100644 --- a/shaders/pipelines/postprocess/aa/SmaaWrapper.hlsl +++ b/shaders/pipelines/postprocess/aa/SmaaWrapper.hlsl @@ -3,6 +3,7 @@ #include "core/common/Bindless.hlsl" #include "core/common/Common.hlsl" BINDLESS_BINDINGS(3, 2, 4, 5) +#include "pipelines/RasterCommon.hlsli" #include "shared/raster/ShaderParameters.h" [[vk::push_constant]] ConstantBuffer param; @@ -131,8 +132,10 @@ SMAAResolveVS_Output SMAAResolveVS_Wrapper(uint VertexIndex : SV_VertexID) { // according to SMAARepo: Demo/DX10/Shaders/Simple.fx float2 SMAAGetVelocity(float2 uv) { - float4 c_pos = SMAAGetTexture2D(param.position_tex).Sample(LinearSampler, uv); - float4 p_pos = mul(param.curr_inv_vp_and_prev_vp, float4(c_pos.xyz, 1.0)); + float depth = SMAAGetTexture2D(param.depth_tex).Sample(LinearSampler, uv).r; + float4 c_pos = float4(WorldPosFromDepth(depth, uv, param.clip2world), 1.0); + + float4 p_pos = mul(param.clip2prev_clip, float4(c_pos.xyz, 1.0)); float2 c_pos2 = (c_pos.xy / c_pos.w) * float2(0.5, -0.5); float2 p_pos2 = (p_pos.xy / p_pos.w) * float2(0.5, -0.5); diff --git a/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl b/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl index a0caa858e..64612d336 100644 --- a/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl @@ -6,11 +6,12 @@ #include "core/common/Bindless.hlsl" #include "core/common/Common.hlsl" BINDLESS_BINDINGS(3, 2, 4, 5) +#include "pipelines/RasterCommon.hlsli" #include "shared/raster/ShaderParameters.h" [[vk::push_constant]] ConstantBuffer param; -// 定义了AoOutput、CameraMotionVector等函? +// 定义了AoOutput、CameraMotionVector等函�? 
#include "pipelines/postprocess/lighting_effects/AoCommon.hlsl" static const float3 ABNORMAL_COLOR = float3(0.0, 0.0, 1.0); @@ -23,7 +24,7 @@ float2 random_2to2(float2 uv) { // reference: games202 & https://www.shadertoy.com/view/Ms33WB float ssao_games202(float2 uv) { float3 normal = Raster::UnpackNormal(TextureHandle(param.normal_tex).Sample2D(uv)); - float3 position = TextureHandle(param.position_tex).Sample2D(uv); + float3 position = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); // if (uv.x < param.inv_resolution.x && uv.y < param.inv_resolution.y) { // printf("uv: %f, %f; pos: %f, %f, %f\n", uv.x, uv.y, position.x, position.y, position.z); @@ -34,7 +35,7 @@ float ssao_games202(float2 uv) { for (uint i = 0; i < param.ssao_sample_count; i++) { float2 offset = random_2to2(uv + 0.093 * float2(i, i)) * 2.0 - 1.0; - float3 sample_position = TextureHandle(param.position_tex).Sample2D(uv + offset * tmp1).rgb; + float3 sample_position = WorldPosFromDepthTexture(param.depth_tex, uv + offset * tmp1, param.clip2world); float3 vec = sample_position - position; float3 len = length(vec); diff --git a/shaders/pipelines/postprocess/lighting_effects/AoCommon.hlsl b/shaders/pipelines/postprocess/lighting_effects/AoCommon.hlsl index 49a6a9b13..96de959dd 100644 --- a/shaders/pipelines/postprocess/lighting_effects/AoCommon.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/AoCommon.hlsl @@ -1,4 +1,5 @@ #include "core/math/Math.hlsli" +#include "pipelines/RasterCommon.hlsli" struct AoOutput { float4 color_with_ao : SV_Target0; @@ -11,8 +12,6 @@ float2 GetCameraMotionVector(float2 uv) { // camera_mv.world2clip // camera_mv.world2clip_prev // param.depth_tex - // param.position_tex - // MARK: Lighting Data ArrayBuffer camera_mv_data = ArrayBuffer(param.camera_mv_data_handle); @@ -23,7 +22,7 @@ float2 GetCameraMotionVector(float2 uv) { return float2(0.0, 0.0); } - float3 world_pos_vec3 = TextureHandle(param.position_tex).Sample2D(uv); + float3 world_pos_vec3 = 
WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); float4 world_pos = float4(world_pos_vec3, 1.0); float4 clip_pos = mul(camera_mv.world2clip, world_pos); diff --git a/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl b/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl index 89dc48d0f..feb0965c5 100644 --- a/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl @@ -14,7 +14,7 @@ BINDLESS_BINDINGS(3, 2, 4, 5) [[vk::binding(0, DI_BINDING_SLOT)]] RaytracingAccelerationStructure tlas; -// 定义了AoOutput、CameraMotionVector等函? +// 定义了AoOutput、CameraMotionVector等函�? #include "pipelines/postprocess/lighting_effects/AoCommon.hlsl" // TODO: 代码整理 @@ -39,7 +39,7 @@ float4 SampleCosineHemisphere(float2 u) { return float4(r * cos(theta), y, r * sin(theta), /* pdf */ y / PI); } -// 将一个半球坐标系中的vector转换到以某个特定normal?z的半球上 (Written by AI) +// 将一个半球坐标系中的vector转换到以某个特定normal�?z的半球上 (Written by AI) float3 LocalVectorToWorld(float3 local_vector, float3 normal) { float3 up = abs(normal.y) < 0.999 ? float3(0, 1, 0) : float3(1, 0, 0); float3 tangent = normalize(cross(up, normal)); @@ -50,7 +50,7 @@ float3 LocalVectorToWorld(float3 local_vector, float3 normal) { + local_vector.z * bitangent; } -// TODO: 和RT那边的函数合? +// TODO: 和RT那边的函数合�? 
bool CastVisibilityRay(float3 origin, float3 direction, float tmin, float tmax, RaytracingAccelerationStructure accel, uint instance_mask, uint ray_flags) { @@ -95,7 +95,7 @@ AoOutput get_rtao(float2 uv) { Moer::RandomState rng = Moer::RandomState::Create(uv * param.resolution, param.frame_idx); - float3 frag_position = TextureHandle(param.position_tex).Sample2D(uv); + float3 frag_position = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); // Raytraced AO float total_ray_contrib = 0.0; diff --git a/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl b/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl index cd7bae955..8e12fe93d 100644 --- a/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl @@ -17,7 +17,7 @@ static const float3 DIFFUSE_ALBEDO = float3(0.5, 0.5, 0.5); // } float random_1to1(float2 seed) { - // 使用 sin 和一个大数的小数部分来产生伪随机? + // 使用 sin 和一个大数的小数部分来产生伪随机�? return frac(sin(dot(seed, float2(12.9898, 78.233))) * 43758.5453123); } @@ -45,7 +45,7 @@ float3 SampleHemisphere_Cosine(float3 N, float2 randomValues) { } float3 apply_view_projection(float3 position) { - float4 p = mul(param.view_projection_matrix, float4(position, 1.0)); + float4 p = mul(param.world2clip, float4(position, 1.0)); p /= p.w; return float3(p.x * 0.5 + 0.5, -p.y * 0.5 + 0.5, p.z); } @@ -55,7 +55,7 @@ bool isUvdValid(float3 uv) { } float GetDepthFromWorldPos(float3 worldPos) { - float4 clipPos = mul(param.view_projection_matrix, float4(worldPos, 1.0)); + float4 clipPos = mul(param.world2clip, float4(worldPos, 1.0)); return clipPos.z / clipPos.w; } @@ -65,30 +65,30 @@ float3 GetVplIndirectLight(float3 vpl_pos, float3 vpl_normal, float3 shading_pos float VPL_distance = length(vpl_pos - shading_pos); float attenuation = 1.0 / (VPL_distance * VPL_distance + 1.0); // 稳定衰减 - // 面积? - float vpl_linear_depth = abs(mul(param.view_matrix, float4(vpl_pos, 1.0)).z); + // 面积�? 
+ float vpl_linear_depth = abs(mul(param.world2view, float4(vpl_pos, 1.0)).z); float area_weight = vpl_linear_depth * vpl_linear_depth + 0.0001; area_weight = min(2, area_weight); //防止过大 // 简单的漫反射间接光 - //由于采用了余弦加权采样,这里不用再乘?shadingCosine ? + //由于采用了余弦加权采样,这里不用再乘�?shadingCosine �? float3 indirect_light = vplCosine * pixel_color * attenuation * area_weight; return indirect_light; } float3 GetVplContribution(float2 vpl_uv, float2 shading_uv) { - float3 vpl_pos = TextureHandle(param.position_tex).Sample2D(vpl_uv); + float3 vpl_pos = WorldPosFromDepthTexture(param.depth_tex, vpl_uv, param.clip2world); float3 vpl_normal = normalize(Raster::UnpackNormal(TextureHandle(param.normal_tex).Sample2D(vpl_uv))); - float3 shading_pos = TextureHandle(param.position_tex).Sample2D(shading_uv); + float3 shading_pos = WorldPosFromDepthTexture(param.depth_tex, shading_uv, param.clip2world); float3 pixel_color = TextureHandle(param.input_image).Sample2D(vpl_uv); return GetVplIndirectLight(vpl_pos, vpl_normal, shading_pos, pixel_color); } float4 GetSsdo(float2 uv) { float3 normal = Raster::UnpackNormal(TextureHandle(param.normal_tex).Sample2D(uv)); - float3 position = TextureHandle(param.position_tex).Sample2D(uv); + float3 position = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); float occlusion = 0.0; float3 indirect_light = float3(0.0, 0.0, 0.0); @@ -111,7 +111,7 @@ float4 GetSsdo(float2 uv) { } float sceneDepthOfUV = TextureHandle(param.depth_tex).Sample2D(sampleUVD.xy); - float3 scenePosOfUV = TextureHandle(param.position_tex).Sample2D(sampleUVD.xy).rgb; + float3 scenePosOfUV = WorldPosFromDepthTexture(param.depth_tex, sampleUVD.xy, param.clip2world); float distFromVplToShadingPoint = length(scenePosOfUV - position); if ((sampleUVD.z + param.ssdo_depth_bias) < sceneDepthOfUV) { //reversed z! 
@@ -164,14 +164,14 @@ AoOutput main(float2 uv : TEXCOORD0) { // float3 get_ssdo_ao_test(float2 uv) { // float3 normal = Raster::UnpackNormal(TextureHandle(param.normal_tex).Sample2D(uv)); -// float3 position = TextureHandle(param.position_tex).Sample2D(uv); +// float3 position = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); // float ao = 0.0; // float2 tmp1 = param.ssdo_radius * param.inv_resolution; // for (uint i = 0; i < param.ssdo_sample_count; i++) { // float2 offset = random_2to2(uv + 0.093 * float2(i, i)) * 2.0 - 1.0; -// float3 sample_position = TextureHandle(param.position_tex).Sample2D(uv + offset * tmp1).rgb; +// float3 sample_position = WorldPosFromDepthTexture(param.depth_tex, uv + offset * tmp1, param.clip2world); // float3 vec = sample_position - position; // float3 len = length(vec); @@ -187,14 +187,14 @@ AoOutput main(float2 uv : TEXCOORD0) { // float3 get_ssdo_indirect_test(float2 uv) { // float3 normal = Raster::UnpackNormal(TextureHandle(param.normal_tex).Sample2D(uv)); -// float3 position = TextureHandle(param.position_tex).Sample2D(uv); +// float3 position = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); // float3 indirect_light = float3(0.0, 0.0, 0.0); // float2 tmp1 = param.ssdo_radius * param.inv_resolution; // for (uint i = 0; i < param.ssdo_sample_count; i++) { // float2 offset = random_2to2(uv + 0.093 * float2(i, i)) * 2.0 - 1.0; -// float3 sample_position = TextureHandle(param.position_tex).Sample2D(uv + offset * tmp1).rgb; +// float3 sample_position = WorldPosFromDepthTexture(param.depth_tex, uv + offset * tmp1, param.clip2world); // float3 vec = sample_position - position; // float3 len = length(vec); diff --git a/shaders/pipelines/postprocess/lighting_effects/Ssr.hlsl b/shaders/pipelines/postprocess/lighting_effects/Ssr.hlsl index d8ba60832..16a5a8da7 100644 --- a/shaders/pipelines/postprocess/lighting_effects/Ssr.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/Ssr.hlsl @@ -6,6 +6,7 @@ 
#include "core/common/Bindless.hlsl" #include "core/common/Common.hlsl" BINDLESS_BINDINGS(3, 2, 4, 5) +#include "pipelines/RasterCommon.hlsli" #include "materials/Material.hlsli" #include "shared/raster/ShaderParameters.h" @@ -44,7 +45,7 @@ bool should_apply_ssr(float2 uv) { // the performance cost is so high } float3 apply_view_projection(float3 position) { - float4 p = mul(param.view_projection_matrix, float4(position, 1.0)); + float4 p = mul(param.world2clip, float4(position, 1.0)); p /= p.w; // 两个究极大坑�? // p.z is not needed to apply f(x) = x * 0.5 + 0.5; @@ -123,7 +124,7 @@ float3 ssr_ray_tracing(float3 color, float3 test_point, float3 direction, float float3 ssr(float3 color, float2 uv) { float3 normal = TextureHandle(param.normal_tex).Sample2D(uv).rgb * 2.0 - 1.0; - float3 position = TextureHandle(param.position_tex).Sample2D(uv).rgb; + float3 position = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); float3 camera_to_pixel = normalize(position - param.camera_position); float3 reflect_dir = normalize(reflect(camera_to_pixel, normal)); diff --git a/shaders/pipelines/raster/deferred/env_and_atmo/SkyboxPass.frag.hlsl b/shaders/pipelines/raster/deferred/env_and_atmo/SkyboxPass.frag.hlsl index 950005fea..9a57f81b0 100644 --- a/shaders/pipelines/raster/deferred/env_and_atmo/SkyboxPass.frag.hlsl +++ b/shaders/pipelines/raster/deferred/env_and_atmo/SkyboxPass.frag.hlsl @@ -2,21 +2,15 @@ #define RASTER_SKYBOX_HLSLI #include "core/common/Bindless.hlsl" -BINDLESS_BINDINGS(3, 2, 4, 5) #include "core/common/Common.hlsl" +BINDLESS_BINDINGS(3, 2, 4, 5) +#include "pipelines/RasterCommon.hlsli" #include "shared/raster/ShaderParameters.h" [[vk::push_constant]] ConstantBuffer param; -float3 WorldPosFromDepth(float depth, float2 screen_uv, float4x4 inv_view_proj) { - float4 clip = float4(screen_uv.x * 2.f - 1.f, 1.f - screen_uv.y * 2.f, depth, 1.0); - float4 world_w = mul(inv_view_proj, clip); - float3 pos = world_w.xyz / world_w.w; - return pos; -} - float4 
main(float2 in_uv : TEXCOORD0) : SV_TARGET { - float3 in_pos = WorldPosFromDepth(0.99, in_uv, param.inv_view_proj); + float3 in_pos = WorldPosFromDepth(0.99, in_uv, param.clip2world); float3 view_dir = normalize(in_pos - param.camera_pos); float3 skybox_color = TextureHandle(param.cubemap_handle).SampleCube(view_dir); skybox_color *= param.exposure_factor; diff --git a/shaders/pipelines/raster/deferred/geometry/GeometryPassCommon.hlsli b/shaders/pipelines/raster/deferred/geometry/GeometryPassCommon.hlsli index d13fe7060..d0cc76377 100644 --- a/shaders/pipelines/raster/deferred/geometry/GeometryPassCommon.hlsli +++ b/shaders/pipelines/raster/deferred/geometry/GeometryPassCommon.hlsli @@ -12,7 +12,6 @@ struct VsOutput { struct VsOutput { float4 position : SV_POSITION; - float3 world_position : POSITION; float3 normal : NORMAL; float3 tangent : TANGENT; float2 texcoord0 : TEXCOORD0; diff --git a/shaders/pipelines/raster/deferred/geometry/GeometryPassPixel.hlsl b/shaders/pipelines/raster/deferred/geometry/GeometryPassPixel.hlsl index a75384c77..b43f2c414 100644 --- a/shaders/pipelines/raster/deferred/geometry/GeometryPassPixel.hlsl +++ b/shaders/pipelines/raster/deferred/geometry/GeometryPassPixel.hlsl @@ -53,20 +53,17 @@ struct PsOutput { float4 normal : SV_TARGET1; float4 tangent : SV_TARGET2; float2 texcoord0 : SV_TARGET3; - float4 position : SV_TARGET4; }; PsOutput main(VsOutput input) : SV_TARGET { DiscardByAlphaTest(input.material_id, input.texcoord0); // 此处有可能触发discard,直接终止shader - PsOutput output; output.vbuffer = input.material_id; output.normal = float4(Raster::PackNormal(normalize(input.normal)), 1.0); output.tangent = float4(Raster::PackNormal(normalize(input.tangent)), 1.0); output.texcoord0 = input.texcoord0; - output.position = float4(input.world_position, 1.0); return output; } diff --git a/shaders/pipelines/raster/deferred/geometry/GeometryPassVertex.hlsl b/shaders/pipelines/raster/deferred/geometry/GeometryPassVertex.hlsl index 31bc3b5d8..5d34b1dd9 
100644 --- a/shaders/pipelines/raster/deferred/geometry/GeometryPassVertex.hlsl +++ b/shaders/pipelines/raster/deferred/geometry/GeometryPassVertex.hlsl @@ -27,7 +27,6 @@ struct VsContext { Moer::GPrimitive primitive; // derived 2 - float3 out_world_pos; float4 out_clip_pos; #if !SHADOW_DEPTH_PASS @@ -68,8 +67,8 @@ struct VsContext { ArrayBuffer position_buf = ArrayBuffer(param.position_buf_hdl); vertex_pos = position_buf.Load(primitive.position_start_idx + vertex_id); } - out_world_pos = mul(model2world, float4(vertex_pos, 1.0)).xyz; - out_clip_pos = mul(param.world2clip, float4(out_world_pos, 1.0)); + float3 world_pos = mul(model2world, float4(vertex_pos, 1.0)).xyz; + out_clip_pos = mul(param.world2clip, float4(world_pos, 1.0)); #if !SHADOW_DEPTH_PASS @@ -125,7 +124,6 @@ VsOutput main( #if !SHADOW_DEPTH_PASS output.position = context.out_clip_pos; - output.world_position = context.out_world_pos; output.normal = context.out_normal; output.tangent = context.out_tangent; output.texcoord0 = context.out_texcoord0; diff --git a/shaders/pipelines/raster/deferred/lighting/RasterLightingPass.frag.hlsl b/shaders/pipelines/raster/deferred/lighting/RasterLightingPass.frag.hlsl index 3f743dd9f..567255f84 100644 --- a/shaders/pipelines/raster/deferred/lighting/RasterLightingPass.frag.hlsl +++ b/shaders/pipelines/raster/deferred/lighting/RasterLightingPass.frag.hlsl @@ -1,6 +1,7 @@ #include "core/common/Bindless.hlsl" #include "core/common/Common.hlsl" BINDLESS_BINDINGS(3, 2, 4, 5) +#include "pipelines/RasterCommon.hlsli" #include "materials/Brdf.hlsli" #include "materials/Material.hlsli" @@ -10,13 +11,6 @@ BINDLESS_BINDINGS(3, 2, 4, 5) [[vk::push_constant]] ConstantBuffer param; -float3 WorldPosFromDepth(float depth, float2 screen_uv, float4x4 inv_view_proj) { - float4 clip = float4(screen_uv.x * 2.f - 1.f, 1.f - screen_uv.y * 2.f, depth, 1.0); - float4 world_w = mul(inv_view_proj, clip); - float3 pos = world_w.xyz / world_w.w; - return pos; -} - float4 main(float2 in_uv : 
TEXCOORD0) : SV_TARGET { // MARK: Textures uint material_id = TextureHandle(param.vbuffer).Sample2D(in_uv); @@ -35,7 +29,7 @@ float4 main(float2 in_uv : TEXCOORD0) : SV_TARGET { ); // 因为法线mipmap不满足线性关系,所以这里需要normalize float3 tangent = normalize(Raster::UnpackNormal(TextureHandle(param.gbuffer_tangent).Sample2D(in_uv))); // 同上 - float3 position = WorldPosFromDepth(depth, in_uv, lighting_data.inv_view_proj); + float3 position = WorldPosFromDepth(depth, in_uv, lighting_data.clip2world); // - Lights ArrayBuffer light_buf = ArrayBuffer(param.light_buf_hdl); diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/CSM.hlsli b/shaders/pipelines/raster/deferred/lighting/shadows/CSM.hlsli index 578dcff8b..44b4a408a 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/CSM.hlsli +++ b/shaders/pipelines/raster/deferred/lighting/shadows/CSM.hlsli @@ -6,7 +6,7 @@ // 获取Cascade Index int get_cascade_index(Moer::LightingData lighting_data, float3 world_pos) { - float pixel_view_pos_z = abs(mul(lighting_data.view_matrix, float4(world_pos, 1.0)).z); + float pixel_view_pos_z = abs(mul(lighting_data.world2view, float4(world_pos, 1.0)).z); float pixel_depth_ratio = (pixel_view_pos_z - lighting_data.near_clip) / (lighting_data.far_clip - lighting_data.near_clip); for (int i = 0; i < lighting_data.shadow_csm_num_of_cascades; i++) { @@ -19,7 +19,7 @@ int get_cascade_index(Moer::LightingData lighting_data, float3 world_pos) { float get_cascade_blend_ratio(Moer::LightingData lighting_data, float3 world_pos, int cascade_index) { float pixel_view_pos_z = - abs(mul(lighting_data.view_matrix, float4(world_pos, 1.0)).z); //FIXME:需要取负吗? + abs(mul(lighting_data.world2view, float4(world_pos, 1.0)).z); //FIXME:需要取负吗? 
float blend_band_start_z = lighting_data.near_clip + lighting_data.cascade_blend_start_ratios[cascade_index] * (lighting_data.far_clip - lighting_data.near_clip); diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl b/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl index 500435a1a..c7ce745cb 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl +++ b/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl @@ -6,9 +6,9 @@ BINDLESS_BINDINGS(3, 2, 4, 5) [[vk::push_constant]] ConstantBuffer param; -float3 WorldPosFromDepth(float depth, float2 screen_uv, float4x4 inv_view_proj) { +float3 WorldPosFromDepth(float depth, float2 screen_uv, float4x4 clip2world) { float4 clip = float4(screen_uv.x * 2.f - 1.f, 1.f - screen_uv.y * 2.f, depth, 1.0); - float4 world_w = mul(inv_view_proj, clip); + float4 world_w = mul(clip2world, clip); float3 pos = world_w.xyz / world_w.w; return pos; } @@ -21,7 +21,7 @@ float main(float2 in_uv : TEXCOORD0) : SV_TARGET { // MARK: GBuffer float depth = TextureHandle(param.depth_hdl).Sample2D(in_uv); float3 normal = normalize(Raster::UnpackNormal(TextureHandle(param.normal_hdl).Sample2D(in_uv))); - float3 position = WorldPosFromDepth(depth, in_uv, lighting_data.inv_view_proj); + float3 position = WorldPosFromDepth(depth, in_uv, lighting_data.clip2world); float shadow = calculate_shadow(lighting_data, position, in_uv, normal); diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli b/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli index 072cc5a65..8f3470d7d 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli +++ b/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli @@ -16,7 +16,7 @@ float get_single_shadow( float3 normal, float3 lightDir ) { - float4 shadow_clip_pos = mul(lighting_data.world_to_shadow_clip[cascade_index], float4(world_pos, 1.0)); + float4 shadow_clip_pos 
= mul(lighting_data.world2shadow_clip[cascade_index], float4(world_pos, 1.0)); float3 shadow_ndc_pos = shadow_clip_pos.xyz / shadow_clip_pos.w; float2 shadow_uv = float2(shadow_ndc_pos.x * 0.5 + 0.5, 1.0 - (shadow_ndc_pos.y * 0.5 + 0.5)); bool in_bounds = shadow_uv.x >= 0.0 && shadow_uv.x <= 1.0 && shadow_uv.y >= 0.0 && shadow_uv.y <= 1.0 && diff --git a/source/runtime/render/renderer/raster/AaPass.h b/source/runtime/render/renderer/raster/AaPass.h index 8e45a7c7f..72041d812 100644 --- a/source/runtime/render/renderer/raster/AaPass.h +++ b/source/runtime/render/renderer/raster/AaPass.h @@ -328,9 +328,9 @@ class AaPass { auto smaa_shared_param = [&]() { SmaaSharedPipelineBindlessParam param; + param.clip2world = Transpose(camera.GetViewProjectionMatrixInv()); param.aa_mode = static_cast(ui_config.aa_mode); param.color_tex = input_image.hdl; - param.position_tex = context.textures.position.hdl; param.depth_tex = context.textures.depth_linear_sampler.hdl; param.search_tex = smaa_search_tex.hdl; param.area_tex = smaa_area_tex.hdl; @@ -347,7 +347,7 @@ class AaPass { context.textures.aa_output.GetSizeX(), context.textures.aa_output.GetSizeY() ); - param.curr_inv_vp_and_prev_vp = Transpose(previous_view_proj * current_inv_view_proj); + param.clip2prev_clip = Transpose(previous_view_proj * current_inv_view_proj); return param; }(); diff --git a/source/runtime/render/renderer/raster/AoPass.h b/source/runtime/render/renderer/raster/AoPass.h index 5e5d661d6..eb24abfa4 100644 --- a/source/runtime/render/renderer/raster/AoPass.h +++ b/source/runtime/render/renderer/raster/AoPass.h @@ -140,6 +140,7 @@ class AoPass { ) { AoPipelineBindlessParam param; + param.clip2world = Transpose(camera.GetViewProjectionMatrixInv()); param.inv_resolution = float2(1.0f) / float2(context.textures.ao_output.GetSize()); param.ssao_intensity = ui_config.ssao_intensity; param.ssao_max_distance = ui_config.ssao_max_distance; @@ -148,7 +149,6 @@ class AoPass { param.ao_mode = 
static_cast(ui_config.ao_mode); param.input_image = context.textures.lighting_output.hdl; param.normal_tex = context.textures.normal.hdl; - param.position_tex = context.textures.position.hdl; param.depth_tex = context.textures.depth_nearest_sampler.hdl; param.noise_tex = context.textures.noise_tex.hdl; @@ -183,7 +183,6 @@ class AoPass { param.inv_resolution = float2(1.0) / float2(context.textures.ao_output.GetSize()); param.input_image = context.textures.lighting_output.hdl; param.normal_tex = context.textures.normal.hdl; - param.position_tex = context.textures.position.hdl; param.depth_tex = context.textures.depth_nearest_sampler.hdl; param.ao_mode = static_cast(ui_config.ao_mode); param.sample_mode = static_cast(ui_config.rtao_sample_mode); @@ -214,6 +213,7 @@ class AoPass { ) { SsdoPipelineBindlessParam param; + param.clip2world = Transpose(camera.GetViewProjectionMatrixInv()); param.inv_resolution = float2(1.0f) / float2(context.textures.ao_output.GetSize()); param.ssdo_sample_count = ui_config.ssao_spp; param.ssdo_radius = ui_config.ssdo_sample_radius; @@ -222,13 +222,12 @@ class AoPass { param.ssdo_indirect_intensity = ui_config.ssdo_indirect_intensity; param.normal_tex = context.textures.normal.hdl; param.depth_tex = context.textures.depth_nearest_sampler.hdl; - param.position_tex = context.textures.position.hdl; param.noise_tex = context.textures.noise_tex.hdl; param.ao_mode = static_cast(ui_config.ao_mode); param.ssdo_depth_bias = ui_config.ssdo_depth_bias; param.input_image = context.textures.lighting_output.hdl; - param.view_projection_matrix = Transpose(camera.GetViewProjectionMatrix()); - param.view_matrix = Transpose(camera.GetViewMatrix()); + param.world2clip = Transpose(camera.GetViewProjectionMatrix()); + param.world2view = Transpose(camera.GetViewMatrix()); param.camera_position = camera.GetPosition(); UpdateMotionVectorData(context, camera); diff --git a/source/runtime/render/renderer/raster/GeometryPass.h 
b/source/runtime/render/renderer/raster/GeometryPass.h index 2af537319..5d24ac595 100644 --- a/source/runtime/render/renderer/raster/GeometryPass.h +++ b/source/runtime/render/renderer/raster/GeometryPass.h @@ -49,8 +49,7 @@ class GeometryPass { RHIColorAttachmentInfo::Preset(PF_R32_UINT), // vbuffer RHIColorAttachmentInfo::Preset(PF_A2R10G10B10_UNORM_PACK32), // normal RHIColorAttachmentInfo::Preset(PF_A2R10G10B10_UNORM_PACK32), // tangent - RHIColorAttachmentInfo::Preset(PF_R32G32_SFLOAT), // uv - RHIColorAttachmentInfo::Preset(PF_R32G32B32A32_SFLOAT) // position + RHIColorAttachmentInfo::Preset(PF_R32G32_SFLOAT) // uv }, ds_info, // depth buf context.textures.depth_linear_sampler.tex->GetFormat() @@ -133,8 +132,7 @@ class GeometryPass { ColorAttachment(context.textures.vbuffer.tex), ColorAttachment(context.textures.normal.tex), ColorAttachment(context.textures.tangent.tex), - ColorAttachment(context.textures.uv.tex), - ColorAttachment(context.textures.position.tex) + ColorAttachment(context.textures.uv.tex) ); } diff --git a/source/runtime/render/renderer/raster/LightingPass.h b/source/runtime/render/renderer/raster/LightingPass.h index b587e6135..c801295ac 100644 --- a/source/runtime/render/renderer/raster/LightingPass.h +++ b/source/runtime/render/renderer/raster/LightingPass.h @@ -58,7 +58,6 @@ class LightingPass { material_param.gbuffer_tangent = context.textures.tangent.hdl; material_param.gbuffer_uv = context.textures.uv.hdl; material_param.gbuffer_depth = context.textures.depth_nearest_sampler.hdl; - material_param.gbuffer_position = context.textures.position.hdl; material_param.global_param_handle = context.lighting_data_buffer.hdl; material_param.light_buf_hdl = context.scene.GetGpuSceneRes().light_buf.hdl; diff --git a/source/runtime/render/renderer/raster/RasterRenderer.cpp b/source/runtime/render/renderer/raster/RasterRenderer.cpp index f4dc7dd67..bbfa8960e 100644 --- a/source/runtime/render/renderer/raster/RasterRenderer.cpp +++ 
b/source/runtime/render/renderer/raster/RasterRenderer.cpp @@ -105,7 +105,7 @@ void RasterRenderer::UpdateGlobalLightingData( uint csm_layers = ui_config.shadow_csm_num_of_cascades; LightingData* lighting_data = &context.lighting_data; - lighting_data->inv_view_proj = Transpose(camera.GetViewProjectionMatrixInv()); + lighting_data->clip2world = Transpose(camera.GetViewProjectionMatrixInv()); lighting_data->light_count = context.scene.cpu_scene().GetLightCount(); lighting_data->camera_position = camera.GetPosition(); @@ -126,11 +126,11 @@ void RasterRenderer::UpdateGlobalLightingData( // Shadow Transform for (uint i = 0; i < csm_layers; i++) { - lighting_data->world_to_shadow_clip[i] = Transpose(lighting_data->world_to_shadow_clip[i]); + lighting_data->world2shadow_clip[i] = Transpose(lighting_data->world2shadow_clip[i]); } - lighting_data->view_matrix = Transpose(camera.GetViewMatrix()); - lighting_data->near_clip = camera.GetNearClip(); - lighting_data->far_clip = camera.GetFarClip(); + lighting_data->world2view = Transpose(camera.GetViewMatrix()); + lighting_data->near_clip = camera.GetNearClip(); + lighting_data->far_clip = camera.GetFarClip(); lighting_data->is_csm_blend_enabled = ui_config.shadow_csm_blend_option ? 
1 : 0; // 注:此处不一定使用所有CSM,Shader中具体根据shadow_csm_num_of_cascades来决定 @@ -233,7 +233,7 @@ bool RasterRenderer::RunSingle(const SharedPtr editor_config, cons } auto& raster_config = editor_config->raster_config; - auto& camera = scene.GetMainCamera().camera; + auto& camera = scene.GetMainCamera().camera; { // Jitter Camera for SMAA T2x diff --git a/source/runtime/render/renderer/raster/RasterResource.h b/source/runtime/render/renderer/raster/RasterResource.h index 7479157ad..79e0bc202 100644 --- a/source/runtime/render/renderer/raster/RasterResource.h +++ b/source/runtime/render/renderer/raster/RasterResource.h @@ -64,7 +64,7 @@ struct RasterContext { struct CSMData { float3 light_dir; StaticArray shadow_map_textures; - StaticArray world_to_shadow_clip; + StaticArray world2shadow_clip; } csm_data; struct PointShadowData { diff --git a/source/runtime/render/renderer/raster/ShadowDepthPass.cpp b/source/runtime/render/renderer/raster/ShadowDepthPass.cpp index fd7225c5f..1ed7cff72 100644 --- a/source/runtime/render/renderer/raster/ShadowDepthPass.cpp +++ b/source/runtime/render/renderer/raster/ShadowDepthPass.cpp @@ -464,7 +464,7 @@ void ShadowDepthPass::RenderCSM(RasterContext& context, const RasterConfig& ui_c (cascade_index == 0) ? 
0.0f : context.lighting_data.cascade_blend_start_ratios[cascade_index - 1]; const float frustum_far_ratio = context.lighting_data.cascade_split_ratios[cascade_index]; - context.lighting_data.world_to_shadow_clip[cascade_index] = get_world_to_shadow_clip_matrix( + context.lighting_data.world2shadow_clip[cascade_index] = get_world_to_shadow_clip_matrix( context.lighting_data.main_light_direction, camera, ui_config, @@ -476,7 +476,7 @@ void ShadowDepthPass::RenderCSM(RasterContext& context, const RasterConfig& ui_c RenderShadow( context, ui_config, - context.lighting_data.world_to_shadow_clip[cascade_index], + context.lighting_data.world2shadow_clip[cascade_index], Rect2D(0, 0, ui_config.shadow_csm_sm_size, ui_config.shadow_csm_sm_size), context.csm_data.shadow_map_textures[cascade_index].tex->GetView(), std::format("Shadow Depth Pass - {}", cascade_index) diff --git a/source/runtime/render/renderer/raster/SkyboxPass.cpp b/source/runtime/render/renderer/raster/SkyboxPass.cpp index dd8ff9644..71ac06b27 100644 --- a/source/runtime/render/renderer/raster/SkyboxPass.cpp +++ b/source/runtime/render/renderer/raster/SkyboxPass.cpp @@ -34,8 +34,8 @@ void SkyboxPass::Process(RasterContext& context, const RasterConfig& ui_config, } else { skybox_param.exposure_factor = float3(1.0f, 1.0f, 1.0f); } - skybox_param.camera_pos = camera.GetPosition(); - skybox_param.inv_view_proj = Transpose(camera.GetViewProjectionMatrixInv()); + skybox_param.camera_pos = camera.GetPosition(); + skybox_param.clip2world = Transpose(camera.GetViewProjectionMatrixInv()); DepthAttachment depth_att(context.textures.depth_linear_sampler.tex->GetView().GetTexture()); depth_att.action = EAttachmentAction::AC_DS_LOAD_STORE; diff --git a/source/runtime/render/renderer/raster/SsrPass.h b/source/runtime/render/renderer/raster/SsrPass.h index c13ff84e1..59c1c8821 100644 --- a/source/runtime/render/renderer/raster/SsrPass.h +++ b/source/runtime/render/renderer/raster/SsrPass.h @@ -59,7 +59,8 @@ class SsrPass { } 
SsrPipelineBindlessParam param; - param.view_projection_matrix = Transpose(camera.GetViewProjectionMatrix()); + param.clip2world = Transpose(camera.GetViewProjectionMatrixInv()); + param.world2clip = Transpose(camera.GetViewProjectionMatrix()); param.camera_position = camera.GetPosition(); param.near_clip = camera.GetNearClip(); param.resolution = float2(context.textures.ssr_output.GetSize()); @@ -71,7 +72,6 @@ class SsrPass { param.ssr_is_enable_jitter = ui_config.ssr_is_enable_jitter; param.ssr_is_force_ground_enable_ssr = ui_config.ssr_is_force_ground_enable_ssr; param.color_tex = input_image.hdl; - param.position_tex = context.textures.position.hdl; param.normal_tex = context.textures.normal.hdl; param.depth_tex = context.textures.depth_linear_sampler.hdl; param.vbuffer = context.textures.vbuffer.hdl; diff --git a/source/runtime/render/shaderheaders/shared/raster/env_and_atmo_pass/ShaderParameters.h b/source/runtime/render/shaderheaders/shared/raster/env_and_atmo_pass/ShaderParameters.h index 32390794d..2d2dffc00 100644 --- a/source/runtime/render/shaderheaders/shared/raster/env_and_atmo_pass/ShaderParameters.h +++ b/source/runtime/render/shaderheaders/shared/raster/env_and_atmo_pass/ShaderParameters.h @@ -28,7 +28,7 @@ struct SkyboxPassBindlessParam { uint cubemap_handle; float3 exposure_factor; float3 camera_pos; - float4x4 inv_view_proj; + float4x4 clip2world; }; // MARK: Main Content End diff --git a/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h b/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h index bec584178..349f16404 100644 --- a/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h +++ b/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h @@ -43,9 +43,9 @@ struct MaterialPassBindlessParam { uint shadow_mask_handle; }; struct LightingData { - float4x4 world_to_shadow_clip[MAX_CSM_CASCADES]; + float4x4 
world2shadow_clip[MAX_CSM_CASCADES]; - float4x4 inv_view_proj; + float4x4 clip2world; float3 camera_position; // uint padding;// FIXME: need or not? uint light_count; @@ -69,7 +69,7 @@ struct LightingData { float light_size_world; //assumed light size for soft shadow calculation float4 scale_data[MAX_CSM_CASCADES]; - float4x4 view_matrix; + float4x4 world2view; float near_clip; float far_clip; float cascade_split_ratios[MAX_CSM_CASCADES]; diff --git a/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h b/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h index 8ee41a3e0..024669fd2 100644 --- a/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h +++ b/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h @@ -28,6 +28,8 @@ struct CameraMotionVectorData { }; struct AoPipelineBindlessParam { + float4x4 clip2world; + float2 inv_resolution; float ssao_intensity; float ssao_max_distance; @@ -38,33 +40,32 @@ struct AoPipelineBindlessParam { uint input_image; uint normal_tex; - uint position_tex; uint depth_tex; - uint noise_tex; // linear & repeat sampler - + uint noise_tex; // linear & repeat sampler uint camera_mv_data_handle; // for camera motion vector }; struct SsdoPipelineBindlessParam { - float2 inv_resolution; // 1.0 / (屏幕宽度,高度) - uint ssdo_sample_count; // 采样次数 (e.g. 16,32,…) - float ssdo_radius; // 半径(世界空间单位) - float ssdo_max_distance; // 最大距离(世界空间单位) - float ssdo_intensity; // 强度调节参数 - float ssdo_indirect_intensity; // 间接光强度调节参数 + float4x4 clip2world; + float4x4 world2clip; + float4x4 world2view; + + float3 camera_position; + float ssdo_depth_bias; + + float2 inv_resolution; // 1.0 / (屏幕宽度,高度) + uint ssdo_sample_count; // 采样次数 (e.g. 
16,32,…) + float ssdo_radius; // 半径(世界空间单位) + + float ssdo_max_distance; // 最大距离(世界空间单位) + float ssdo_intensity; // 强度调节参数 + float ssdo_indirect_intensity; // 间接光强度调节参数 + uint normal_tex; - uint normal_tex; uint depth_tex; - uint position_tex; uint noise_tex; - uint ao_mode; - - uint input_image; - float4x4 view_projection_matrix; - float4x4 view_matrix; - float3 camera_position; - float ssdo_depth_bias; + uint input_image; uint camera_mv_data_handle; }; @@ -79,10 +80,9 @@ struct RtaoPipelineBindlessParam { uint input_image; uint normal_tex; - uint position_tex; uint depth_tex; + uint ao_mode; - uint ao_mode; uint sample_mode; uint spp; float ray_trace_distance; @@ -120,33 +120,36 @@ struct BilateralFilterDenoiserPipelineBindlessParam { }; struct SsrPipelineBindlessParam { - float4x4 view_projection_matrix; - float3 camera_position; - float near_clip; - float2 resolution; - float far_clip; - float ssr_roughness_threshold; - float ssr_metallic_threshold; - float ssr_step_base; - uint ssr_sample_count; - uint ssr_is_enable_jitter; - uint ssr_is_force_ground_enable_ssr; - uint color_tex; - uint position_tex; - uint normal_tex; - uint depth_tex; - uint vbuffer; - uint gbuffer_uv; - uint material_buf_hdl; + float4x4 clip2world; + float4x4 world2clip; + + float3 camera_position; + float near_clip; + + float2 resolution; + float far_clip; + float ssr_roughness_threshold; + + float ssr_metallic_threshold; + float ssr_step_base; + uint ssr_sample_count; + uint ssr_is_enable_jitter; + uint ssr_is_force_ground_enable_ssr; + uint color_tex; + uint normal_tex; + uint depth_tex; + uint vbuffer; + uint gbuffer_uv; + uint material_buf_hdl; }; struct SmaaSharedPipelineBindlessParam { - float4x4 curr_inv_vp_and_prev_vp; // = previous_view_projection * current_inverse_view_projection - float4 rt_metrics; // float4(inv_resolution.xy, resolution.xy) + float4x4 clip2world; + float4x4 clip2prev_clip; // = previous_view_projection * current_inverse_view_projection + float4 rt_metrics; // 
float4(inv_resolution.xy, resolution.xy) uint aa_mode; - uint color_tex; // initial input image - uint position_tex; // position gbuffer - uint depth_tex; // depth gbuffer + uint color_tex; // initial input image + uint depth_tex; // depth gbuffer uint search_tex; uint area_tex; uint edges_tex; From d38b51f64c6cdcfd2183b2c8b9d0723cbb82ef0a Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Sun, 29 Mar 2026 14:13:15 +0800 Subject: [PATCH 06/17] =?UTF-8?q?feat(raster):=20=E4=BC=98=E5=8C=96SSAO?= =?UTF-8?q?=E7=9A=84=E8=A1=A8=E7=8E=B0=EF=BC=9B=E4=BF=AE=E5=A4=8Dnearest?= =?UTF-8?q?=5Fdepth=E5=92=8C=E9=94=99=E8=AF=AFnoise=E5=AF=BC=E8=87=B4?= =?UTF-8?q?=E7=9A=84pattern?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../postprocess/lighting_effects/Ao.hlsl | 17 +++++++++++------ source/editor/raster_ui/RasterUI.cpp | 6 +++--- source/runtime/render/renderer/raster/AoPass.h | 2 +- .../render/renderer/raster/RasterConfig.h | 11 ++++++----- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl b/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl index 64612d336..200edcde7 100644 --- a/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl @@ -16,9 +16,13 @@ BINDLESS_BINDINGS(3, 2, 4, 5) static const float3 ABNORMAL_COLOR = float3(0.0, 0.0, 1.0); -// uv in [0, 1]; output in [0, 1] -float2 random_2to2(float2 uv) { - return TextureHandle(param.noise_tex).Sample2D(uv).rg; +// Per-pixel hash: 以整数像素坐标为输入,保证相邻像素得到不同的随机值, +// 不依赖 noise texture,彻底避免 UV 采样粒度导致的 screen-fixed pattern。 +float2 hash22(float2 pixel, uint sample_idx) { + float2 p = pixel + float2(float(sample_idx) * 17.0, float(sample_idx) * 31.0); + p = frac(p * float2(443.8975, 397.2973)); + p += dot(p.xy, p.yx + 19.19); + return frac(float2(p.x * p.y, p.x + p.y)); } // reference: games202 & https://www.shadertoy.com/view/Ms33WB @@ 
-30,11 +34,12 @@ float ssao_games202(float2 uv) { // printf("uv: %f, %f; pos: %f, %f, %f\n", uv.x, uv.y, position.x, position.y, position.z); // } - float ao = 0.0; - float2 tmp1 = param.ssao_radius * param.inv_resolution; + float ao = 0.0; + float2 tmp1 = param.ssao_radius * param.inv_resolution; + float2 pixel = floor(uv / param.inv_resolution); // 整数像素坐标 for (uint i = 0; i < param.ssao_sample_count; i++) { - float2 offset = random_2to2(uv + 0.093 * float2(i, i)) * 2.0 - 1.0; + float2 offset = hash22(pixel, i) * 2.0 - 1.0; float3 sample_position = WorldPosFromDepthTexture(param.depth_tex, uv + offset * tmp1, param.clip2world); float3 vec = sample_position - position; diff --git a/source/editor/raster_ui/RasterUI.cpp b/source/editor/raster_ui/RasterUI.cpp index a85ed81af..e5a0d8c0d 100644 --- a/source/editor/raster_ui/RasterUI.cpp +++ b/source/editor/raster_ui/RasterUI.cpp @@ -287,9 +287,9 @@ void RasterUI::ShowConfig() { if (m_config.ao_mode == EAoMode::SSAO || m_config.ao_mode == EAoMode::SSAO_AO_ONLY) { ImGui::SliderFloat("Intensity", &m_config.ssao_intensity, 0.0f, 2.0f); - ImGui::SliderFloat("Ray Trace Radius", &m_config.ssao_max_distance, 0.0f, 2.0f); - ImGui::SliderInt("Samples Per Pixel", &m_config.ssao_spp, 1, 16); - ImGui::SliderInt("Sample Radius", &m_config.ssao_sample_radius, 1, 8); + ImGui::SliderFloat("Ray Trace Radius", &m_config.ssao_max_distance, 0.0f, 5.0f); + ImGui::SliderInt("Samples Per Pixel", &m_config.ssao_spp, 1, 32); + ImGui::SliderInt("Sample Radius", &m_config.ssao_sample_radius, 1, 32); } else if (m_config.ao_mode == EAoMode::RTAO || m_config.ao_mode == EAoMode::RTAO_AO_ONLY) { ImGui::SliderFloat("Intensity", &m_config.rtao_intensity, 0.0f, 1.0f); diff --git a/source/runtime/render/renderer/raster/AoPass.h b/source/runtime/render/renderer/raster/AoPass.h index eb24abfa4..1565a9941 100644 --- a/source/runtime/render/renderer/raster/AoPass.h +++ b/source/runtime/render/renderer/raster/AoPass.h @@ -149,7 +149,7 @@ class AoPass { 
param.ao_mode = static_cast(ui_config.ao_mode); param.input_image = context.textures.lighting_output.hdl; param.normal_tex = context.textures.normal.hdl; - param.depth_tex = context.textures.depth_nearest_sampler.hdl; + param.depth_tex = context.textures.depth_linear_sampler.hdl; param.noise_tex = context.textures.noise_tex.hdl; UpdateMotionVectorData(context, camera); diff --git a/source/runtime/render/renderer/raster/RasterConfig.h b/source/runtime/render/renderer/raster/RasterConfig.h index 95c999201..26dc2f7bd 100644 --- a/source/runtime/render/renderer/raster/RasterConfig.h +++ b/source/runtime/render/renderer/raster/RasterConfig.h @@ -114,16 +114,17 @@ struct RasterConfig { bool geometry_enable_alpha_test = true; float geometry_alpha_test_blend_pixel_cutoff = 0.5f; // 当AlphaMode为BLEND时,低于该值的像素会被丢弃 bool enable_frustum_culling = true; // GPU视锥剔除 - + // MARK: Culling Statistics (只读,由GPU更新) struct CullingStats { uint total_instances_before = 0; uint total_instances_after = 0; uint visible_draws = 0; uint total_draws = 0; - + float GetCullingRatio() const { - if (total_instances_before == 0) return 0.0f; + if (total_instances_before == 0) + return 0.0f; return 1.0f - (float)total_instances_after / (float)total_instances_before; } } culling_stats; @@ -172,8 +173,8 @@ struct RasterConfig { EAoMode ao_mode = EAoMode::RTAO; float ssao_intensity = 1.0f; int ssao_spp = 16; - int ssao_sample_radius = 2; - float ssao_max_distance = 0.5f; + int ssao_sample_radius = 16; + float ssao_max_distance = 1.0f; ERtaoSampleMode rtao_sample_mode = ERtaoSampleMode::COSINE_WEIGHTED; float rtao_intensity = 1.0f; From d558aea41072406134898ace38bc5afb01138432 Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Sun, 29 Mar 2026 15:09:29 +0800 Subject: [PATCH 07/17] =?UTF-8?q?fix(raster):=20=E5=88=A0=E5=8E=BB?= =?UTF-8?q?=E4=BA=86=E5=A4=9A=E4=BD=99=E7=9A=84position=20gbuffer=EF=BC=9B?= =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=BA=86=E5=88=A4=E6=96=ADStencilAttachment?= 
=?UTF-8?q?=E7=9A=84=E4=BB=A3=E7=A0=81=EF=BC=9B=E4=BC=98=E5=8C=96=E4=BA=86?= =?UTF-8?q?=E9=98=B4=E5=BD=B1=E9=87=87=E6=A0=B7=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../postprocess/lighting_effects/Ssdo.hlsl | 4 +- .../deferred/lighting/shadows/PCSS.hlsli | 4 + .../lighting/shadows/ShadowMask.frag.hlsl | 5 + .../runtime/render/renderer/raster/AoPass.h | 1 - .../render/renderer/raster/GeometryPass.h | 6 +- .../render/renderer/raster/RasterTextures.h | 4 - .../render/rhi/vulkan/VulkanDevice.cpp | 20 +- .../runtime/render/rhi/vulkan/VulkanQueue.cpp | 203 ++++++++++-------- .../raster/post_process/ShaderParameters.h | 1 - 9 files changed, 143 insertions(+), 105 deletions(-) diff --git a/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl b/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl index 8e12fe93d..76bb8c7f3 100644 --- a/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl @@ -66,7 +66,9 @@ float3 GetVplIndirectLight(float3 vpl_pos, float3 vpl_normal, float3 shading_pos float attenuation = 1.0 / (VPL_distance * VPL_distance + 1.0); // 稳定衰减 // 面积�? 
- float vpl_linear_depth = abs(mul(param.world2view, float4(vpl_pos, 1.0)).z); + // vpl_linear_depth ≈ abs(view_z), 透视投影下 clip.w == view_z + float4 vpl_clip = mul(param.world2clip, float4(vpl_pos, 1.0)); + float vpl_linear_depth = abs(vpl_clip.w); float area_weight = vpl_linear_depth * vpl_linear_depth + 0.0001; area_weight = min(2, area_weight); //防止过大 diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli b/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli index 6a873cbee..3a7de79fa 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli +++ b/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli @@ -133,6 +133,10 @@ DEFINE_FIND_BLOCKER(Dir) if (stats.numBlockers < 0.1) \ return 1.0; \ \ + /* 全遮挡 early-out:所有样本都是 blocker,PCF 必然返回 0,直接跳过整个 PCF 阶段 */ \ + if (stats.numBlockers > (float(PCSS_SEARCH_SAMPLES) - 0.5)) \ + return 0.0; \ + \ float penumbra_uv = CalculatePenumbra##SUFFIX(ctx, stats.avgDepth); \ \ /* 原 #if PCSS_LIGHT_TYPE == 1 */ \ diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl b/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl index c7ce745cb..826d1189f 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl +++ b/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl @@ -20,6 +20,11 @@ float main(float2 in_uv : TEXCOORD0) : SV_TARGET { // MARK: GBuffer float depth = TextureHandle(param.depth_hdl).Sample2D(in_uv); + + // 天空/背景像素在 Reverse-Z 下 depth == 0,无需 PCSS,直接返回全亮 + if (depth < 1e-6) + return 1.0; + float3 normal = normalize(Raster::UnpackNormal(TextureHandle(param.normal_hdl).Sample2D(in_uv))); float3 position = WorldPosFromDepth(depth, in_uv, lighting_data.clip2world); diff --git a/source/runtime/render/renderer/raster/AoPass.h b/source/runtime/render/renderer/raster/AoPass.h index 1565a9941..506d17d2c 100644 --- a/source/runtime/render/renderer/raster/AoPass.h +++ 
b/source/runtime/render/renderer/raster/AoPass.h @@ -227,7 +227,6 @@ class AoPass { param.ssdo_depth_bias = ui_config.ssdo_depth_bias; param.input_image = context.textures.lighting_output.hdl; param.world2clip = Transpose(camera.GetViewProjectionMatrix()); - param.world2view = Transpose(camera.GetViewMatrix()); param.camera_position = camera.GetPosition(); UpdateMotionVectorData(context, camera); diff --git a/source/runtime/render/renderer/raster/GeometryPass.h b/source/runtime/render/renderer/raster/GeometryPass.h index 5d24ac595..95424d239 100644 --- a/source/runtime/render/renderer/raster/GeometryPass.h +++ b/source/runtime/render/renderer/raster/GeometryPass.h @@ -113,10 +113,10 @@ class GeometryPass { param.alpha_test_blend_pixel_cutoff = ui_config.geometry_alpha_test_blend_pixel_cutoff; // 3. Draw - auto rect2d = context.textures.position.GetRect2D(); + auto rect2d = context.textures.vbuffer.GetRect2D(); assert( - rect2d == context.textures.vbuffer.GetRect2D() && rect2d == context.textures.normal.GetRect2D() && - rect2d == context.textures.tangent.GetRect2D() && rect2d == context.textures.uv.GetRect2D() + rect2d == context.textures.normal.GetRect2D() && rect2d == context.textures.tangent.GetRect2D() && + rect2d == context.textures.uv.GetRect2D() ); context.cmd_list.Gfx(m_pso, context.bdls, param) diff --git a/source/runtime/render/renderer/raster/RasterTextures.h b/source/runtime/render/renderer/raster/RasterTextures.h index 40d63801b..3eec2fd9e 100644 --- a/source/runtime/render/renderer/raster/RasterTextures.h +++ b/source/runtime/render/renderer/raster/RasterTextures.h @@ -39,10 +39,6 @@ namespace Moer::Render::Raster { Tex2DTag, \ TexConfig::Default(PF_A2R10G10B10_UNORM_PACK32).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ X(TexHandle, uv, Tex2DTag, TexConfig::Default(PF_R32G32_SFLOAT).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - position, \ - Tex2DTag, \ - TexConfig::Default(PF_R32G32B32A32_SFLOAT).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ 
X(TexHandle, \ shadow_mask, \ Tex2DTag, \ diff --git a/source/runtime/render/rhi/vulkan/VulkanDevice.cpp b/source/runtime/render/rhi/vulkan/VulkanDevice.cpp index d97d5bae0..af354635a 100644 --- a/source/runtime/render/rhi/vulkan/VulkanDevice.cpp +++ b/source/runtime/render/rhi/vulkan/VulkanDevice.cpp @@ -829,9 +829,9 @@ void VulkanDevice::PopulateDebugMessengerCreateInfo(VkDebugUtilsMessengerCreateI _create_info.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; - _create_info.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | - VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + _create_info.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; _create_info.pfnUserCallback = DebugCallback; } @@ -1096,12 +1096,16 @@ VulkanDevice::CreatePipeline(GfxPsoCreateInfo&& _create_info, PipelineShaderInfo rendering_create_info.pColorAttachmentFormats = color_attachment_formats.data(); rendering_create_info.depthAttachmentFormat = VulkanEnumTranslator::METoVKFormat(_create_info.depth_stencil_format); -#if WITH_CUDA - rendering_create_info.stencilAttachmentFormat = VK_FORMAT_UNDEFINED; -#else + + // Only set stencil format if the depth format actually has a stencil aspect + bool depth_has_stencil = + (_create_info.depth_stencil_format == PF_D32_SFLOAT_S8_UINT || + _create_info.depth_stencil_format == PF_D24_UNORM_S8_UINT || + _create_info.depth_stencil_format == PF_D16_UNORM_S8_UINT || + _create_info.depth_stencil_format == PF_S8_UINT); rendering_create_info.stencilAttachmentFormat = - VulkanEnumTranslator::METoVKFormat(_create_info.depth_stencil_format); -#endif + depth_has_stencil ? 
VulkanEnumTranslator::METoVKFormat(_create_info.depth_stencil_format) : + VK_FORMAT_UNDEFINED; auto to_vk_blend_attachment = [](const RHIBlendAttachmentInfo& _info) { VkPipelineColorBlendAttachmentState state{}; diff --git a/source/runtime/render/rhi/vulkan/VulkanQueue.cpp b/source/runtime/render/rhi/vulkan/VulkanQueue.cpp index 01b4e26fb..ca922c125 100644 --- a/source/runtime/render/rhi/vulkan/VulkanQueue.cpp +++ b/source/runtime/render/rhi/vulkan/VulkanQueue.cpp @@ -1,4 +1,5 @@ #include "VulkanQueue.h" +#include "PixelFormat.h" #include "RHICmdReorderer.h" #include "VulkanAllocator.h" #include "VulkanCommand.h" @@ -13,7 +14,6 @@ #include "rhi/RHICommon.h" #include "rhi/RHIIO.h" #include "rhi/RHIResource.h" -#include "PixelFormat.h" #include "VulkanCustomCommand.h" #include "shader/ShaderPipeline.h" @@ -52,6 +52,11 @@ VkRenderingAttachmentInfo FromColorAttachmentInfo(const ColorAttachment& _attach return attachment_info; } +static bool FormatHasStencil(EPixelFormat format) { + return format == PF_D32_SFLOAT_S8_UINT || format == PF_D24_UNORM_S8_UINT || + format == PF_D16_UNORM_S8_UINT || format == PF_S8_UINT; +} + VkRenderingAttachmentInfo FromDepthAttachmentInfo(const DepthAttachment& _attachment) { VkRenderingAttachmentInfo attachment_info{}; attachment_info.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO; @@ -62,7 +67,9 @@ VkRenderingAttachmentInfo FromDepthAttachmentInfo(const DepthAttachment& _attach static_cast(_attachment.mip_level), 1, static_cast(_attachment.array_layer), 1 ); - attachment_info.imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + bool has_stencil = FormatHasStencil(_attachment.target->GetFormat()); + attachment_info.imageLayout = has_stencil ? 
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL; attachment_info.loadOp = VulkanEnumTranslator::METoVKAttachmentLoadOp(GetLoadOp(GetDepthAction(_attachment.action))); attachment_info.storeOp = @@ -1487,8 +1494,10 @@ class VkCmdVisitor : VulkanDeviceObject { } } std::optional depth_stencil_attachment; + bool has_stencil = false; if (pass_info.depth_attachment.Valid()) { depth_stencil_attachment = FromDepthAttachmentInfo(pass_info.depth_attachment); + has_stencil = FormatHasStencil(pass_info.depth_attachment.target->GetFormat()); } VkRenderingInfo dynamic_rendering_info{ @@ -1503,12 +1512,9 @@ class VkCmdVisitor : VulkanDeviceObject { .pColorAttachments = color_attachments.data(), .pDepthAttachment = depth_stencil_attachment.has_value() ? &depth_stencil_attachment.value() : nullptr, -#if WITH_CUDA - .pStencilAttachment = nullptr -#else - .pStencilAttachment = - depth_stencil_attachment.has_value() ? &depth_stencil_attachment.value() : nullptr -#endif + .pStencilAttachment = (depth_stencil_attachment.has_value() && has_stencil) ? 
+ &depth_stencil_attachment.value() : + nullptr }; cmd_list.BeginRendering(std::move(dynamic_rendering_info)); @@ -1526,12 +1532,12 @@ class VkCmdVisitor : VulkanDeviceObject { const auto& draw_datas = _cmd.DrawData(); const auto& rect = pass_info.render_area; VkViewport viewport{ - .x = float(rect.offset.x), - .y = float(rect.offset.y), - .width = float(rect.extent.width), - .height = float(rect.extent.height), - .minDepth = 0.0f, - .maxDepth = 1.0f + .x = float(rect.offset.x), + .y = float(rect.offset.y), + .width = float(rect.extent.width), + .height = float(rect.extent.height), + .minDepth = 0.0f, + .maxDepth = 1.0f }; viewport.y += viewport.height; @@ -1668,8 +1674,10 @@ class VkCmdVisitor : VulkanDeviceObject { color_attachments[i] = FromColorAttachmentInfo(pass_info.color_attachments[i]); } std::optional depth_stencil_attachment; + bool has_stencil = false; if (pass_info.depth_attachment.Valid()) { depth_stencil_attachment = FromDepthAttachmentInfo(pass_info.depth_attachment); + has_stencil = FormatHasStencil(pass_info.depth_attachment.target->GetFormat()); } VkRenderingInfo dynamic_rendering_info{ @@ -1684,23 +1692,20 @@ class VkCmdVisitor : VulkanDeviceObject { .pColorAttachments = color_attachments.data(), .pDepthAttachment = depth_stencil_attachment.has_value() ? &depth_stencil_attachment.value() : nullptr, -#if WITH_CUDA - .pStencilAttachment = nullptr -#else - .pStencilAttachment = - depth_stencil_attachment.has_value() ? &depth_stencil_attachment.value() : nullptr -#endif + .pStencilAttachment = (depth_stencil_attachment.has_value() && has_stencil) ? 
+ &depth_stencil_attachment.value() : + nullptr }; cmd_list.BeginRendering(std::move(dynamic_rendering_info)); const auto& rect = pass_info.render_area; VkViewport viewport{ - .x = float(rect.offset.x), - .y = float(rect.offset.y), - .width = float(rect.extent.width), - .height = float(rect.extent.height), - .minDepth = 0.0f, - .maxDepth = 1.0f + .x = float(rect.offset.x), + .y = float(rect.offset.y), + .width = float(rect.extent.width), + .height = float(rect.extent.height), + .minDepth = 0.0f, + .maxDepth = 1.0f }; viewport.y += viewport.height; viewport.height = -viewport.height; @@ -1773,9 +1778,9 @@ class VkCmdVisitor : VulkanDeviceObject { } if (draw_data.indirect_draw_param.has_value()) { - VulkanBuffer* indirect_buffer = - ResourceCast(draw_data.indirect_draw_param->buffer.GetBuffer() - ); + VulkanBuffer* indirect_buffer = ResourceCast( + draw_data.indirect_draw_param->buffer.GetBuffer() + ); if (draw_data.indirect_draw_param->count_buffer.has_value()) { //draw indirect with count buffer auto* count_buffer = ResourceCast( @@ -1814,9 +1819,9 @@ class VkCmdVisitor : VulkanDeviceObject { //draw indirect if (draw_data.indirect_draw_param.has_value()) { - VulkanBuffer* indirect_buffer = - ResourceCast(draw_data.indirect_draw_param->buffer.GetBuffer() - ); + VulkanBuffer* indirect_buffer = ResourceCast( + draw_data.indirect_draw_param->buffer.GetBuffer() + ); if (draw_data.indirect_draw_param->count_buffer.has_value()) { //draw indirect with count buffer auto* count_buffer = ResourceCast( @@ -2264,8 +2269,8 @@ class VkCmdVisitor : VulkanDeviceObject { if (b_texture) { arg.component_cnt = texture_indices_dat.size(); arg.stride = m_device->GetOptionalProperties() - .descriptor_buffer_properties.sampledImageDescriptorSize >> - 2; + .descriptor_buffer_properties.sampledImageDescriptorSize >> + 2; cmd_list.BindDescriptors( shuffle_sd.handle, @@ -2284,8 +2289,8 @@ class VkCmdVisitor : VulkanDeviceObject { if (b_buffer) { arg.component_cnt = buffer_indices_dat.size(); 
arg.stride = m_device->GetOptionalProperties() - .descriptor_buffer_properties.storageBufferDescriptorSize >> - 2; + .descriptor_buffer_properties.storageBufferDescriptorSize >> + 2; cmd_list.BindDescriptors( shuffle_sd.handle, @@ -2389,13 +2394,13 @@ class VkCmdVisitor : VulkanDeviceObject { return; } // VulkanRaytracingScene* scene = reinterpret_cast(_cmd.Handle()); - + // Calculate 16-byte aligned offset for TLAS instance data (Vulkan spec requirement) - constexpr uint64 kInstanceDataAlignment = 256; // 256-byte for AMD GPU compatibility - uint64 raw_device_address = instance_buffer->DeviceAddress(); - uint64 aligned_device_address = Moer::AlignUp(raw_device_address, kInstanceDataAlignment); - uint64 alignment_offset = aligned_device_address - raw_device_address; - + constexpr uint64 kInstanceDataAlignment = 256; // 256-byte for AMD GPU compatibility + uint64 raw_device_address = instance_buffer->DeviceAddress(); + uint64 aligned_device_address = Moer::AlignUp(raw_device_address, kInstanceDataAlignment); + uint64 alignment_offset = aligned_device_address - raw_device_address; + if (to_update.size() != 0) { BufferView staging = allocator.AllocateShaderBuffer(to_update.size() * sizeof(VkAccelerationStructureInstanceKHR)); @@ -2426,7 +2431,8 @@ class VkCmdVisitor : VulkanDeviceObject { BufferView aligned_instance_buffer_view( instance_buffer, alignment_offset, - (instance_buffer->GetByteSize() - alignment_offset) / sizeof(VkAccelerationStructureInstanceKHR), + (instance_buffer->GetByteSize() - alignment_offset) / + sizeof(VkAccelerationStructureInstanceKHR), sizeof(VkAccelerationStructureInstanceKHR), EPixelFormat::PF_UNDEFINED ); @@ -2473,7 +2479,7 @@ class VkCmdVisitor : VulkanDeviceObject { geometry.flags = 0; geometry.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR; - geometry.geometry.instances.arrayOfPointers = VK_FALSE; + geometry.geometry.instances.arrayOfPointers = VK_FALSE; // Use 16-byte aligned device 
address for TLAS instance data (Vulkan spec requirement) // Reuse the aligned address calculated earlier in this function geometry.geometry.instances.data.deviceAddress = aligned_device_address; @@ -2610,7 +2616,8 @@ void VkNativeQueue::SubmitEmpty(VkFence _fence) { // 这个锁只在AMD GPU上使用,因为AMD GPU没有TransferQueue // 在现代NVIDIA GPU上,这个锁不会被触发,接近0开销,不用在意性能 std::unique_lock guard; - if (submit_mutex) guard = std::unique_lock(*submit_mutex); + if (submit_mutex) + guard = std::unique_lock(*submit_mutex); VkSubmitInfo2 submit_info{}; submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO_2; @@ -2629,7 +2636,8 @@ void VkNativeQueue::SubmitEmpty(VkFence _fence) { void VkNativeQueue::Submit(VulkanCmdList& _cmdlist, VkFence _fence) { std::unique_lock guard; - if (submit_mutex) guard = std::unique_lock(*submit_mutex); + if (submit_mutex) + guard = std::unique_lock(*submit_mutex); VkSubmitInfo2 submit_info{}; VkCommandBuffer cmd = _cmdlist.GetHandle(); @@ -2649,10 +2657,15 @@ void VkNativeQueue::Submit(VulkanCmdList& _cmdlist, VkFence _fence) { VkResult submit_result = vkQueueSubmit2(queue, 1, &submit_info, _fence); if (submit_result != VK_SUCCESS) { - LOG_ERROR("[VkNativeQueue] vkQueueSubmit2 FAILED! result={}, queue={:#x}, type={}, " - "wait_count={}, signal_count={}", - (int)submit_result, (uint64)queue, (int)type, - wait_infos.size(), signal_infos.size()); + LOG_ERROR( + "[VkNativeQueue] vkQueueSubmit2 FAILED! 
result={}, queue={:#x}, type={}, " + "wait_count={}, signal_count={}", + (int)submit_result, + (uint64)queue, + (int)type, + wait_infos.size(), + signal_infos.size() + ); } wait_infos.clear(); signal_infos.clear(); @@ -2660,42 +2673,50 @@ void VkNativeQueue::Submit(VulkanCmdList& _cmdlist, VkFence _fence) { void VkNativeQueue::Wait(VulkanFence* _fence, uint64 _fence_val, VkPipelineStageFlags2 _stage) { VkSemaphore sem = _fence->GetUnderlyingHandle(); - wait_infos.push_back(VkSemaphoreSubmitInfo{ - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .pNext = nullptr, - .semaphore = sem, - .value = _fence_val, - .stageMask = _stage - }); + wait_infos.push_back( + VkSemaphoreSubmitInfo{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .pNext = nullptr, + .semaphore = sem, + .value = _fence_val, + .stageMask = _stage + } + ); } void VkNativeQueue::Wait(VkSemaphore _sem, VkPipelineStageFlags2 _stage) { - wait_infos.push_back(VkSemaphoreSubmitInfo{ - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .pNext = nullptr, - .semaphore = _sem, - .value = 0, - .stageMask = _stage - }); + wait_infos.push_back( + VkSemaphoreSubmitInfo{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .pNext = nullptr, + .semaphore = _sem, + .value = 0, + .stageMask = _stage + } + ); } void VkNativeQueue::Signal(VulkanFence* _fence, uint64 _fence_val, VkPipelineStageFlags2 _stage) { VkSemaphore sem = _fence->GetUnderlyingHandle(); - signal_infos.push_back(VkSemaphoreSubmitInfo{ - .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .pNext = nullptr, - .semaphore = sem, - .value = _fence_val, - .stageMask = _stage - }); + signal_infos.push_back( + VkSemaphoreSubmitInfo{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .pNext = nullptr, + .semaphore = sem, + .value = _fence_val, + .stageMask = _stage + } + ); } void VkNativeQueue::Signal(VkSemaphore _sem, VkPipelineStageFlags2 _stage) { - signal_infos.push_back(VkSemaphoreSubmitInfo{ - .sType = 
VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, - .pNext = nullptr, - .semaphore = _sem, - .value = 0, - .stageMask = _stage - }); + signal_infos.push_back( + VkSemaphoreSubmitInfo{ + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SUBMIT_INFO, + .pNext = nullptr, + .semaphore = _sem, + .value = 0, + .stageMask = _stage + } + ); } void VkCommandQueue::Wait(WaitEvent _evt) { auto* fence = reinterpret_cast(_evt.timeline_handle); @@ -3129,8 +3150,8 @@ void VkCommandQueue::ExecuteThread() { return; } uint64 prev_timeline = executed_frame; - while (prev_timeline < timeline && !executed_frame.compare_exchange_weak(prev_timeline, timeline) - ) { + while (prev_timeline < timeline && + !executed_frame.compare_exchange_weak(prev_timeline, timeline)) { std::this_thread::yield(); } }; @@ -3441,8 +3462,8 @@ void VkCopyQueue::ExecuteThread() { return; } uint64 prev_timeline = executed_frame; - while (prev_timeline < timeline && !executed_frame.compare_exchange_weak(prev_timeline, timeline) - ) { + while (prev_timeline < timeline && + !executed_frame.compare_exchange_weak(prev_timeline, timeline)) { std::this_thread::yield(); } }; @@ -3695,13 +3716,21 @@ void VkCopyQueue::Complete(uint64 _timeline) { std::this_thread::yield(); ++spin_count; if (spin_count == 10'000'000) { - LOG_ERROR("[CopyQueue] Complete: STILL WAITING after 10M spins! " - "_timeline={}, executed_frame={}, enabled={}", - _timeline, executed_frame.load(), (bool)enabled); + LOG_ERROR( + "[CopyQueue] Complete: STILL WAITING after 10M spins! " + "_timeline={}, executed_frame={}, enabled={}", + _timeline, + executed_frame.load(), + (bool)enabled + ); } if (spin_count % 50'000'000 == 0) { - LOG_ERROR("[CopyQueue] Complete: STUCK! spins={}, _timeline={}, executed_frame={}", - spin_count, _timeline, executed_frame.load()); + LOG_ERROR( + "[CopyQueue] Complete: STUCK! 
spins={}, _timeline={}, executed_frame={}", + spin_count, + _timeline, + executed_frame.load() + ); } } } diff --git a/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h b/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h index 024669fd2..efd2cb884 100644 --- a/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h +++ b/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h @@ -48,7 +48,6 @@ struct AoPipelineBindlessParam { struct SsdoPipelineBindlessParam { float4x4 clip2world; float4x4 world2clip; - float4x4 world2view; float3 camera_position; float ssdo_depth_bias; From 509b3369491add6b7144255c1fdc45459949769f Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Sun, 29 Mar 2026 15:22:15 +0800 Subject: [PATCH 08/17] =?UTF-8?q?chore:=20=E5=9C=A8justfile=E4=B8=AD?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BA=86just=20b=E5=92=8Cjust=20g?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- template.justfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/template.justfile b/template.justfile index 4c857c02f..52c90b782 100644 --- a/template.justfile +++ b/template.justfile @@ -79,4 +79,6 @@ generate-build-run config="Debug" exe="MoerEditor" threads=default_threads: (gen alias gbr := generate-build-run alias br := build-run +alias g := generate +alias b := build alias r := run \ No newline at end of file From 94cabedbc62302c5010ddd7000f32d158f69ad55 Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Sun, 29 Mar 2026 17:14:52 +0800 Subject: [PATCH 09/17] =?UTF-8?q?fix(raster):=20=E6=95=B4=E7=90=86raster?= =?UTF-8?q?=E7=9A=84=E9=98=B4=E5=BD=B1=E4=BB=A3=E7=A0=81=EF=BC=8C=E5=B0=86?= =?UTF-8?q?CASCADE=E6=95=B0=E4=BB=8E8=E7=AE=80=E5=8C=96=E8=87=B34?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../raster/deferred/lighting/shadows/CSM.hlsli | 12 
+++++------- .../deferred/lighting/shadows/ShadowCore.hlsli | 4 +++- .../deferred/lighting/shadows/ShadowMask.frag.hlsl | 10 ++-------- .../raster/deferred/lighting/shadows/Shadows.hlsli | 12 +++++------- .../renderer/raster/RasterCompileTimeConstants.h | 2 +- source/runtime/render/renderer/raster/RasterConfig.h | 3 +-- .../shared/raster/lighting_pass/ShaderParameters.h | 2 +- 7 files changed, 18 insertions(+), 27 deletions(-) diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/CSM.hlsli b/shaders/pipelines/raster/deferred/lighting/shadows/CSM.hlsli index 44b4a408a..e58a82dd0 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/CSM.hlsli +++ b/shaders/pipelines/raster/deferred/lighting/shadows/CSM.hlsli @@ -4,11 +4,11 @@ #include "core/common/Common.hlsl" #include "shared/raster/ShaderParameters.h" -// 获取Cascade Index -int get_cascade_index(Moer::LightingData lighting_data, float3 world_pos) { - float pixel_view_pos_z = abs(mul(lighting_data.world2view, float4(world_pos, 1.0)).z); +// 获取Cascade Index(同时输出 view-space Z,避免重复矩阵乘法) +int get_cascade_index(Moer::LightingData lighting_data, float3 world_pos, out float out_view_z) { + out_view_z = abs(mul(lighting_data.world2view, float4(world_pos, 1.0)).z); float pixel_depth_ratio = - (pixel_view_pos_z - lighting_data.near_clip) / (lighting_data.far_clip - lighting_data.near_clip); + (out_view_z - lighting_data.near_clip) / (lighting_data.far_clip - lighting_data.near_clip); for (int i = 0; i < lighting_data.shadow_csm_num_of_cascades; i++) { if (pixel_depth_ratio < lighting_data.cascade_split_ratios[i]) { return i; @@ -17,9 +17,7 @@ int get_cascade_index(Moer::LightingData lighting_data, float3 world_pos) { return -1; } -float get_cascade_blend_ratio(Moer::LightingData lighting_data, float3 world_pos, int cascade_index) { - float pixel_view_pos_z = - abs(mul(lighting_data.world2view, float4(world_pos, 1.0)).z); //FIXME:需要取负吗? 
+float get_cascade_blend_ratio(Moer::LightingData lighting_data, float pixel_view_pos_z, int cascade_index) { float blend_band_start_z = lighting_data.near_clip + lighting_data.cascade_blend_start_ratios[cascade_index] * (lighting_data.far_clip - lighting_data.near_clip); diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/ShadowCore.hlsli b/shaders/pipelines/raster/deferred/lighting/shadows/ShadowCore.hlsli index 16821faeb..2c2c6e9c8 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/ShadowCore.hlsli +++ b/shaders/pipelines/raster/deferred/lighting/shadows/ShadowCore.hlsli @@ -99,7 +99,9 @@ float2 QuadAverage(float2 val) { // 参考 UE: 基于斜率的 Bias 计算 float GetSlopeScaledBias(float3 normal, float3 lightDir) { float cosTheta = saturate(dot(normal, -lightDir)); - float bias = 0.005 * tan(acos(cosTheta)); // 简单近似 + // tan(acos(x)) == sqrt(1 - x*x) / x,避免两个超越函数 + float sinTheta = sqrt(1.0 - cosTheta * cosTheta); + float bias = 0.005 * sinTheta / max(cosTheta, 1e-4); return clamp(bias, 0.0001, 0.01); } diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl b/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl index 826d1189f..77f94b7d3 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl +++ b/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl @@ -1,18 +1,12 @@ #include "core/common/Bindless.hlsl" -BINDLESS_BINDINGS(3, 2, 4, 5) #include "core/common/Common.hlsl" +BINDLESS_BINDINGS(3, 2, 4, 5) +#include "pipelines/RasterCommon.hlsli" #include "pipelines/raster/deferred/lighting/shadows/Shadows.hlsli" #include "shared/raster/ShaderParameters.h" [[vk::push_constant]] ConstantBuffer param; -float3 WorldPosFromDepth(float depth, float2 screen_uv, float4x4 clip2world) { - float4 clip = float4(screen_uv.x * 2.f - 1.f, 1.f - screen_uv.y * 2.f, depth, 1.0); - float4 world_w = mul(clip2world, clip); - float3 pos = world_w.xyz / world_w.w; - return pos; -} - 
float main(float2 in_uv : TEXCOORD0) : SV_TARGET { // MARK: Lighting Data ArrayBuffer global_params = ArrayBuffer(param.global_param_hdl); diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli b/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli index 8f3470d7d..6dc388185 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli +++ b/shaders/pipelines/raster/deferred/lighting/shadows/Shadows.hlsli @@ -37,14 +37,11 @@ float get_single_shadow( ctx.normal = normal; ctx.lightDir = lightDir; - float occluder_depth = TextureHandle(ctx.shadowMapHandle).Sample2D(shadow_uv).x; - float fragment_depth = shadow_ndc_pos.z; - if (lighting_data.pcss_enabled == 1) { return CalculatePcssDir(ctx); } else { - occluder_depth = TextureHandle(ctx.shadowMapHandle).Sample2D(ctx.shadowUV).x; - return IsShadowedDir(occluder_depth, fragment_depth, SHADOW_BIAS) ? 0.0 : 1.0; + float occluder_depth = TextureHandle(ctx.shadowMapHandle).Sample2D(shadow_uv).x; + return IsShadowedDir(occluder_depth, shadow_ndc_pos.z, SHADOW_BIAS) ? 
0.0 : 1.0; } } @@ -110,13 +107,14 @@ float calculate_csm_shadow( float2 screen_uv, float3 normal ) { - int cascade_index = get_cascade_index(lighting_data, world_pos); + float pixel_view_z; + int cascade_index = get_cascade_index(lighting_data, world_pos, pixel_view_z); if (cascade_index == -1) return 1.0; float3 main_light_dir = lighting_data.main_light_direction; if (lighting_data.is_csm_blend_enabled == 1) { - float cascade_blend_ratio = get_cascade_blend_ratio(lighting_data, world_pos, cascade_index); + float cascade_blend_ratio = get_cascade_blend_ratio(lighting_data, pixel_view_z, cascade_index); float shadow_current = get_single_shadow(lighting_data, world_pos, cascade_index, screen_uv, normal, main_light_dir); diff --git a/source/runtime/render/renderer/raster/RasterCompileTimeConstants.h b/source/runtime/render/renderer/raster/RasterCompileTimeConstants.h index ac1ff173f..53a198b90 100644 --- a/source/runtime/render/renderer/raster/RasterCompileTimeConstants.h +++ b/source/runtime/render/renderer/raster/RasterCompileTimeConstants.h @@ -14,6 +14,6 @@ namespace Moer { // 如果要修改此参数,请同步修改 shaderheaders\shared\raster\lighting_pass\ShaderParameters.h 中的 LightingData 结构体 -static constexpr uint CSM_MAX_CASCADES = 8; +static constexpr uint CSM_MAX_CASCADES = 4; } // namespace Moer \ No newline at end of file diff --git a/source/runtime/render/renderer/raster/RasterConfig.h b/source/runtime/render/renderer/raster/RasterConfig.h index 26dc2f7bd..7e13a5579 100644 --- a/source/runtime/render/renderer/raster/RasterConfig.h +++ b/source/runtime/render/renderer/raster/RasterConfig.h @@ -229,8 +229,7 @@ struct RasterConfig { bool shadow_pcss_enabled = true; float shadow_pcss_light_size_world = 0.01f; - StaticArray shadow_csm_cover_ratio_of_camera = - {0.005, 0.02, 0.1, 0.25, 0.32, 1.0}; + StaticArray shadow_csm_cover_ratio_of_camera = {0.005, 0.02, 0.1, 0.25}; // MARK: Skybox bool skybox_exposure_correct_enabled = true; // 启用的话,就会找到第一个平行光,乘上它的颜色 diff --git 
a/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h b/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h index 349f16404..35c241036 100644 --- a/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h +++ b/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h @@ -11,7 +11,7 @@ #undef CONST #endif -#define MAX_CSM_CASCADES 8 +#define MAX_CSM_CASCADES 4 #ifdef __cplusplus //#define CONST constexpr From 960bf8ea6d5d93d0ccb3297b7fa6f3ceb39bbade Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Sun, 29 Mar 2026 18:18:11 +0800 Subject: [PATCH 10/17] =?UTF-8?q?feat(raster):=20=E5=B0=86LightingData?= =?UTF-8?q?=E9=80=9A=E8=BF=87UBO=E4=BC=A0=E5=85=A5Shader=EF=BC=8CRegisters?= =?UTF-8?q?=E5=BC=80=E9=94=80=E4=BB=8E200+=E4=BC=98=E5=8C=96=E4=B8=BA70?= =?UTF-8?q?=EF=BC=8C=E4=BB=8E=E8=80=8C=E5=AE=9E=E7=8E=B0ShadowMaskPass=209?= =?UTF-8?q?0%=E7=9A=84=E6=80=A7=E8=83=BD=E6=8F=90=E5=8D=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../lighting/shadows/ShadowMask.frag.hlsl | 5 +- .../raster/DirectionalShadowMaskPass.cpp | 8 +-- .../raster/DirectionalShadowMaskPass.h | 5 +- .../render/renderer/raster/RasterResource.h | 4 +- .../env_and_atmo_pass/ShaderParameters.h | 2 - .../raster/lighting_pass/ShaderParameters.h | 55 +++++++++---------- 6 files changed, 37 insertions(+), 42 deletions(-) diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl b/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl index 77f94b7d3..45dbfb0c7 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl +++ b/shaders/pipelines/raster/deferred/lighting/shadows/ShadowMask.frag.hlsl @@ -5,13 +5,10 @@ BINDLESS_BINDINGS(3, 2, 4, 5) #include "pipelines/raster/deferred/lighting/shadows/Shadows.hlsli" #include "shared/raster/ShaderParameters.h" 
+[[vk::binding(0, 0)]] ConstantBuffer lighting_data; [[vk::push_constant]] ConstantBuffer param; float main(float2 in_uv : TEXCOORD0) : SV_TARGET { - // MARK: Lighting Data - ArrayBuffer global_params = ArrayBuffer(param.global_param_hdl); - Moer::LightingData lighting_data = global_params.Load(0); - // MARK: GBuffer float depth = TextureHandle(param.depth_hdl).Sample2D(in_uv); diff --git a/source/runtime/render/renderer/raster/DirectionalShadowMaskPass.cpp b/source/runtime/render/renderer/raster/DirectionalShadowMaskPass.cpp index 8a3a44011..344d2b1a5 100644 --- a/source/runtime/render/renderer/raster/DirectionalShadowMaskPass.cpp +++ b/source/runtime/render/renderer/raster/DirectionalShadowMaskPass.cpp @@ -23,11 +23,11 @@ void DirectionalShadowMaskPass::Process( const Camera& camera ) { DirectionalShadowMaskPassBindlessParam param; - param.global_param_hdl = context.lighting_data_buffer.hdl; - param.normal_hdl = context.textures.normal.hdl; - param.depth_hdl = context.textures.depth_nearest_sampler.hdl; + param.normal_hdl = context.textures.normal.hdl; + param.depth_hdl = context.textures.depth_nearest_sampler.hdl; - context.cmd_list.Gfx(directional_shadow_mask_pipeline, context.bdls, param) + context.cmd_list + .Gfx(directional_shadow_mask_pipeline, context.lighting_data_buffer.buf, context.bdls, param) .Draw( "Directional Shadow Mask Pass", context.textures.shadow_mask.GetRect2D(), diff --git a/source/runtime/render/renderer/raster/DirectionalShadowMaskPass.h b/source/runtime/render/renderer/raster/DirectionalShadowMaskPass.h index bf52e835e..5c32e89e5 100644 --- a/source/runtime/render/renderer/raster/DirectionalShadowMaskPass.h +++ b/source/runtime/render/renderer/raster/DirectionalShadowMaskPass.h @@ -12,9 +12,10 @@ namespace Moer::Render::Raster { class DirectionalShadowMaskPassPipeline : public RasterPipeline { public: DEFINE_RASTER_PIPELINE_CLASS(DirectionalShadowMaskPassPipeline); - DEFINE_SHADER_CONSTANT_STRUCT(DirectionalShadowMaskPassBindlessParam, 
param); + DEFINE_SHADER_BUFFER(lighting_data); DEFINE_SHADER_BINDLESS_ARRAY(bdls); - DEFINE_SHADER_ARGS(bdls, param); + DEFINE_SHADER_CONSTANT_STRUCT(DirectionalShadowMaskPassBindlessParam, param); + DEFINE_SHADER_ARGS(lighting_data, bdls, param); }; class DirectionalShadowMaskPass { diff --git a/source/runtime/render/renderer/raster/RasterResource.h b/source/runtime/render/renderer/raster/RasterResource.h index 79e0bc202..78567eedb 100644 --- a/source/runtime/render/renderer/raster/RasterResource.h +++ b/source/runtime/render/renderer/raster/RasterResource.h @@ -165,7 +165,9 @@ struct RasterContext { //GPU Side lighting_data_buffer.buf = device.CreateBuffer( - "Raster::LightData", sizeof(LightingData), EBufferUsageFlags::UNORDERED_ACCESS + "Raster::LightData", + sizeof(LightingData), + EBufferUsageFlags::UNORDERED_ACCESS | EBufferUsageFlags::CONSTANT_BUFFER ); lighting_data_buffer.hdl = bdls->AllocateBuffer(lighting_data_buffer.buf->GetView()); } diff --git a/source/runtime/render/shaderheaders/shared/raster/env_and_atmo_pass/ShaderParameters.h b/source/runtime/render/shaderheaders/shared/raster/env_and_atmo_pass/ShaderParameters.h index 2d2dffc00..967c0c77f 100644 --- a/source/runtime/render/shaderheaders/shared/raster/env_and_atmo_pass/ShaderParameters.h +++ b/source/runtime/render/shaderheaders/shared/raster/env_and_atmo_pass/ShaderParameters.h @@ -11,8 +11,6 @@ #undef CONST #endif -#define MAX_CSM_CASCADES 8 - #ifdef __cplusplus //#define CONST constexpr #include "misc/Traits.h" diff --git a/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h b/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h index 35c241036..23bd828db 100644 --- a/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h +++ b/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h @@ -11,8 +11,6 @@ #undef CONST #endif -#define MAX_CSM_CASCADES 4 - #ifdef __cplusplus //#define 
CONST constexpr #include "misc/Traits.h" @@ -42,43 +40,43 @@ struct MaterialPassBindlessParam { uint cubemap_handle; uint shadow_mask_handle; }; -struct LightingData { - float4x4 world2shadow_clip[MAX_CSM_CASCADES]; +// UBO (ConstantBuffer),需要遵循std140 +struct LightingData { + float4x4 world2shadow_clip[4]; // 4层CSM + float4x4 world2view; float4x4 clip2world; - float3 camera_position; - // uint padding;// FIXME: need or not? + + float4 scale_data[4]; + + float4 cascade_split_ratios; // [0..3],因为std140才写成float4 + float4 cascade_blend_start_ratios; // [0..3],因为std140才写成float4 + uint4 cascade_shadow_map; // [0..3],因为std140才写成uint4,存放CSM的纹理句柄 + // HLSL支持通过[x]访问float4和uint4,所以HLSL不需要再修改 + + float3 camera_position; uint light_count; + float3 main_light_direction; + uint shadow_map_mode; + + float3 light_pos; + float light_radius; - uint shadow_map_mode; uint shadow_sampling_mode; uint shadow_csm_num_of_cascades; uint shadow_csm_sm_size; uint shadow_csm_visualize_cascade; - uint cascade_shadow_map[MAX_CSM_CASCADES]; - - // Point Light Shadow Map - uint point_shadow_map; //handle - float3 light_pos; - float light_radius; - - uint pcss_enabled; - - float light_size_world; //assumed light size for soft shadow calculation - float4 scale_data[MAX_CSM_CASCADES]; - - float4x4 world2view; - float near_clip; - float far_clip; - float cascade_split_ratios[MAX_CSM_CASCADES]; - float cascade_blend_start_ratios[MAX_CSM_CASCADES]; - uint is_csm_blend_enabled; + uint point_shadow_map; //handle + uint pcss_enabled; + float light_size_world; //assumed light size for soft shadow calculation + float near_clip; - // Shading - uint lut_ggx_emu_handle; - uint lut_ggx_eavg_handle; + float far_clip; + uint is_csm_blend_enabled; + uint lut_ggx_emu_handle; + uint lut_ggx_eavg_handle; uint brdf_enable_multi_scatter; // kulla-conty approximation uint brdf_NDF_mode; // NDF Mode @@ -91,7 +89,6 @@ struct LightingData { }; struct DirectionalShadowMaskPassBindlessParam { - uint global_param_hdl; uint 
normal_hdl; uint depth_hdl; }; From 35af91cce6e3accfa5df678dbe4c80c7b49cf85f Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Sun, 29 Mar 2026 18:23:16 +0800 Subject: [PATCH 11/17] =?UTF-8?q?fix(raster):=20=E5=B0=86PCSS=E7=9A=84[unr?= =?UTF-8?q?oll]=E6=94=B9=E4=B8=BA[loop]=EF=BC=8C16=E7=9A=84=E5=BE=AA?= =?UTF-8?q?=E7=8E=AF=E4=B8=8D=E5=BA=94=E8=AF=A5=E4=BD=BF=E7=94=A8[unroll]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- shaders/pipelines/raster/deferred/lighting/shadows/PCF.hlsli | 2 +- shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/PCF.hlsli b/shaders/pipelines/raster/deferred/lighting/shadows/PCF.hlsli index 508dea96d..b6bb3f017 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/PCF.hlsli +++ b/shaders/pipelines/raster/deferred/lighting/shadows/PCF.hlsli @@ -18,7 +18,7 @@ } \ } \ \ - [unroll] for (int i = 0; i < PCSS_SAMPLES; ++i) { \ + [loop] for (int i = 0; i < PCSS_SAMPLES; ++i) { \ float2 disk_sample = mul(rotation, POISSON_DISK[i]); \ /* 应用椭圆变换 */ \ float2 offset_uv = mul(pcf_transform_matrix, disk_sample) * penumbra_uv; \ diff --git a/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli b/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli index 3a7de79fa..cb4cf3814 100644 --- a/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli +++ b/shaders/pipelines/raster/deferred/lighting/shadows/PCSS.hlsli @@ -66,7 +66,7 @@ bool IsDirectionalLight_Dir() { } \ } \ \ - [unroll] for (int i = 0; i < PCSS_SEARCH_SAMPLES; ++i) { \ + [loop] for (int i = 0; i < PCSS_SEARCH_SAMPLES; ++i) { \ float2 raw_offset = mul(rotation, POISSON_DISK[i]); \ \ float3 sample_pos = GetShadowSamplingPos##SUFFIX(ctx, raw_offset * search_radius_uv); \ From 109c36ca5c27e98369d31e442a7305eff40bdec7 Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> 
Date: Sun, 29 Mar 2026 18:36:00 +0800 Subject: [PATCH 12/17] =?UTF-8?q?feat(raster):=20=E5=B0=86LightingData?= =?UTF-8?q?=E9=80=9A=E8=BF=87UBO=E4=BC=A0=E5=85=A5LightingPass=EF=BC=8C?= =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=80=A7=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../lighting/RasterLightingPass.frag.hlsl | 5 +---- .../render/renderer/raster/LightingPass.h | 20 +++++++++---------- .../raster/lighting_pass/ShaderParameters.h | 1 - 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/shaders/pipelines/raster/deferred/lighting/RasterLightingPass.frag.hlsl b/shaders/pipelines/raster/deferred/lighting/RasterLightingPass.frag.hlsl index 567255f84..393bfc3d4 100644 --- a/shaders/pipelines/raster/deferred/lighting/RasterLightingPass.frag.hlsl +++ b/shaders/pipelines/raster/deferred/lighting/RasterLightingPass.frag.hlsl @@ -9,6 +9,7 @@ BINDLESS_BINDINGS(3, 2, 4, 5) #include "shared/raster/ShaderParameters.h" +[[vk::binding(0, 0)]] ConstantBuffer lighting_data; [[vk::push_constant]] ConstantBuffer param; float4 main(float2 in_uv : TEXCOORD0) : SV_TARGET { @@ -17,10 +18,6 @@ float4 main(float2 in_uv : TEXCOORD0) : SV_TARGET { ArrayBuffer material_buf = ArrayBuffer(param.material_buf_hdl); Moer::GMaterial mat = material_buf.Load(material_id); - // MARK: Lighting Data - ArrayBuffer global_params = ArrayBuffer(param.global_param_handle); - Moer::LightingData lighting_data = global_params.Load(0); - // MARK: GBuffer float2 uv = TextureHandle(param.gbuffer_uv).Sample2D(in_uv); float depth = TextureHandle(param.gbuffer_depth).Sample2D(in_uv); diff --git a/source/runtime/render/renderer/raster/LightingPass.h b/source/runtime/render/renderer/raster/LightingPass.h index c801295ac..dcc3e3e53 100644 --- a/source/runtime/render/renderer/raster/LightingPass.h +++ b/source/runtime/render/renderer/raster/LightingPass.h @@ -13,9 +13,10 @@ namespace Moer::Render::Raster { class PbrMaterialShadingPipeline : public 
RasterPipeline { public: DEFINE_RASTER_PIPELINE_CLASS(PbrMaterialShadingPipeline); - DEFINE_SHADER_CONSTANT_STRUCT(MaterialPassBindlessParam, param); + DEFINE_SHADER_BUFFER(lighting_data); DEFINE_SHADER_BINDLESS_ARRAY(bdls); - DEFINE_SHADER_ARGS(bdls, param); + DEFINE_SHADER_CONSTANT_STRUCT(MaterialPassBindlessParam, param); + DEFINE_SHADER_ARGS(lighting_data, bdls, param); }; class LightingPass { @@ -52,13 +53,12 @@ class LightingPass { material_param.enable_extra_ambient = ui_config.shading_enable_extra_ambient; material_param.shading_mode = static_cast(ui_config.shading_mode); - material_param.material_buf_hdl = context.scene.GetGpuSceneRes().material_buf.hdl; - material_param.vbuffer = context.textures.vbuffer.hdl; - material_param.gbuffer_normal = context.textures.normal.hdl; - material_param.gbuffer_tangent = context.textures.tangent.hdl; - material_param.gbuffer_uv = context.textures.uv.hdl; - material_param.gbuffer_depth = context.textures.depth_nearest_sampler.hdl; - material_param.global_param_handle = context.lighting_data_buffer.hdl; + material_param.material_buf_hdl = context.scene.GetGpuSceneRes().material_buf.hdl; + material_param.vbuffer = context.textures.vbuffer.hdl; + material_param.gbuffer_normal = context.textures.normal.hdl; + material_param.gbuffer_tangent = context.textures.tangent.hdl; + material_param.gbuffer_uv = context.textures.uv.hdl; + material_param.gbuffer_depth = context.textures.depth_nearest_sampler.hdl; material_param.light_buf_hdl = context.scene.GetGpuSceneRes().light_buf.hdl; material_param.cubemap_handle = context.textures.cubemap_tex.hdl; @@ -71,7 +71,7 @@ class LightingPass { DepthAttachment(context.textures.depth_linear_sampler.tex->GetView().GetTexture()); depth_attachment.action = AC_DS_LOAD_STORE; - context.cmd_list.Gfx(pbr_pipeline, context.bdls, material_param) + context.cmd_list.Gfx(pbr_pipeline, context.lighting_data_buffer.buf, context.bdls, material_param) .Draw( "Lighting Pass", 
context.textures.lighting_output.GetRect2D(), diff --git a/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h b/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h index 23bd828db..1aed5d029 100644 --- a/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h +++ b/source/runtime/render/shaderheaders/shared/raster/lighting_pass/ShaderParameters.h @@ -35,7 +35,6 @@ struct MaterialPassBindlessParam { uint gbuffer_uv; uint gbuffer_depth; uint gbuffer_position; - uint global_param_handle; uint shading_mode; uint cubemap_handle; uint shadow_mask_handle; From e0bb6865dc8ee11f861b9331474ffefefd148ad2 Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Mon, 30 Mar 2026 13:38:53 +0800 Subject: [PATCH 13/17] =?UTF-8?q?docs:=20=E6=B7=BB=E5=8A=A0=E4=BA=86AGENTS?= =?UTF-8?q?.md=EF=BC=8C=E6=95=B4=E7=90=86=E4=BA=86=E6=96=87=E6=A1=A3?= =?UTF-8?q?=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AGENTS.md | 177 +++++ README.md | 7 +- ...73\347\273\237\350\257\246\350\247\243.md" | 633 ++++++++++++++++++ README.en.md => docs/README.en.md | 0 4 files changed, 812 insertions(+), 5 deletions(-) create mode 100644 AGENTS.md create mode 100644 "docs/GeneratedByAI/260330-Bindless\347\263\273\347\273\237\350\257\246\350\247\243.md" rename README.en.md => docs/README.en.md (100%) diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..0a69225d2 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,177 @@ +# MoerEngine — Project Guidelines + +## Build & Run + +``` +just b # Build (Debug, 30 threads, Ninja + Clang) +just r # Run MoerEditor +just br # Build + Run +just gbr # Generate + Build + Run (full rebuild) +just b Release # Release build +just clean # Remove build/ and target/ +``` + +- CMake generator: **Ninja**, compiler: **Clang** (C++20) +- Shader compiler: **DXC** with `-O3 -spirv -fspv-target-env=vulkan1.3 
-fvk-use-dx-layout -all-resources-bound`, target `ps_6_0` / `cs_6_0` +- Shaders are compiled at runtime, cached in `asset/shader_cache/{platform}.sdc` +- Output: `target/bin/{Config}/MoerEditor.exe` + +## Architecture Overview + +``` +source/ + editor/ # ImGui editor app + main.cpp, Editor.cpp/h # Entry point, editor lifecycle + EditorUI.cpp/h # Main UI layout + raster_ui/ # Raster pipeline UI controls (RasterUI.cpp/h) + raytracing_ui/ # RT pipeline UI controls + runtime/ + core/ # Math, containers, platform, logging, task graph + include/math/ # Vector, Matrix (Base.h, Matrix.h) + include/misc/ # Traits.h (C++ ↔ HLSL type aliases: float3, uint4, float4x4) + Engine.cpp/h # Engine init/loop + render/ + renderer/ + Renderer.cpp/h # Top-level renderer (selects raster or RT) + raster/ # ★ RASTER PIPELINE (most frequently modified) + RasterRenderer.cpp/h # Frame loop: calls each pass in order + RasterResource.h # Per-frame resources (textures, buffers, CSM data) + RasterTextures.h # GBuffer / RT texture definitions + RasterConfig.h # UI-driven config struct (shadow, AO, AA settings) + DirectionalShadowMaskPass.cpp/h # Shadow Mask pass + ShadowDepthPass.cpp/h # CSM depth rendering (cascade setup, frustum split) + LightingPass.h # Deferred lighting pass + GeometryPass.h # Geometry / GBuffer pass + AoPass.h # SSAO / RTAO / SSDO + AaPass.h # SMAA / FXAA + SkyboxPass.cpp/h # Skybox rendering + TonemappingPass.h # Tonemapping + bloom + raytracing/ # RT pipeline (ReSTIR DI, path tracing) + common/ui/ # ImGui renderer integration + rhi/ # ★ RHI ABSTRACTION LAYER + RHI.h, RHIResource.h # Core types (BufferRef, TextureRef, EBufferUsageFlags) + RHICommon.h # Enums (EBufferUsageFlags, EPixelFormat, etc.) 
+ RHICommand.h # Command list / draw dispatching + vulkan/ # Vulkan backend + VulkanDevice.cpp/h # Pipeline creation, descriptor set layout, reflection + VulkanQueue.cpp/h # Command submission, draw/dispatch recording + VulkanRHIResource.cpp/h # Buffer/Texture creation, enum translation + VulkanDescriptor.cpp/h # Descriptor pool & set management + d3d12/ # D3D12 backend (partial) + shader/ + ShaderPipeline.h # ★ Pipeline class macros (DEFINE_SHADER_CONSTANT_STRUCT, etc.) + ShaderCompiler.cpp/h # DXC invocation, SPIR-V generation + ShaderManager.cpp # PSO caching, hot-reload + shaderheaders/shared/ # ★ C++/HLSL SHARED HEADERS (dual-language via #ifdef __cplusplus) + raster/ + ShaderParameters.h # Raster shared params entry (includes sub-headers) + SharedEnum.h # Shared enums (EShadowMapMode, ERtaoSampleMode, etc.) + lighting_pass/ShaderParameters.h # ★ LightingData, pass param structs + scene/ # Scene graph, GPU scene, camera, lights + GpuScene.cpp/h # GPU-side scene buffers (instances, materials, lights) + resources/ # Vertex factories, mesh resources +``` + +``` +shaders/ # HLSL shader sources + core/ + common/ + Bindless.hlsl # ★ Bindless heap definition (ArrayBuffer, TextureHandle, BINDLESS_BINDINGS) + Common.hlsl # Shared utilities (WorldPosFromDepth, packing, etc.) 
+ materials/ + Brdf.hlsli # PBR BRDF (GGX, multi-scatter) + Material.hlsli # Material fetching + pipelines/ + RasterCommon.hlsli # Raster shared helpers + raster/deferred/ + geometry/ # GBuffer shaders + lighting/ + Lighting.hlsli # Light accumulation + RasterLightingPass.frag.hlsl # Deferred lighting entry point + shadows/ # ★ SHADOW SYSTEM + ShadowMask.frag.hlsl # Shadow mask entry (full-screen pass) + Shadows.hlsli # Shadow dispatch (CSM / point) + CSM.hlsli # Cascade selection, blend ratio + PCSS.hlsli # PCSS blocker search + penumbra + PCF.hlsli # PCF filtering + ShadowCore.hlsli # Bias, blocker stats + ShadowSampling.hlsli # Poisson disk, rotation, sampling utils + env_and_atmo/ # Skybox shaders + postprocess/ + lighting_effects/ # AO (SSAO, RTAO, SSDO), SSR + aa/ # SMAA, FXAA + denoise/ # Bilateral, RTAO denoiser + color/ # Tonemapping, bloom + common/ # Upsample, copy pass +``` + +## Key Patterns + +### Bindless Architecture +All resources go through a unified bindless heap. Shaders declare `BINDLESS_BINDINGS(BufferSpace, TextureSpace, SamplerSpace, AccelSpace)` and access resources by uint handles via `ArrayBuffer(handle).Load()` and `TextureHandle(handle).Sample2D()`. + +### Shared C++/HLSL Headers +Files in `shaderheaders/shared/` use `#ifdef __cplusplus` guards: +- C++ side: `namespace Moer::Render`, types from `Traits.h` (`float3` = `Vector3f`, `float4x4` = `Matrix4x4f`) +- HLSL side: `namespace Moer`, native HLSL types +- Both sides see the same struct layout. **Alignment must be manually kept in sync** (see pitfalls below). 
+ +### Pipeline Definition (C++ side) +```cpp +class MyPipeline : public RasterPipeline { + DEFINE_RASTER_PIPELINE_CLASS(MyPipeline); + DEFINE_SHADER_BUFFER(lighting_data); // [[vk::binding(N, S)]] ConstantBuffer / StructuredBuffer + DEFINE_SHADER_CONSTANT_STRUCT(MyParam, param); // [[vk::push_constant]] + DEFINE_SHADER_BINDLESS_ARRAY(bdls); // Bindless heap + DEFINE_SHADER_ARGS(lighting_data, bdls, param); // Arg order must match Gfx() call order +}; +``` +Pass code calls `cmd_list.Gfx(pipeline, bufferRef, bdls, paramStruct)` — argument order matches `DEFINE_SHADER_ARGS`. + +### Pass Lifecycle +`RasterRenderer::Render()` calls each pass's `Process()` in order. Each pass owns its pipeline and fills its param struct from `RasterContext`. + +## Shadow System Details + +- CSM: up to `MAX_CSM_CASCADES` (currently 4) cascades, 4096² shadow maps +- PCSS: 16 blocker + 16 PCF samples, `[unroll]` loops, Poisson disk sampling +- Shadow mask is a separate full-screen pass (`DirectionalShadowMaskPass`) writing to a single-channel texture, read later by `RasterLightingPass` +- `LightingData` is bound as a **UBO (ConstantBuffer)** to the shadow mask shader for hardware-assisted scalarization (fields only enter registers when accessed, not all at once) + +## Pitfalls & Lessons Learned + +### Register Pressure from Large Struct Loads +**Problem:** `ByteAddressBuffer.Load()` loads the ENTIRE struct into vector registers (vgpr) at once. With `LightingData` (~600 bytes), this consumed ~150 live registers, causing 98% register-limited stalls. +**Root cause:** Dynamic array indexing (e.g., `world2shadow_clip[cascade_index]`) forces the compiler to keep all array elements alive simultaneously. +**Solution:** Use `ConstantBuffer` (UBO) instead of `ByteAddressBuffer` for uniform data. The GPU has a dedicated **constant cache** (separate from registers) — fields are fetched via scalar loads (`s_buffer_load`) and shared across all lanes without per-lane register cost. 
+ +### cbuffer / std140 Array Padding +**Problem:** In `ConstantBuffer` (std140 layout), each element of a scalar array (e.g., `float[4]`, `uint[4]`) is padded to 16 bytes. On the C++ side `float cascade_split_ratios[4]` occupies 16 B, but the GPU lays it out as 64 B. +**Solution:** Use vector types instead: `float4 cascade_split_ratios`, `uint4 cascade_shadow_map`. HLSL supports `float4[i]` indexing, so shader code doesn't change. + +### Shadow-Specific Optimizations (Applied) +- Sky pixel early-out: `if (depth < 1e-6) return 1.0` (reverse-Z, sky = 0) +- Full-blocker early-out in PCSS: skip PCF if all 16 blocker samples are in shadow +- Cascade blend early-out: only sample next cascade when `blend_ratio > 0` +- Redundant texture sample removal in `get_single_shadow()` +- `tan(acos(x))` → `sqrt(1-x²)/x` in slope-scaled bias + +### `[unroll]` vs `[loop]` with Static Arrays +Do NOT convert `[unroll]` to `[loop]` when the loop body indexes a `static const` array (e.g., Poisson disk). With `[loop]`, the array must be dynamically indexed, which pushes it into registers or local memory — registers **increase** instead of decrease. + +### Depth Format +D32_FLOAT is sufficient (no need for D32_FLOAT_S8_UINT); stencil is unused by the shadow system. Saves VRAM and bandwidth. + +### Push Constants Size Limit +Vulkan only guarantees 128 bytes of push-constant space (the required minimum for `maxPushConstantsSize`); some GPUs expose more, but it cannot be relied on. Keep pass param structs small — remove fields that can be loaded from buffers instead. + +## C++ Type Reference +| HLSL Type | C++ Type | Size | Align | +|-----------|----------|------|-------| +| `float3` | `Vector3f` (12B, union of 3 floats) | 12 | 4 | +| `float4` | `Vector4f` (16B) | 16 | 4 | +| `float4x4`| `Matrix4x4f` (64B, union of 4×Vector4f) | 64 | 4 | +| `uint` | `uint32_t` | 4 | 4 | +| `uint4` | `Vector4ui` | 16 | 4 | + +Note: C++ side has **no implicit padding** (all 4-byte aligned). But **HLSL ConstantBuffer (std140) adds padding** to arrays of scalars — always use vector types for arrays in shared structs.
diff --git a/README.md b/README.md index b2b6df996..b9a6d5df6 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ 实时渲染引擎 -**| 简体中文 | [English](README.en.md) |** +**| 简体中文 | [English](./docs/README.en.md) |** [![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/NJUCG/MoerEngine) @@ -10,13 +10,10 @@ ## 目录 +- [目录](#目录) - [1. 如何构建](#1-如何构建) - [2. 如何使用](#2-如何使用) - - [2.1 如何渲染场景](#21-如何渲染场景) - - [2.2 如何移动摄像机](#22-如何移动摄像机) - [3. 效果图](#3-效果图) - - [RayTracing Renderer](#raytracing-renderer) - - [Raster Renderer](#raster-renderer) - [4. 如何贡献](#4-如何贡献) - [开源协议](#开源协议) diff --git "a/docs/GeneratedByAI/260330-Bindless\347\263\273\347\273\237\350\257\246\350\247\243.md" "b/docs/GeneratedByAI/260330-Bindless\347\263\273\347\273\237\350\257\246\350\247\243.md" new file mode 100644 index 000000000..58b92fdab --- /dev/null +++ "b/docs/GeneratedByAI/260330-Bindless\347\263\273\347\273\237\350\257\246\350\247\243.md" @@ -0,0 +1,633 @@ +# MoerEngine Bindless 资源系统详解 + +> 本文面向引擎新手,从零开始介绍 Bindless 技术的背景、原理,以及 MoerEngine 中的具体实现。 + +--- + +## 目录 + +1. [传统资源绑定的困境](#1-传统资源绑定的困境) +2. [什么是 Bindless?](#2-什么是-bindless) +3. [Vulkan 中的 Bindless 扩展](#3-vulkan-中的-bindless-扩展) +4. [D3D12 中的 Bindless 支持](#4-d3d12-中的-bindless-支持) +5. [MoerEngine Bindless 架构总览](#5-moerengine-bindless-架构总览) +6. [Shader 侧实现详解](#6-shader-侧实现详解) +7. [C++ 侧实现详解](#7-c-侧实现详解) +8. [完整数据流:从 CPU 到 Shader](#8-完整数据流从-cpu-到-shader) +9. [实战:一个光照 Shader 的 Bindless 用法](#9-实战一个光照-shader-的-bindless-用法) +10. [Vulkan vs D3D12 差异对比](#10-vulkan-vs-d3d12-差异对比) +11. [总结](#11-总结) + +--- + +## 1. 
传统资源绑定的困境 + +在传统图形管线中,GPU 需要访问纹理、缓冲区等资源。这些资源通过**描述符(Descriptor)** 来引用——描述符就像一张"资源名片",告诉 GPU 某个资源在显存中的位置和格式。 + +传统做法是: + +``` +// 伪代码 —— 传统绑定 +BindTexture(slot=0, albedoTexture); // 把漫反射纹理绑到槽位 0 +BindTexture(slot=1, normalTexture); // 把法线纹理绑到槽位 1 +BindTexture(slot=2, roughnessTexture); // 把粗糙度纹理绑到槽位 2 +DrawCall(); +``` + +这种方式有以下问题: + +| 问题 | 说明 | +|---|---| +| **槽位有限** | 每个 Shader 能绑定的资源数量有上限(通常几十个) | +| **频繁切换** | 每画一个物体就要重新绑定资源,产生大量 API 调用 | +| **不够灵活** | Shader 必须在编译时就确定使用哪些槽位 | +| **难以合批** | 不同材质使用不同纹理,阻碍了 Draw Call 合并 | + +想象一个有 1000 个不同材质的场景——传统方式需要 1000 次绑定 + 1000 次 Draw Call。 + +--- + +## 2. 什么是 Bindless? + +**Bindless**(无绑定)的核心思想很简单: + +> 把所有资源一次性放入一个巨大的"资源池",Shader 通过**整数索引**来访问任意资源,不再需要逐个绑定。 + +``` +// 伪代码 —— Bindless +GlobalResourcePool = [texture0, texture1, texture2, ... texture9999]; +// Shader 内部 +color = GlobalResourcePool[materialData.albedoIndex].Sample(uv); +``` + +这样的好处是: + +- **零绑定开销**:只需要把"索引号"传给 Shader,不需要频繁切换资源绑定 +- **无限资源**:Shader 可以访问池中的任意资源 +- **方便合批**:不同材质只是索引不同,可以在一次 Draw Call 中渲染多种材质 +- **动态灵活**:材质可以在运行时任意切换引用的纹理 + +--- + +## 3. 
Vulkan 中的 Bindless 扩展 + +Vulkan 通过以下扩展来支持 Bindless: + +### 3.1 Descriptor Indexing(VK_EXT_descriptor_indexing) + +这是 Bindless 的基础。它允许: + +- **运行时变长数组**:声明一个大小不确定的描述符数组 +- **非一致索引**:用 `NonUniformResourceIndex()` 包裹索引,通知 GPU "这个索引在 Warp/Wave 内可能不同" +- **部分绑定**:数组中的某些槽位可以不填充,不会导致错误 + +```hlsl +// HLSL 中声明无界数组(Vulkan 需要 vk::binding 注解) +[[vk::binding(0, 2)]] Texture2D gTexture2Dfloat4__114514_bdls[]; +[[vk::binding(0, 4)]] SamplerState gsampler__114514_bdls[]; + +// 通过索引访问 +Texture2D tex = gTexture2Dfloat4__114514_bdls[NonUniformResourceIndex(texIdx)]; +SamplerState spl = gsampler__114514_bdls[NonUniformResourceIndex(splIdx)]; +float4 color = tex.Sample(spl, uv); +``` + +### 3.2 Descriptor Buffer(VK_EXT_descriptor_buffer) + +MoerEngine 更进一步,使用了 **Descriptor Buffer 扩展**。传统 Vulkan 通过 `VkDescriptorSet` 来管理描述符,而 Descriptor Buffer 允许把描述符直接当做普通 buffer 数据来操作: + +- 描述符被写成字节数据,存储在 GPU buffer 中 +- 不需要 `vkAllocateDescriptorSets` / `vkUpdateDescriptorSets` +- 可以通过 `memcpy` 直接操作描述符数据 + +这是 MoerEngine 选择的方案,性能更高,也更灵活。 + +--- + +## 4. D3D12 中的 Bindless 支持 + +D3D12 对 Bindless 的支持更加"原生": + +```cpp +// D3D12 Root Signature Flag +D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED +D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED +``` + +设置了这两个 Flag 后,Shader 可以直接通过全局的 `ResourceDescriptorHeap` 和 `SamplerDescriptorHeap` 来索引: + +```hlsl +// D3D12 HLSL —— 不需要声明任何数组,直接索引全局堆 +Texture2D tex = Texture2D(ResourceDescriptorHeap[texIdx]); +SamplerState spl = (SamplerState)SamplerDescriptorHeap[splIdx]; +``` + +D3D12 的模型更简单——两个全局堆,直接索引。不需要额外扩展。 + +--- + +## 5. 
MoerEngine Bindless 架构总览 + +MoerEngine 的 Bindless 系统由以下几个核心部分构成: + +``` +┌─────────────────────────────────────────────────────────────────┐ +│ CPU (C++) │ +│ │ +│ ┌──────────────────┐ ┌──────────────────────────────────┐ │ +│ │ VulkanBindless │────▶│ VulkanDescriptorHeap │ │ +│ │ Array │ │ (全局描述符堆) │ │ +│ │ ├ AllocateTexture│ │ ├ image_desc_data (纹理描述符) │ │ +│ │ ├ AllocateBuffer │ │ ├ buffer_desc_data (缓冲描述符) │ │ +│ │ └ CreateUpdate │ │ └ accel_desc_data (加速结构) │ │ +│ │ Command() │ └──────────────────────────────────┘ │ +│ └──────────────────┘ │ +│ │ │ +│ │ 生成更新命令 │ +│ ▼ │ +│ ┌──────────────────────────────────────────────────────────┐ │ +│ │ GPU Buffer (3 个) │ │ +│ │ ├ bindless_array_buffer ─── 间接索引数组 │ │ +│ │ ├ bindless_texture_descs ─── 纹理描述符数据 │ │ +│ │ └ bindless_buffer_descs ─── 缓冲描述符数据 │ │ +│ └──────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────┘ + │ + │ Shader 读取 + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ GPU (Shader/HLSL) │ +│ │ +│ g__array_114514_bdls[] ← 间接索引数组 (StructuredBuffer) │ +│ gbuffer__114514_bdls[] ← 缓冲描述符数组 │ +│ gTexture2D*__114514_bdls[] ← 纹理描述符数组 │ +│ gsampler__114514_bdls[] ← 采样器数组 │ +│ │ +│ TextureHandle / ArrayBuffer ← 用户友好的封装结构体 │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### 核心设计思路 + +MoerEngine 的 Bindless 使用了一层**间接寻址(Indirection)**: + +``` +Shader Handle (uint) + │ + ▼ +g__array_114514_bdls[handle] → packed_value (uint) + │ + ├── 纹理: (texture_slot << 8) | sampler_idx + │ ↓ ↓ + │ gTexture2D[texture_slot] gsampler[sampler_idx] + │ + └── 缓冲: buffer_slot + ↓ + gbuffer[buffer_slot] +``` + +为什么需要间接寻址?因为它允许**热替换**:只要修改间接数组中某个位置的值,所有引用该位置的 Shader 都会自动获取新的资源,无需重新编译。 + +--- + +## 6. 
Shader 侧实现详解 + +Shader 侧代码位于 `shaders/core/common/Bindless.hlsl`。 + +### 6.1 资源声明:`BINDLESS_BINDINGS` 宏 + +每个需要使用 Bindless 的 Shader 都需要调用这个宏: + +```hlsl +#include "core/common/Bindless.hlsl" +BINDLESS_BINDINGS(space3, space2, space4, space5) +// │ │ │ │ +// │ │ │ └─ 加速结构 descriptor set +// │ │ └───────── 采样器 descriptor set +// │ └───────────────── 纹理 descriptor set +// └────────────────────────── 缓冲 descriptor set +``` + +这个宏展开后(以 Vulkan 为例),将声明: + +```hlsl +// 1. 间接索引数组 —— 所有 Handle 的入口 +[[vk::binding(0, space3)]] StructuredBuffer g__array_114514_bdls; + +// 2. 缓冲描述符的无界数组 +[[vk::binding(1, space3)]] ByteAddressBuffer gbuffer__114514_bdls[]; + +// 3. 采样器的无界数组 +[[vk::binding(0, space4)]] SamplerState gsampler__114514_bdls[]; + +// 4. 纹理描述符的无界数组(每种类型 × 每种格式) +[[vk::binding(0, space2)]] Texture2D gTexture2Dfloat4__114514_bdls[]; +[[vk::binding(0, space2)]] Texture2D gTexture2Dfloat__114514_bdls[]; +[[vk::binding(0, space2)]] Texture2D gTexture2Duint4__114514_bdls[]; +// ... 还有 Texture1D, Texture3D, TextureCube 等 + +// 5. 加速结构(用于光线追踪) +[[vk::binding(0, space5)]] RaytracingAccelerationStructure gaccelg__114514_bdls[]; + +// 6. 资源访问封装结构体 +// struct VKResourceDescriptorHeap { ... }; +// struct ArrayBuffer { ... }; +// struct TextureHandle { ... }; +// struct SamplerHandle { ... }; +``` + +> `_114514_bdls` 这个后缀是引擎内部的命名约定,用于避免与用户代码的命名冲突。 + +### 6.2 Handle 类型体系 + +Bindless.hlsl 定义了一套 Handle 类型,每种 Handle 封装一个 `uint` 索引值: + +```hlsl +struct ByteBufferHandle { uint internalIndex; }; // 缓冲 +struct TlasHandle { uint internalIndex; }; // 光追加速结构 + +template struct Texture2DHandle { uint internalIndex; }; // 纹理(取texel) +template struct Texture2DSampleHandle { uint internalIndex; }; // 纹理(带采样器) +struct Texture2DHandleNative { uint internalIndex; }; // 不带模板的版本 +struct Texture2DSampleHandleNative { uint internalIndex; }; // 不带模板的版本 + +struct SamplerHeapHandle { uint internalIndex; }; // 采样器 +// ... 
Texture1D, Texture3D, TextureCube 等类似 +``` + +### 6.3 资源访问流程(以纹理采样为例) + +当你写下 `TextureHandle(handle).Sample2D(uv)` 时,背后发生了什么: + +```hlsl +// 第 1 步:从间接数组取出打包值 +uint tex_handle = g__array_114514_bdls[NonUniformResourceIndex(handle.internalIndex)]; + +// 第 2 步:解包 +uint tex_idx = tex_handle >> 8; // 高 24 位 = 纹理在描述符堆中的索引 +uint sampler_idx = tex_handle & 0xff; // 低 8 位 = 采样器索引(最多 256 个采样器) + +// 第 3 步:从全局数组中取出实际资源 +Texture2D tex = gTexture2Dfloat4__114514_bdls[NonUniformResourceIndex(tex_idx)]; +SamplerState spl = gsampler__114514_bdls[NonUniformResourceIndex(sampler_idx)]; + +// 第 4 步:采样 +return tex.Sample(spl, uv); +``` + +打包格式图示: + +``` +g__array_114514_bdls[handle] 的值(32 位 uint): +┌───────────────────────────┬──────────┐ +│ texture_slot (24 bits) │ sampler │ +│ 高 24 位 │ (8 bits) │ +│ 纹理描述符索引 │ 采样器索引│ +└───────────────────────────┴──────────┘ +``` + +### 6.4 用户友好的封装:`TextureHandle` 和 `ArrayBuffer` + +为了让 Shader 开发者不需要手写上述流程,Bindless.hlsl 提供了两个高层封装: + +```hlsl +struct TextureHandle { + uint handle; + + // 2D 纹理采样 + template T Sample2D(float2 uv); + template T SampleLevel(float2 uv, float level); + template T SampleGrad(float2 uv, float2 grad_x, float2 grad_y); + + // Cube 纹理采样 + template T SampleCube(float3 uv); + template T SampleLevelCube(float3 uv, float level); + + // 获取底层 Texture2D 对象(用于 Load 等操作) + template Texture2D GetTexture2D(); +}; + +struct ArrayBuffer { + uint handle; + + // 获取底层 ByteAddressBuffer + ByteAddressBuffer GetByteAddressBuffer(); + + // 按结构体类型 + 索引读取数据 + template T Load(uint index); + template T Load(uint index, uint offset); +}; + +struct SamplerHandle { + uint handle; + SamplerState GetSampler(); +}; +``` + +### 6.5 `RenderResourceHandle`:调试友好的资源标识 + +还有一个辅助结构体,用于资源管理和调试: + +```hlsl +struct RenderResourceHandle { + uint index; + // 32 位打包布局: + // [0:22] 23 位 —— 索引(可寻址 800 万+ 资源) + // [23:24] 2 位 —— ResourceTag(资源类型标识) + // [25] 1 位 —— IsWritable(是否可写) + // [26:31] 6 位 —— Version(版本号,防止 use-after-free) + + bool IsValid() { return index != ~0; } 
+ uint ReadIndex() { return index & ((1 << 23) - 1); } + uint ResourceTag(){ return (index >> 23) & 3; } + bool IsWritable() { return (index >> 25) & 1; } + uint Version() { return (index >> 26) & 0x3F; } +}; +``` + +--- + +## 7. C++ 侧实现详解 + +### 7.1 全局描述符堆:`VulkanDescriptorHeap` + +这是整个 Bindless 系统的基础设施,管理所有 GPU 描述符: + +```cpp +struct VulkanDescriptorHeap { + // 三类描述符的原始数据(CPU 端副本) + Array buffer_desc_data; // 缓冲描述符 + Array image_desc_data; // 图像描述符 + Array accel_desc_data; // 加速结构描述符 + + // 空闲索引回收列表 + Array buffer_free_list; + Array image_free_list; + Array accel_free_list; + + // 核心接口 + uint GetBufferDescIdx(const BufferView& _in_buffer, ...); // 分配缓冲描述符 + uint GetImageDescIdx(const TextureView* _in_image, ...); // 分配图像描述符 + uint GetSamplerDescIdx(Sampler _sampler); // 分配采样器描述符 + uint GetAccelDescIdx(VulkanAccelerationStructure* _as); // 分配加速结构描述符 +}; +``` + +它利用了 `VK_EXT_descriptor_buffer`,把描述符当作字节数据来管理,通过 `memcpy` 操作来写入和复制描述符。 + +### 7.2 Bindless 数组:`VulkanBindlessArray` + +这是 CPU 端的核心类,管理间接索引数组和资源分配: + +```cpp +class VulkanBindlessArray : public BindlessArray { +public: + // ----- 资源类型 ----- + enum EType : uint { Texture, Buffer }; + + // ----- 内部 Handle 结构 ----- + struct Handle { + uint ptr_1; // 资源指针高 32 位 + uint ptr_2; // 资源指针低 32 位 + uint slot : 22; // 描述符堆中的槽位 + uint attrib : 8; // 属性(mip level 等) + uint type : 2; // 类型:Texture(0) 或 Buffer(1) + }; + + // ----- 核心接口 ----- + uint AllocateTexture(const TextureView& _texture, Sampler _sampler); + uint AllocateBuffer(BufferView _buffer); + void UnbindTexture(uint _slot); + void UnbindBuffer(uint _slot); + + // ----- GPU 缓冲 ----- + VulkanBuffer* bindless_array_buffer; // 间接索引数组 → Shader 的 g__array_114514_bdls + VulkanBuffer* bindless_texture_descs; // 纹理描述符 → Shader 的 gTexture2D*__114514_bdls + VulkanBuffer* bindless_buffer_descs; // 缓冲描述符 → Shader 的 gbuffer__114514_bdls + + // ----- 内部管理 ----- + Array handles; // 所有已分配的 Handle + Array update_cmds; // 待执行的更新命令队列 + LockFreeQueue free_slots; // 空闲的间接数组槽位 + 
LockFreeQueue free_texture_slots;// 空闲的纹理描述符槽位 + LockFreeQueue free_buffer_slots; // 空闲的缓冲描述符槽位 +}; +``` + +### 7.3 资源分配流程 + +以 `AllocateTexture` 为例: + +```cpp +uint VulkanBindlessArray::AllocateTexture(const TextureView& _texture, Sampler _sampler) { + // 第 1 步: 从空闲列表或原子计数器获取一个间接数组槽位 + uint slot_idx = free_slots.Pop(); + if (slot_idx == 0) slot_idx = slot_offset++; + + // 第 2 步: 获取一个纹理描述符槽位 + uint texture_slot = free_texture_slots.Pop(); + if (texture_slot == 0) texture_slot = texture_slot_offset++; + + // 第 3 步: 推入更新命令(延迟执行) + update_cmds.emplace_back(TextureUpdateInfo{ + _texture.texture, _sampler, _texture.format, + slot_idx, texture_slot, + _texture.mip_level, _texture.num_mips, + _texture.array_layer, _texture.num_array, + false // free=false 表示这是分配,不是释放 + }); + + // 第 4 步: 返回间接数组索引给调用者 + return slot_idx; // 这就是 Shader 中 TextureHandle.handle 的值 +} +``` + +调用者获得 `slot_idx` 后,将它作为 `uint` 传入 Shader 的常量缓冲区或 Push Constant。 + +### 7.4 帧同步更新:`CreateUpdateCommand()` + +真正的描述符写入发生在帧开始时,由 `CreateUpdateCommand()` 执行: + +```cpp +UniquePtr VulkanBindlessArray::CreateUpdateCommand() { + VulkanDescriptorHeap& heap = m_device->GetGlobalDescriptorHeap(); + + for (const UpdateCmd& cmd : update_cmds) { + if (is TextureUpdateInfo && !free) { + // 1. 从全局描述符堆获取纹理描述符数据 + uint src_idx = heap.GetImageDescIdx(&view, layout); + + // 2. 复制描述符字节数据到上传缓冲 + memcpy(texture_dat + offset, &heap.image_desc_data[src_idx], stride); + + // 3. 构造间接数组的值:(texture_slot << 8) | sampler_idx + uint indirect_handle = (sampler_idx & 0xff) | (texture_slot & 0xffffff) << 8; + memcpy(array_dat + offset, &indirect_handle, sizeof(uint)); + + // 4. 
记录 Handle 信息用于后续释放 + handles[array_idx] = Handle(ptr, slot, attrib, Texture); + } + else if (is BufferUpdateInfo && !free) { + // 缓冲区类似,但间接数组直接存储 buffer_slot + uint src_idx = heap.GetBufferDescIdx(view, type); + memcpy(buffer_dat + offset, &heap.buffer_desc_data[src_idx], stride); + memcpy(array_dat + offset, &buffer_slot, sizeof(uint)); + } + else if (free) { + // 释放:回收槽位到空闲列表 + free_slots.Push(array_idx); + free_texture_slots.Push(slot); + } + } + + // 返回一个 Command 对象,在 GPU 时间线上执行 buffer 更新 + return MakeUnique(...); +} +``` + +### 7.5 资源释放与版本管理 + +```cpp +void VulkanBindlessArray::UnbindTexture(uint _array_idx) { + // 检查是否有 pending 的更新命令 + if (auto iter = temp_slot_to_cmd.find(_array_idx); iter != end) { + // 如果资源还没真正写入 GPU,直接取消该命令 + update_cmds[iter->second] = InvalidUpdateInfo{_array_idx}; + } else { + // 资源已在 GPU 上,推入释放命令 + update_cmds.emplace_back(TextureUpdateInfo{nullptr, ..., true/*free=true*/}); + } +} + +// 帧结束时,真正回收槽位 +void VulkanBindlessArray::OnFree(const Array& slots, ...) { + for (uint idx : slots) free_slots.Push(idx); + for (uint idx : textures) free_texture_slots.Push(idx); + for (uint idx : buffers) free_buffer_slots.Push(idx); +} +``` + +释放是延迟的——必须等 GPU 不再使用该资源后才能回收槽位。 + +--- + +## 8. 
完整数据流:从 CPU 到 Shader + +下面用一个完整的例子,追踪一张纹理从"C++ 注册"到"Shader 采样"的完整路径: + +``` + C++ 侧 + ══════ +Step 1: 用户调用 + uint handle = bindlessArray->AllocateTexture(albedoView, linearSampler); + // 返回: handle = 42 (间接数组中的槽位);同时已分配纹理描述符槽位 texture_slot = 7 + +Step 2: 将 handle 写入 Push Constant / Constant Buffer + pushConstant.albedo_handle = 42; + + 帧同步 + ══════ +Step 3: CreateUpdateCommand() 执行 + ├── 从全局堆获取纹理描述符数据 → 写入 Step 1 分配的 texture_slot = 7 + ├── 查询采样器索引 → sampler_idx = 3 + ├── 写入间接数组: + │ g__array_114514_bdls[42] = (7 << 8) | 3 = 0x00000703 + ├── 写入纹理描述符堆: + │ bindless_texture_descs[7] = (从全局堆复制的纹理描述符字节数据) + └── GPU Buffer Upload + + Shader 侧 + ═════════ +Step 4: Shader 代码 + TextureHandle tex = TextureHandle(param.albedo_handle); // handle = 42 + float4 color = tex.Sample2D<float4>(uv); + +Step 5: 展开后的实际执行 + uint packed = g__array_114514_bdls[42]; // = 0x00000703 + uint tex_idx = packed >> 8; // = 7 + uint sampler_idx = packed & 0xFF; // = 3 + Texture2D<float4> texObj = gTexture2Dfloat4__114514_bdls[7]; + SamplerState spl = gsampler__114514_bdls[3]; + return texObj.Sample(spl, uv); // → 最终颜色 +``` + +--- + +## 9. 
实战:一个光照 Shader 的 Bindless 用法 + +以 `RasterLightingPass.frag.hlsl` 为例,展示 Bindless 在实际 Shader 中的使用模式: + +```hlsl +#include "core/common/Bindless.hlsl" +#include "core/common/Common.hlsl" +BINDLESS_BINDINGS(3, 2, 4, 5) // 声明所有 Bindless 全局资源 + +// Push Constant 中只有 uint handle,不需要直接绑定任何纹理 +[[vk::push_constant]] ConstantBuffer param; + +float4 main(float2 in_uv : TEXCOORD0) : SV_TARGET { + + // -------- 通过 handle 访问纹理 -------- + // 从 VBuffer 纹理中采样材质 ID + uint material_id = TextureHandle(param.vbuffer).Sample2D(in_uv); + + // 从 GBuffer 纹理中采样几何信息 + float2 uv = TextureHandle(param.gbuffer_uv).Sample2D(in_uv); + float depth = TextureHandle(param.gbuffer_depth).Sample2D(in_uv); + float3 normal = TextureHandle(param.gbuffer_normal).Sample2D(in_uv); + + // -------- 通过 handle 访问缓冲区 -------- + // 加载材质数据 + ArrayBuffer material_buf = ArrayBuffer(param.material_buf_hdl); + GMaterial mat = material_buf.Load(material_id); + + // 加载光源数据 + ArrayBuffer light_buf = ArrayBuffer(param.light_buf_hdl); + for (uint i = 0; i < light_count; i++) { + GLight light = light_buf.Load(i); + // ... 光照计算 + } + + // -------- 材质纹理也是 Bindless 的 -------- + // mat.albedo_map_hdl 也是一个 uint handle + float3 albedo = TextureHandle(mat.albedo_map_hdl).Sample2D(uv); + + // ... PBR 计算 + return float4(finalColor, 1.0); +} +``` + +注意看:整个 Shader 中**没有任何** `Texture2D myTexture : register(t0)` 这样的传统绑定。所有资源都通过 `uint handle` 索引访问。Push Constant 只需要传递几个 `uint` 值。 + +--- + +## 10. 
Vulkan vs D3D12 差异对比 + +MoerEngine 使用条件编译 (`#if VULKAN` / `#elif DXIL`) 来处理两个 API 的差异: + +| 维度 | Vulkan | D3D12 | +|---|---|---| +| **描述符管理** | Descriptor Buffer 扩展,描述符存储为字节数据 | 系统内置 `ResourceDescriptorHeap` | +| **纹理数组声明** | 每种类型 × 格式一个无界数组 `Texture2D arr[];` | 不需要声明,直接 `ResourceDescriptorHeap[idx]` | +| **采样器声明** | `SamplerState gsampler[];` 无界数组 | `SamplerDescriptorHeap[idx]` | +| **缓冲声明** | `ByteAddressBuffer gbuffer[];` 无界数组 | `ResourceDescriptorHeap[idx]` | +| **绑定注解** | `[[vk::binding(binding, set)]]` | Root Signature Flag 控制 | +| **描述符堆封装** | `VKResourceDescriptorHeap` 静态结构体 | `DXResourceDescriptorHeapAccessor` 静态结构体 | +| **写索引(UAV)** | `WriteIndex() = ReadIndex()` (同一个描述符) | `WriteIndex() = ReadIndex() + 1` (写描述符在读描述符之后) | + +尽管底层不同,但上层 API(`TextureHandle`, `ArrayBuffer`)是完全统一的——Shader 开发者不需要关心底层是 Vulkan 还是 D3D12。 + +--- + +## 11. 总结 + +MoerEngine 的 Bindless 系统可以总结为以下要点: + +1. **一层间接寻址**:`g__array_114514_bdls` 间接数组是系统的核心,所有资源访问都从这里开始 +2. **打包索引**:纹理的间接值中打包了纹理描述符索引(24 位)和采样器索引(8 位) +3. **延迟更新**:CPU 端的分配是立即返回的,但描述符的实际写入是延迟到帧开始的 `CreateUpdateCommand()` 中执行 +4. **槽位回收**:使用无锁队列管理空闲槽位,支持安全的延迟回收 +5. **跨 API 统一**:Shader 层提供的 `TextureHandle` / `ArrayBuffer` 接口对 Vulkan 和 D3D12 完全一致 +6. 
**宏生成**:使用大量 HLSL 宏来自动生成所有纹理类型 × 格式的访问函数,避免手写重复代码 + +这套系统让 MoerEngine 能够在一次 Draw Call 中渲染使用不同纹理的多种材质,极大减少了 CPU 端的绑定开销,是现代高性能渲染引擎的标配架构。 diff --git a/README.en.md b/docs/README.en.md similarity index 100% rename from README.en.md rename to docs/README.en.md From 3a591606ead22fbe7e3e044200fcb0166281303f Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Mon, 30 Mar 2026 15:04:17 +0800 Subject: [PATCH 14/17] =?UTF-8?q?feat(raster):=20=E4=BC=98=E5=8C=96RTAO?= =?UTF-8?q?=E5=AF=84=E5=AD=98=E5=99=A8=E5=BC=80=E9=94=80=EF=BC=9A=E7=94=A8?= =?UTF-8?q?Shader=E5=8F=98=E4=BD=93=E4=BB=A3=E6=9B=BFif=EF=BC=8C=E7=AE=80?= =?UTF-8?q?=E5=8C=96=E6=8E=A7=E5=88=B6=E6=B5=81=E3=80=81=E9=87=8D=E6=8E=92?= =?UTF-8?q?=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit name: RTAO Register Optimization overview: 通过代码重排、简化 RayQuery 控制流、消除运行时分支等手段,降低 RTAO shader 的寄存器峰值占用,提升 occupancy,缓解 MIOT stall。 todos: - id: simplify-rayquery content: "优化 1: 简化 CastVisibilityRay — 添加 RAY_FLAG_FORCE_OPAQUE,移除 while 循环和候选处理分支" status: completed - id: reorder-ray-weight content: "优化 2: 重排循环体 — 将 ray_weight 计算移到 CastVisibilityRay 之前,让 rand_vec 提前死亡" status: completed - id: eliminate-branch content: "优化 3: 消除 sample_mode 运行时分支 — 使用编译期方案替代" status: completed - id: blue-noise content: "优化 4: Blue Noise 替代 Hash RNG — 添加纹理参数,改善 BVH cache 命中率" status: completed --- .../postprocess/lighting_effects/Rtao.hlsl | 75 ++++++------------- .../runtime/render/renderer/raster/AoPass.h | 33 ++++++-- .../raster/post_process/ShaderParameters.h | 3 + 3 files changed, 52 insertions(+), 59 deletions(-) diff --git a/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl b/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl index feb0965c5..c7ca2da5f 100644 --- a/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl @@ -17,10 +17,9 @@ BINDLESS_BINDINGS(3, 2, 4, 5) // 
定义了AoOutput、CameraMotionVector等函�? #include "pipelines/postprocess/lighting_effects/AoCommon.hlsl" -// TODO: 代码整理 -namespace Moer { -typedef Math::Rng::Hash RandomState; -} +// R2 quasi-random sequence offsets (generalized golden ratio for 2D) +// phi_2 = 1.32471795724..., alpha1 = 1/phi_2, alpha2 = 1/phi_2^2 +static const float2 R2_ALPHA = float2(0.7548776662466927, 0.5698402909980532); // y>=0半球上均匀采样 float4 SampleHemisphere(float2 u) { // uv in [0, 1)^2 @@ -50,10 +49,8 @@ float3 LocalVectorToWorld(float3 local_vector, float3 normal) { + local_vector.z * bitangent; } -// TODO: 和RT那边的函数合�? bool CastVisibilityRay(float3 origin, float3 direction, float tmin, float tmax, - RaytracingAccelerationStructure accel, - uint instance_mask, uint ray_flags) { + RaytracingAccelerationStructure accel) { RayDesc ray_desc; ray_desc.Origin = origin; ray_desc.Direction = direction; @@ -61,18 +58,11 @@ bool CastVisibilityRay(float3 origin, float3 direction, float tmin, float tmax, ray_desc.TMax = tmax; RayQuery + RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH | + RAY_FLAG_FORCE_OPAQUE> ray_query; - ray_query.TraceRayInline(accel, ray_flags, instance_mask, ray_desc); - - while (ray_query.Proceed()) { - if (ray_query.CandidateType() == CANDIDATE_NON_OPAQUE_TRIANGLE) { - ray_query.CommitNonOpaqueTriangleHit(); - } else { - ray_query.Abort(); - break; - } - } + ray_query.TraceRayInline(accel, RAY_FLAG_NONE, Moer::RTVM_ALL, ray_desc); + ray_query.Proceed(); return ray_query.CommittedStatus() == COMMITTED_NOTHING; } @@ -93,59 +83,38 @@ AoOutput get_rtao(float2 uv) { } frag_normal = Raster::UnpackNormal(frag_normal); - Moer::RandomState rng = Moer::RandomState::Create(uv * param.resolution, param.frame_idx); - float3 frag_position = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); + // Blue noise base value — spatially coherent across neighboring pixels + float2 noise_uv = uv * param.resolution / 256.0; + float2 blue_noise_base = TextureHandle(param.noise_tex).Sample2D(noise_uv); 
+ // Raytraced AO float total_ray_contrib = 0.0; float visible_ray_contrib = 0.0; for (uint i = 0; i < param.spp; i++) { - float2 rand_value = rng.GetFloat2(); - float4 rand_vec; - if (param.sample_mode == 0) { - rand_vec = SampleHemisphere(rand_value); - } else { - rand_vec = SampleCosineHemisphere(rand_value); - } + // Cranley-Patterson rotation with R2 quasi-random sequence for temporal + per-sample variation + float2 rand_value = frac(blue_noise_base + R2_ALPHA * float(param.frame_idx * param.spp + i)); +#if RTAO_COSINE_WEIGHTED + float4 rand_vec = SampleCosineHemisphere(rand_value); +#else + float4 rand_vec = SampleHemisphere(rand_value); +#endif float3 direction = LocalVectorToWorld(rand_vec.xyz, frag_normal); + float ray_weight = max(dot(frag_normal, direction), 0.05f) / max(/* pdf */ rand_vec.w, 0.05f); + bool is_miss = CastVisibilityRay( frag_position + frag_normal * 0.01, direction, 0.f, param.ray_trace_distance, - tlas, - Moer::RTVM_ALL, // instance_mask - RAY_FLAG_NONE // ray_flags + tlas ); - float ray_weight = max(dot(frag_normal, direction), 0.05f) / max(/* pdf */ rand_vec.w, 0.05f); - total_ray_contrib += ray_weight; visible_ray_contrib += ray_weight * (is_miss ? 
1.0 : (1.0 - param.intensity)); } - // if (uv.x <= param.inv_resolution.x && uv.y <= param.inv_resolution.y) { - // printf( - // "FragPos (%.2f %.2f %.2f); FragNormal (%.2f %.2f %.2f); RandVa (%.4f %.4f); RandVector (%.4f %.4f %.4f); Direction (%.4f %.4f %.4f); is_sky: %d\n", - // frag_position.x, - // frag_position.y, - // frag_position.z, - // frag_normal.x, - // frag_normal.y, - // frag_normal.z, - // rand_value.x, - // rand_value.y, - // rand_vec.x, - // rand_vec.y, - // rand_vec.z, - // direction.x, - // direction.y, - // direction.z, - // int(is_sky) - // ); - // } - float ao = visible_ray_contrib / total_ray_contrib; AoOutput output; diff --git a/source/runtime/render/renderer/raster/AoPass.h b/source/runtime/render/renderer/raster/AoPass.h index 506d17d2c..d50747471 100644 --- a/source/runtime/render/renderer/raster/AoPass.h +++ b/source/runtime/render/renderer/raster/AoPass.h @@ -28,8 +28,12 @@ class RtaoPipeline : public RasterPipeline { DEFINE_SHADER_BINDLESS_ARRAY(bdls); DEFINE_SHADER_ARGS(tlas, bdls, param); + + MUTATION_BOOL(RTAO_COSINE_WEIGHTED); }; +MUTATION_SET(RtaoSampleModeMacros, RtaoPipeline::RTAO_COSINE_WEIGHTED); + class SsdoPipeline : public RasterPipeline { public: DEFINE_RASTER_PIPELINE_CLASS(SsdoPipeline); @@ -71,10 +75,21 @@ class AoPass { .Pixel("pipelines/postprocess/lighting_effects/Ao.hlsl") .Build(std::move(create_pso_func())); - rtao_pipeline = context.manager.Raster() - .Vertex("core/utils/FullScreenQuad.hlsl") - .Pixel("pipelines/postprocess/lighting_effects/Rtao.hlsl") - .Build(std::move(create_pso_func())); + { + RtaoSampleModeMacros uniform_macros{}; + uniform_macros.SetMutation(false); + rtao_pipeline_uniform = context.manager.Raster() + .Vertex("core/utils/FullScreenQuad.hlsl") + .Pixel("pipelines/postprocess/lighting_effects/Rtao.hlsl", "main", uniform_macros) + .Build(create_pso_func()); + + RtaoSampleModeMacros cosine_macros{}; + cosine_macros.SetMutation(true); + rtao_pipeline_cosine = context.manager.Raster() + 
.Vertex("core/utils/FullScreenQuad.hlsl") + .Pixel("pipelines/postprocess/lighting_effects/Rtao.hlsl", "main", cosine_macros) + .Build(create_pso_func()); + } ssdo_pipeline = context.manager.Raster() .Vertex("core/utils/FullScreenQuad.hlsl") @@ -192,8 +207,13 @@ class AoPass { UpdateMotionVectorData(context, camera); param.camera_mv_data_handle = camera_mv_data_in_gpu.hdl; + param.noise_tex = context.textures.noise_tex.hdl; + + auto& active_rtao_pipeline = (ui_config.rtao_sample_mode == ERtaoSampleMode::COSINE_WEIGHTED) + ? rtao_pipeline_cosine + : rtao_pipeline_uniform; - context.cmd_list.Gfx(rtao_pipeline, context.rt_scene()->GetTlas(), context.bdls, param) + context.cmd_list.Gfx(active_rtao_pipeline, context.rt_scene()->GetTlas(), context.bdls, param) .Draw( "RTAO Pass", context.textures.ao_output.GetRect2D(), @@ -245,7 +265,8 @@ class AoPass { private: AoPipeline ao_pipeline; - RtaoPipeline rtao_pipeline; + RtaoPipeline rtao_pipeline_uniform; + RtaoPipeline rtao_pipeline_cosine; SsdoPipeline ssdo_pipeline; CameraMotionVectorData camera_mv_data_in_cpu; // mv: motion vector diff --git a/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h b/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h index efd2cb884..b82eabb3b 100644 --- a/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h +++ b/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h @@ -88,6 +88,9 @@ struct RtaoPipelineBindlessParam { float intensity; uint camera_mv_data_handle; // for camera motion vector + uint noise_tex; // blue noise texture for spatially coherent sampling + uint padding0; + uint padding1; }; struct RtaoDenoiserPassBindlessParam { From b7fb635fbd4ecd693b472b5d32c0786cb7948d49 Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Mon, 30 Mar 2026 18:17:40 +0800 Subject: [PATCH 15/17] =?UTF-8?q?feat(raster):=20=E5=B0=86RTAO=E9=87=8D?= 
=?UTF-8?q?=E6=9E=84=E4=B8=BAComputeShader=EF=BC=9B=E8=BE=93=E5=87=BA1/2?= =?UTF-8?q?=20DownSample=E7=9A=84AO=E7=BA=B9=E7=90=86=EF=BC=9B=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E7=8B=AC=E7=AB=8B=E7=9A=84AoCompositePass=E6=9D=A5?= =?UTF-8?q?=E8=9E=8D=E5=90=88AO=E5=92=8CSceneColor=EF=BC=8CAO=E6=9C=AC?= =?UTF-8?q?=E8=BA=AB=E4=B8=8D=E8=BE=93=E5=87=BASceneColor=EF=BC=9B?= =?UTF-8?q?=E4=BC=98=E5=8C=96AoPass=E6=95=B0=E6=8D=AE=E6=B5=81=EF=BC=9B?= =?UTF-8?q?=E5=8E=BB=E9=99=A4SR=E7=9B=B8=E5=85=B3=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 6 +- .../postprocess/denoise/RtaoDenoiser.hlsl | 20 +- .../postprocess/lighting_effects/Ao.hlsl | 43 +-- .../lighting_effects/AoCommon.hlsl | 23 +- .../lighting_effects/AoComposite.hlsl | 24 ++ .../postprocess/lighting_effects/Rtao.hlsl | 125 ++++--- .../postprocess/lighting_effects/Ssdo.hlsl | 102 +----- source/editor/raster_ui/RasterUI.cpp | 3 + .../runtime/render/renderer/raster/AoPass.h | 173 ++++++---- .../render/renderer/raster/AssetTool.h | 31 +- .../render/renderer/raster/RasterConfig.h | 10 +- .../render/renderer/raster/RasterRenderer.cpp | 22 +- .../render/renderer/raster/RasterRenderer.h | 2 - .../render/renderer/raster/RasterResource.h | 11 +- .../render/renderer/raster/RasterTextures.h | 314 +++++++++--------- .../render/renderer/raster/RtaoDenoiserPass.h | 48 +-- .../render/renderer/raster/UpsamplePass.h | 74 ----- source/runtime/render/rhi/RHIResource.cpp | 4 + source/runtime/render/rhi/RHIResource.h | 2 + .../raster/post_process/ShaderParameters.h | 38 +-- 20 files changed, 460 insertions(+), 615 deletions(-) create mode 100644 shaders/pipelines/postprocess/lighting_effects/AoComposite.hlsl delete mode 100644 source/runtime/render/renderer/raster/UpsamplePass.h diff --git a/.gitignore b/.gitignore index 8d000b922..3a805e1ff 100644 --- a/.gitignore +++ b/.gitignore @@ -20,9 +20,13 @@ justfile EnableFeatures.cmake MoerEngine.toml -# 4. Logs +# 4. 
Vibe Coding +/.cursor/ + +# 5. Logs logs/ log*.txt +debug-*.log .VSCodeCounter/ rhi_docs/01-overview.md rhi_docs/02-core-abstractions.md diff --git a/shaders/pipelines/postprocess/denoise/RtaoDenoiser.hlsl b/shaders/pipelines/postprocess/denoise/RtaoDenoiser.hlsl index 612a3f7cb..2b66ab445 100644 --- a/shaders/pipelines/postprocess/denoise/RtaoDenoiser.hlsl +++ b/shaders/pipelines/postprocess/denoise/RtaoDenoiser.hlsl @@ -7,12 +7,12 @@ BINDLESS_BINDINGS(3, 2, 4, 5) struct RtaoDenoiserOutput { float accumulate_ao : SV_TARGET0; - float4 color_with_ao : SV_TARGET1; }; void get_reprojected_ao(float2 uv, float curr_ao, out float out_history_ao, out float out_history_weight) { - float2 motion_vector = TextureHandle(param.motion_vector_tex).Sample2D(uv).rg; // value in [-1, 1] (NDC Space) + float2 motion_vector = + TextureHandle(param.motion_vector_tex).Sample2D(uv).rg; // value in [-1, 1] (NDC Space) float2 prev_uv = float2(uv.x - motion_vector.x, uv.y + motion_vector.y); if (prev_uv.x <= 0.0 || prev_uv.x >= 1.0 || prev_uv.y <= 0.0 || prev_uv.y >= 1.0) { prev_uv = uv; @@ -26,11 +26,11 @@ void get_reprojected_ao(float2 uv, float curr_ao, out float out_history_ao, out // validation if (param.is_validation_enable) { - float curr_depth = TextureHandle(param.depth_tex).Sample2D(uv); - float prev_depth = TextureHandle(param.depth_tex).Sample2D(prev_uv); + float curr_depth = TextureHandle(param.depth_tex).Sample2D(uv); + float prev_depth = TextureHandle(param.depth_tex).Sample2D(prev_uv); float3 curr_normal = Raster::UnpackNormal(TextureHandle(param.normal_tex).Sample2D(uv)); float3 prev_normal = Raster::UnpackNormal(TextureHandle(param.normal_tex).Sample2D(prev_uv)); - + if (abs(curr_depth - prev_depth) > curr_depth * param.valid_depth_threshold) { out_history_ao = curr_ao; } @@ -57,7 +57,7 @@ RtaoDenoiserOutput main(float2 uv : TEXCOORD0) { // current info float curr_ao = TextureHandle(param.curr_ao_tex).Sample2D(uv); - float history_ao = 0.0; + float history_ao = 0.0; float 
history_weight = param.history_ratio; // reprojection @@ -67,14 +67,6 @@ RtaoDenoiserOutput main(float2 uv : TEXCOORD0) { history_ao = TextureHandle(param.history_ao_tex).Sample2D(uv); } - // get result output.accumulate_ao = history_ao * history_weight + curr_ao * (1.0 - history_weight); - - if (param.is_rtao_ao_only) { - output.color_with_ao = output.accumulate_ao; - } else { - float4 color = TextureHandle(param.color_tex).Sample2D(uv); - output.color_with_ao = float4(color.rgb * output.accumulate_ao, 1.0); - } return output; } \ No newline at end of file diff --git a/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl b/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl index 200edcde7..191c8133c 100644 --- a/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/Ao.hlsl @@ -14,13 +14,11 @@ BINDLESS_BINDINGS(3, 2, 4, 5) // 定义了AoOutput、CameraMotionVector等函�? #include "pipelines/postprocess/lighting_effects/AoCommon.hlsl" -static const float3 ABNORMAL_COLOR = float3(0.0, 0.0, 1.0); - // Per-pixel hash: 以整数像素坐标为输入,保证相邻像素得到不同的随机值, // 不依赖 noise texture,彻底避免 UV 采样粒度导致的 screen-fixed pattern。 float2 hash22(float2 pixel, uint sample_idx) { float2 p = pixel + float2(float(sample_idx) * 17.0, float(sample_idx) * 31.0); - p = frac(p * float2(443.8975, 397.2973)); + p = frac(p * float2(443.8975, 397.2973)); p += dot(p.xy, p.yx + 19.19); return frac(float2(p.x * p.y, p.x + p.y)); } @@ -30,23 +28,21 @@ float ssao_games202(float2 uv) { float3 normal = Raster::UnpackNormal(TextureHandle(param.normal_tex).Sample2D(uv)); float3 position = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); - // if (uv.x < param.inv_resolution.x && uv.y < param.inv_resolution.y) { - // printf("uv: %f, %f; pos: %f, %f, %f\n", uv.x, uv.y, position.x, position.y, position.z); - // } - float ao = 0.0; float2 tmp1 = param.ssao_radius * param.inv_resolution; float2 pixel = floor(uv / param.inv_resolution); // 整数像素坐标 for (uint i = 0; 
i < param.ssao_sample_count; i++) { - float2 offset = hash22(pixel, i) * 2.0 - 1.0; - float3 sample_position = WorldPosFromDepthTexture(param.depth_tex, uv + offset * tmp1, param.clip2world); + float2 offset = hash22(pixel, i) * 2.0 - 1.0; + float3 sample_position = + WorldPosFromDepthTexture(param.depth_tex, uv + offset * tmp1, param.clip2world); float3 vec = sample_position - position; float3 len = length(vec); float3 norm_vec = vec / len; - ao += max(0.0, dot(normal, norm_vec) - 0.05) * smoothstep(param.ssao_max_distance, param.ssao_max_distance * 0.5, len); + ao += max(0.0, dot(normal, norm_vec) - 0.05) * + smoothstep(param.ssao_max_distance, param.ssao_max_distance * 0.5, len); } ao = clamp(1.0 - ao / param.ssao_sample_count * param.ssao_intensity, 0.0, 1.0); @@ -59,34 +55,13 @@ float get_ao(float2 uv) { AoOutput main(float2 uv : TEXCOORD0) { AoOutput output; - - float3 color = TextureHandle(param.input_image).Sample2D(uv).rgb; - - - if (param.ao_mode == Moer::EAoMode::NONE) { - output.color_with_ao = float4(color, 1.0); - output.ambient_only = 1.0; - - } else if (param.ao_mode == Moer::EAoMode::SSAO) { - float ao = get_ao(uv); - - output.color_with_ao = float4(color * ao, 1.0); - output.ambient_only = ao; - - } else if (param.ao_mode == Moer::EAoMode::SSAO_AO_ONLY) { - float ao = get_ao(uv); - - output.color_with_ao = float4(ao, ao, ao, 1.0); - output.ambient_only = ao; + if (param.ao_mode == Moer::EAoMode::SSAO || param.ao_mode == Moer::EAoMode::SSAO_AO_ONLY) { + output.ambient_only = get_ao(uv); } else { - output.color_with_ao = float4(ABNORMAL_COLOR, 1.0); - output.ambient_only = 1.0; + output.ambient_only = 1.0; } output.camera_motion_vector = GetCameraMotionVector(uv); - - output.camera_motion_vector = output.camera_motion_vector * 0.5f + output.color_with_ao.xy * 0.5f; - return output; } \ No newline at end of file diff --git a/shaders/pipelines/postprocess/lighting_effects/AoCommon.hlsl b/shaders/pipelines/postprocess/lighting_effects/AoCommon.hlsl 
index 96de959dd..230f224bf 100644 --- a/shaders/pipelines/postprocess/lighting_effects/AoCommon.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/AoCommon.hlsl @@ -2,9 +2,8 @@ #include "pipelines/RasterCommon.hlsli" struct AoOutput { - float4 color_with_ao : SV_Target0; - float ambient_only : SV_Target1; - float2 camera_motion_vector : SV_Target2; + float ambient_only : SV_Target0; + float2 camera_motion_vector : SV_Target1; }; // 结果是 NDC Space,目标为当前像素,上一帧指向当前帧 @@ -12,20 +11,20 @@ float2 GetCameraMotionVector(float2 uv) { // camera_mv.world2clip // camera_mv.world2clip_prev // param.depth_tex - + // MARK: Lighting Data - ArrayBuffer camera_mv_data = ArrayBuffer(param.camera_mv_data_handle); - Moer::CameraMotionVectorData camera_mv = camera_mv_data.Load(0); - - float depth = TextureHandle(param.depth_tex).Sample2D(uv); + ArrayBuffer camera_mv_data = ArrayBuffer(param.camera_mv_data_handle); + Moer::CameraMotionVectorData camera_mv = camera_mv_data.Load(0); + + float depth = TextureHandle(param.depth_tex).SampleLevel(uv); if (depth < Epsilon) { return float2(0.0, 0.0); } - - float3 world_pos_vec3 = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); - float4 world_pos = float4(world_pos_vec3, 1.0); - float4 clip_pos = mul(camera_mv.world2clip, world_pos); + float3 world_pos_vec3 = WorldPosFromDepth(depth, uv, param.clip2world); + float4 world_pos = float4(world_pos_vec3, 1.0); + + float4 clip_pos = mul(camera_mv.world2clip, world_pos); float4 clip_pos_prev = mul(camera_mv.world2clip_prev, world_pos); // calculate in NDC space diff --git a/shaders/pipelines/postprocess/lighting_effects/AoComposite.hlsl b/shaders/pipelines/postprocess/lighting_effects/AoComposite.hlsl new file mode 100644 index 000000000..734fbc5a4 --- /dev/null +++ b/shaders/pipelines/postprocess/lighting_effects/AoComposite.hlsl @@ -0,0 +1,24 @@ +#include "core/common/Bindless.hlsl" +#include "core/common/Common.hlsl" +BINDLESS_BINDINGS(3, 2, 4, 5) +#include 
"shared/ShaderParameters.h" +#include "shared/raster/ShaderParameters.h" + +[[vk::push_constant]] ConstantBuffer param; +[[vk::binding(0, 0)]] RWTexture2D rw_output; + +[numthreads(8, 8, 1)] void main(uint2 pixel_pos : SV_DispatchThreadID) { + if (pixel_pos.x >= uint(param.full_resolution.x) || pixel_pos.y >= uint(param.full_resolution.y)) + return; + + float2 uv = (float2(pixel_pos) + 0.5) * param.inv_full_resolution; + float ao = TextureHandle(param.ao_tex).SampleLevel(uv); + + if (param.ao_mode == Moer::EAoMode::RTAO_AO_ONLY || param.ao_mode == Moer::EAoMode::SSAO_AO_ONLY || + param.ao_mode == Moer::EAoMode::SSDO_AO_ONLY) { + rw_output[pixel_pos] = float4(ao, ao, ao, 1.0); + } else { + float3 color = TextureHandle(param.color_tex).SampleLevel(uv); + rw_output[pixel_pos] = float4(color * ao, 1.0); + } +} diff --git a/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl b/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl index c7ca2da5f..5b62c3830 100644 --- a/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/Rtao.hlsl @@ -6,15 +6,13 @@ BINDLESS_BINDINGS(3, 2, 4, 5) #include "core/math/Math.hlsli" -#ifndef DI_BINDING_SLOT -#define DI_BINDING_SLOT 0 -#endif - [[vk::push_constant]] ConstantBuffer param; -[[vk::binding(0, DI_BINDING_SLOT)]] RaytracingAccelerationStructure tlas; +[[vk::binding(0, 0)]] RWTexture2D rw_ao_only; +[[vk::binding(1, 0)]] RWTexture2D rw_camera_mv; +[[vk::binding(2, 0)]] RaytracingAccelerationStructure tlas; -// 定义了AoOutput、CameraMotionVector等函�? 
+// 定义了AoOutput、CameraMotionVector等函数 #include "pipelines/postprocess/lighting_effects/AoCommon.hlsl" // R2 quasi-random sequence offsets (generalized golden ratio for 2D) @@ -23,74 +21,83 @@ static const float2 R2_ALPHA = float2(0.7548776662466927, 0.5698402909980532); // y>=0半球上均匀采样 float4 SampleHemisphere(float2 u) { // uv in [0, 1)^2 - float y = u.x; // cos theta - float r = sqrt(max(0.f, 1.f - y * y)); + float y = u.x; // cos theta + float r = sqrt(max(0.f, 1.f - y * y)); float phi = PI2 * u.y; return float4(r * cos(phi), y, r * sin(phi), /* pdf */ 1.0 / PI2); } // y>=0半球上cosine-weighted采样(即 半球正面采样概率大,适合漫反射材质) float4 SampleCosineHemisphere(float2 u) { - float r = sqrt(u.x); + float r = sqrt(u.x); float theta = 2.0 * PI * u.y; - float y = sqrt(max(0.0, 1.0 - u.x)); + float y = sqrt(max(0.0, 1.0 - u.x)); return float4(r * cos(theta), y, r * sin(theta), /* pdf */ y / PI); } -// 将一个半球坐标系中的vector转换到以某个特定normal�?z的半球上 (Written by AI) +// 将一个半球坐标系中的vector转换到以某个特定normal为z的半球上 (Written by AI) float3 LocalVectorToWorld(float3 local_vector, float3 normal) { - float3 up = abs(normal.y) < 0.999 ? float3(0, 1, 0) : float3(1, 0, 0); - float3 tangent = normalize(cross(up, normal)); + float3 up = abs(normal.y) < 0.999 ? 
float3(0, 1, 0) : float3(1, 0, 0); + float3 tangent = normalize(cross(up, normal)); float3 bitangent = cross(normal, tangent); - return local_vector.x * tangent - + local_vector.y * normal - + local_vector.z * bitangent; + return local_vector.x * tangent + local_vector.y * normal + local_vector.z * bitangent; } -bool CastVisibilityRay(float3 origin, float3 direction, float tmin, float tmax, - RaytracingAccelerationStructure accel) { - RayDesc ray_desc; - ray_desc.Origin = origin; - ray_desc.Direction = direction; - ray_desc.TMin = tmin; - ray_desc.TMax = tmax; - - RayQuery - ray_query; - ray_query.TraceRayInline(accel, RAY_FLAG_NONE, Moer::RTVM_ALL, ray_desc); - ray_query.Proceed(); - - return ray_query.CommittedStatus() == COMMITTED_NOTHING; +bool CastVisibilityRay( + float3 origin, + float3 direction, + float tmin, + float tmax, + RaytracingAccelerationStructure accel +) { + RayDesc ray_desc; + ray_desc.Origin = origin; + ray_desc.Direction = direction; + ray_desc.TMin = tmin; + ray_desc.TMax = tmax; + + RayQuery< + RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES | RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH | + RAY_FLAG_FORCE_OPAQUE> + ray_query; + ray_query.TraceRayInline(accel, RAY_FLAG_NONE, Moer::RTVM_ALL, ray_desc); + ray_query.Proceed(); + + return ray_query.CommittedStatus() == COMMITTED_NOTHING; } -AoOutput get_rtao(float2 uv) { +[numthreads(8, 8, 1)] void main(uint2 pixel_pos : SV_DispatchThreadID) { + if (pixel_pos.x >= uint(param.resolution.x) || pixel_pos.y >= uint(param.resolution.y)) + return; // Reference: shaders/hwrt/GBufferUtils.hlsli: SetupPrimaryRay() - float3 color = TextureHandle(param.input_image).Sample2D(uv); - float3 frag_normal = TextureHandle(param.normal_tex).Sample2D(uv); + float2 uv = (float2(pixel_pos) + 0.5) * param.inv_resolution; + + // Snap UV to the full-res depth texel center so that WorldPosFromDepth + // reconstructs along the same view ray the depth was measured on. 
+ float2 snapped_uv = (floor(uv * param.depth_tex_resolution) + 0.5) / param.depth_tex_resolution; + + float3 frag_normal = TextureHandle(param.normal_tex).SampleLevel(snapped_uv); if (abs(frag_normal.x) < Epsilon && abs(frag_normal.y) < Epsilon && abs(frag_normal.z) < Epsilon) { - // direct light is sky - AoOutput output; - output.color_with_ao = float4(color, 1.f); - output.ambient_only = 1.f; - return output; + rw_ao_only[pixel_pos] = 1.f; + rw_camera_mv[pixel_pos] = GetCameraMotionVector(snapped_uv); + return; } frag_normal = Raster::UnpackNormal(frag_normal); - float3 frag_position = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); + float depth = TextureHandle(param.depth_tex).SampleLevel(snapped_uv); + float3 frag_position = WorldPosFromDepth(depth, snapped_uv, param.clip2world); // Blue noise base value — spatially coherent across neighboring pixels - float2 noise_uv = uv * param.resolution / 256.0; - float2 blue_noise_base = TextureHandle(param.noise_tex).Sample2D(noise_uv); + float2 noise_uv = uv * param.resolution / 256.0; + float2 blue_noise_base = TextureHandle(param.noise_tex).SampleLevel(noise_uv); // Raytraced AO - float total_ray_contrib = 0.0; + float total_ray_contrib = 0.0; float visible_ray_contrib = 0.0; for (uint i = 0; i < param.spp; i++) { // Cranley-Patterson rotation with R2 quasi-random sequence for temporal + per-sample variation @@ -100,15 +107,11 @@ AoOutput get_rtao(float2 uv) { #else float4 rand_vec = SampleHemisphere(rand_value); #endif - float3 direction = LocalVectorToWorld(rand_vec.xyz, frag_normal); - float ray_weight = max(dot(frag_normal, direction), 0.05f) / max(/* pdf */ rand_vec.w, 0.05f); + float3 direction = LocalVectorToWorld(rand_vec.xyz, frag_normal); + float ray_weight = max(dot(frag_normal, direction), 0.05f) / max(/* pdf */ rand_vec.w, 0.05f); bool is_miss = CastVisibilityRay( - frag_position + frag_normal * 0.01, - direction, - 0.f, - param.ray_trace_distance, - tlas + frag_position + frag_normal * 
0.01, direction, 0.f, param.ray_trace_distance, tlas ); total_ray_contrib += ray_weight; @@ -117,22 +120,6 @@ AoOutput get_rtao(float2 uv) { float ao = visible_ray_contrib / total_ray_contrib; - AoOutput output; - if (param.ao_mode == Moer::EAoMode::RTAO_AO_ONLY) { - output.color_with_ao = float4(ao, ao, ao, 1.0); - } else { - output.color_with_ao = float4(color * ao, 1.0); - } - output.ambient_only = ao; - - return output; + rw_ao_only[pixel_pos] = ao; + rw_camera_mv[pixel_pos] = GetCameraMotionVector(snapped_uv); } - - -AoOutput main(float2 uv : TEXCOORD0) { - AoOutput output = get_rtao(uv); - - output.camera_motion_vector = GetCameraMotionVector(uv); - - return output; -} \ No newline at end of file diff --git a/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl b/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl index 76bb8c7f3..c890bb905 100644 --- a/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/Ssdo.hlsl @@ -8,14 +8,8 @@ BINDLESS_BINDINGS(3, 2, 4, 5) #include "pipelines/postprocess/lighting_effects/AoCommon.hlsl" -static const float3 ABNORMAL_COLOR = float3(0.0, 0.0, 1.0); static const float3 DIFFUSE_ALBEDO = float3(0.5, 0.5, 0.5); -// uv in [0, 1]; output in [0, 1] -// float2 random_2to2(float2 uv) { -// return TextureHandle(param.noise_tex).Sample2D(uv).rg; -// } - float random_1to1(float2 seed) { // 使用 sin 和一个大数的小数部分来产生伪随机�? return frac(sin(dot(seed, float2(12.9898, 78.233))) * 43758.5453123); @@ -67,10 +61,10 @@ float3 GetVplIndirectLight(float3 vpl_pos, float3 vpl_normal, float3 shading_pos // 面积�? 
// vpl_linear_depth ≈ abs(view_z), 透视投影下 clip.w == view_z - float4 vpl_clip = mul(param.world2clip, float4(vpl_pos, 1.0)); + float4 vpl_clip = mul(param.world2clip, float4(vpl_pos, 1.0)); float vpl_linear_depth = abs(vpl_clip.w); - float area_weight = vpl_linear_depth * vpl_linear_depth + 0.0001; - area_weight = min(2, area_weight); //防止过大 + float area_weight = vpl_linear_depth * vpl_linear_depth + 0.0001; + area_weight = min(2, area_weight); //防止过大 // 简单的漫反射间接光 //由于采用了余弦加权采样,这里不用再乘�?shadingCosine �? @@ -140,96 +134,12 @@ float4 GetSsdo(float2 uv) { AoOutput main(float2 uv : TEXCOORD0) { AoOutput output; - float3 color = TextureHandle(param.input_image).Sample2D(uv); output.camera_motion_vector = GetCameraMotionVector(uv); - output.camera_motion_vector = output.camera_motion_vector * 0.5f + output.color_with_ao.xy * 0.5f; - - float4 ssdo_result = GetSsdo(uv); - - if (param.ao_mode == Moer::EAoMode::NONE) { - output.color_with_ao = float4(color, 1.0); - output.ambient_only = 1.0; - } else if (param.ao_mode == Moer::EAoMode::SSDO) { - output.ambient_only = ssdo_result.w; - output.color_with_ao = float4(ssdo_result.xyz, 1.0) + float4(ssdo_result.w * color, 1.0); - } else if (param.ao_mode == Moer::EAoMode::SSDO_AO_ONLY) { - output.ambient_only = ssdo_result.w; - output.color_with_ao = float4(ssdo_result.w, ssdo_result.w, ssdo_result.w, 1.0); - } + // FIXME: 目前AO输出结果只有AO值,SSDO坏掉了,需要重构 + float4 ssdo_result = GetSsdo(uv); + output.ambient_only = ssdo_result.w; return output; } - -//===================magic ssao-based ssdo code=================== - -// float3 get_ssdo_ao_test(float2 uv) { -// float3 normal = Raster::UnpackNormal(TextureHandle(param.normal_tex).Sample2D(uv)); -// float3 position = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); - -// float ao = 0.0; -// float2 tmp1 = param.ssdo_radius * param.inv_resolution; - -// for (uint i = 0; i < param.ssdo_sample_count; i++) { -// float2 offset = random_2to2(uv + 0.093 * float2(i, i)) * 2.0 - 1.0; 
-// float3 sample_position = WorldPosFromDepthTexture(param.depth_tex, uv + offset * tmp1, param.clip2world); - -// float3 vec = sample_position - position; -// float3 len = length(vec); -// float3 norm_vec = vec / len; - -// ao += max(0.0, dot(normal, norm_vec) - 0.05) * -// smoothstep(param.ssdo_max_distance, param.ssdo_max_distance * 0.5, len); -// } -// ao = clamp(1.0 - ao / param.ssdo_sample_count * param.ssdo_intensity, 0.0, 1.0); - -// return ao; -// } - -// float3 get_ssdo_indirect_test(float2 uv) { -// float3 normal = Raster::UnpackNormal(TextureHandle(param.normal_tex).Sample2D(uv)); -// float3 position = WorldPosFromDepthTexture(param.depth_tex, uv, param.clip2world); - -// float3 indirect_light = float3(0.0, 0.0, 0.0); -// float2 tmp1 = param.ssdo_radius * param.inv_resolution; - -// for (uint i = 0; i < param.ssdo_sample_count; i++) { -// float2 offset = random_2to2(uv + 0.093 * float2(i, i)) * 2.0 - 1.0; -// float3 sample_position = WorldPosFromDepthTexture(param.depth_tex, uv + offset * tmp1, param.clip2world); - -// float3 vec = sample_position - position; -// float3 len = length(vec); -// float3 norm_vec = vec / len; - -// float ao_to_add = max(0.0, dot(normal, norm_vec) - 0.05) * -// smoothstep(param.ssdo_max_distance, param.ssdo_max_distance * 0.5, len); -// indirect_light += ao_to_add * GetVplContribution(uv + offset * tmp1, uv); -// } - -// return indirect_light / (float)param.ssdo_sample_count * 0.01; -// } - -// AoOutput main(float2 uv : TEXCOORD0) { -// AoOutput output; -// float3 color = TextureHandle(param.input_image).Sample2D(uv); - -// output.camera_motion_vector = GetCameraMotionVector(uv); - -// output.camera_motion_vector = output.camera_motion_vector * 0.5f + output.color_with_ao.xy * 0.5f; - -// float ao = get_ssdo_ao_test(uv); - -// if (param.ao_mode == Moer::EAoMode::NONE) { -// output.color_with_ao = float4(color, 1.0); -// output.ambient_only = 1.0; -// } else if (param.ao_mode == Moer::EAoMode::SSDO) { -// output.ambient_only 
= ao; -// output.color_with_ao = float4(get_ssdo_indirect_test(uv) * ao, 1.0) + float4(ao * color, 1.0); -// } else if (param.ao_mode == Moer::EAoMode::SSDO_AO_ONLY) { -// output.ambient_only = ao; -// output.color_with_ao = float4(ao, ao, ao, 1.0); -// } - -// return output; -// } \ No newline at end of file diff --git a/source/editor/raster_ui/RasterUI.cpp b/source/editor/raster_ui/RasterUI.cpp index e5a0d8c0d..0b02c5e80 100644 --- a/source/editor/raster_ui/RasterUI.cpp +++ b/source/editor/raster_ui/RasterUI.cpp @@ -285,6 +285,9 @@ void RasterUI::ShowConfig() { draw_border(); } + ImGui::Separator(); + ImGui::Checkbox("Half Resolution AO", &m_config.ao_half_resolution); + if (m_config.ao_mode == EAoMode::SSAO || m_config.ao_mode == EAoMode::SSAO_AO_ONLY) { ImGui::SliderFloat("Intensity", &m_config.ssao_intensity, 0.0f, 2.0f); ImGui::SliderFloat("Ray Trace Radius", &m_config.ssao_max_distance, 0.0f, 5.0f); diff --git a/source/runtime/render/renderer/raster/AoPass.h b/source/runtime/render/renderer/raster/AoPass.h index d50747471..46ac19ab5 100644 --- a/source/runtime/render/renderer/raster/AoPass.h +++ b/source/runtime/render/renderer/raster/AoPass.h @@ -19,15 +19,17 @@ class AoPipeline : public RasterPipeline { DEFINE_SHADER_ARGS(bdls, param); }; -class RtaoPipeline : public RasterPipeline { +class RtaoPipeline : public ComputePipeline { public: - DEFINE_RASTER_PIPELINE_CLASS(RtaoPipeline); + DEFINE_COMPUTE_PIPELINE_CLASS(RtaoPipeline); - DEFINE_SHADER_TLAS(tlas); DEFINE_SHADER_CONSTANT_STRUCT(RtaoPipelineBindlessParam, param); + DEFINE_SHADER_TEX(rw_ao_only); + DEFINE_SHADER_TEX(rw_camera_mv); + DEFINE_SHADER_TLAS(tlas); DEFINE_SHADER_BINDLESS_ARRAY(bdls); - DEFINE_SHADER_ARGS(tlas, bdls, param); + DEFINE_SHADER_ARGS(param, rw_ao_only, rw_camera_mv, tlas, bdls); MUTATION_BOOL(RTAO_COSINE_WEIGHTED); }; @@ -42,20 +44,32 @@ class SsdoPipeline : public RasterPipeline { DEFINE_SHADER_ARGS(bdls, param); }; +class AoCompositePipeline : public ComputePipeline { +public: 
+ DEFINE_COMPUTE_PIPELINE_CLASS(AoCompositePipeline); + DEFINE_SHADER_CONSTANT_STRUCT(AoCompositeParam, param); + DEFINE_SHADER_TEX(rw_output); + DEFINE_SHADER_BINDLESS_ARRAY(bdls); + DEFINE_SHADER_ARGS(param, rw_output, bdls); +}; + /** * MARK: AO Pass * * AO Pass will calculate CameraMotionVector simultaneously. - * - * TODO: SSDO Support + * When ao_half_resolution is enabled, all AO computation and denoising runs + * at half resolution, and the result is bilinear-upsampled back to full res. */ class AoPass { public: struct AoPassOutput { - TextureWithHandle ao_with_color; - uint ao_only; - uint ao_only_idx; // 0 or 1 表示ao_only的顺序 - uint camera_motion_vector; + uint ao_only; // bindless hdl, for composite + uint ao_only_idx; // 0 or 1, for denoiser double-buffering + }; + + struct AoTextureSet { + TextureWithHandle ao_only; + TextureWithHandle camera_mv; }; AoPass(RasterContext& context) { @@ -63,8 +77,7 @@ class AoPass { GfxPsoCreateInfo pso_full_screen_info( RHIRasterizeInfo::Preset(), {}, - {RHIColorAttachmentInfo::Preset(context.textures.ao_output.tex->GetFormat()), - RHIColorAttachmentInfo::Preset(context.textures.ao_output_ambient_only.tex->GetFormat()), + {RHIColorAttachmentInfo::Preset(context.textures.ao_output_ambient_only.tex->GetFormat()), RHIColorAttachmentInfo::Preset(context.textures.camera_motion_vector.tex->GetFormat())} ); return pso_full_screen_info; @@ -78,17 +91,15 @@ class AoPass { { RtaoSampleModeMacros uniform_macros{}; uniform_macros.SetMutation(false); - rtao_pipeline_uniform = context.manager.Raster() - .Vertex("core/utils/FullScreenQuad.hlsl") - .Pixel("pipelines/postprocess/lighting_effects/Rtao.hlsl", "main", uniform_macros) - .Build(create_pso_func()); + rtao_pipeline_uniform = context.manager.Compute( + "pipelines/postprocess/lighting_effects/Rtao.hlsl", uniform_macros + ); RtaoSampleModeMacros cosine_macros{}; cosine_macros.SetMutation(true); - rtao_pipeline_cosine = context.manager.Raster() - 
.Vertex("core/utils/FullScreenQuad.hlsl") - .Pixel("pipelines/postprocess/lighting_effects/Rtao.hlsl", "main", cosine_macros) - .Build(create_pso_func()); + rtao_pipeline_cosine = context.manager.Compute( + "pipelines/postprocess/lighting_effects/Rtao.hlsl", cosine_macros + ); } ssdo_pipeline = context.manager.Raster() @@ -96,6 +107,10 @@ class AoPass { .Pixel("pipelines/postprocess/lighting_effects/Ssdo.hlsl") .Build(std::move(create_pso_func())); + ao_composite_pipeline = context.manager.Compute( + "pipelines/postprocess/lighting_effects/AoComposite.hlsl" + ); + CreateMotionVectorData(context); } @@ -123,46 +138,69 @@ class AoPass { AoPassOutput Process(RasterContext& context, const RasterConfig& ui_config, const Camera& camera, uint64 frame_idx) { - TextureWithHandle ao_only = context.textures.ao_output_ambient_only; - static uint ao_only_idx = 0; + const bool half_res = ui_config.ao_half_resolution; + + TextureWithHandle ao_only_full = context.textures.ao_output_ambient_only; + TextureWithHandle ao_only_half = context.textures.ao_output_ambient_only_half; + static uint ao_only_idx = 0; ao_only_idx ^= 1; if (ao_only_idx) { - ao_only = context.textures.ao_output_ambient_only_1; + ao_only_full = context.textures.ao_output_ambient_only_1; + ao_only_half = context.textures.ao_output_ambient_only_1_half; + } + + AoTextureSet tex_set; + if (half_res) { + tex_set.ao_only = ao_only_half; + tex_set.camera_mv = context.textures.camera_motion_vector_half; + } else { + tex_set.ao_only = ao_only_full; + tex_set.camera_mv = context.textures.camera_motion_vector; } if (ui_config.ao_mode == EAoMode::RTAO || ui_config.ao_mode == EAoMode::RTAO_AO_ONLY) { - ProcessRtao(context, ui_config, camera, frame_idx, ao_only); + ProcessRtao(context, ui_config, camera, frame_idx, tex_set); } else if (ui_config.ao_mode == EAoMode::SSDO || ui_config.ao_mode == EAoMode::SSDO_AO_ONLY) { - ProcessSsdo(context, ui_config, camera, frame_idx, ao_only); + ProcessSsdo(context, ui_config, camera, 
frame_idx, tex_set); } else { - ProcessAo(context, ui_config, camera, frame_idx, ao_only); + ProcessAo(context, ui_config, camera, frame_idx, tex_set); } return AoPassOutput{ - .ao_with_color = context.textures.ao_output, // - .ao_only = ao_only.hdl, // - .ao_only_idx = ao_only_idx, // - .camera_motion_vector = context.textures.camera_motion_vector.hdl, // + .ao_only = (half_res ? ao_only_half : ao_only_full).hdl, + .ao_only_idx = ao_only_idx, }; } + void CompositeAo(RasterContext& context, const RasterConfig& ui_config, uint ao_only_hdl) { + AoCompositeParam param; + param.ao_tex = ao_only_hdl; + param.color_tex = context.textures.lighting_output.hdl; + param.ao_mode = static_cast(ui_config.ao_mode); + param.full_resolution = float2(context.textures.ao_output.GetSize()); + param.inv_full_resolution = float2(1.0f) / param.full_resolution; + + uint2 res = uint2(context.textures.ao_output.GetSize()); + context.cmd_list.Compute(ao_composite_pipeline, param, context.textures.ao_output.tex, context.bdls) + .Dispatch(uint3((res.x + 7u) / 8u, (res.y + 7u) / 8u, 1), "AO Composite Pass"); + } + void ProcessAo( RasterContext& context, const RasterConfig& ui_config, const Camera& camera, uint64 frame_idx, - TextureWithHandle ao_only + AoTextureSet tex_set ) { AoPipelineBindlessParam param; param.clip2world = Transpose(camera.GetViewProjectionMatrixInv()); - param.inv_resolution = float2(1.0f) / float2(context.textures.ao_output.GetSize()); + param.inv_resolution = float2(1.0f) / float2(tex_set.ao_only.GetSize()); param.ssao_intensity = ui_config.ssao_intensity; param.ssao_max_distance = ui_config.ssao_max_distance; param.ssao_sample_count = ui_config.ssao_spp; param.ssao_radius = ui_config.ssao_sample_radius; param.ao_mode = static_cast(ui_config.ao_mode); - param.input_image = context.textures.lighting_output.hdl; param.normal_tex = context.textures.normal.hdl; param.depth_tex = context.textures.depth_linear_sampler.hdl; param.noise_tex = context.textures.noise_tex.hdl; @@ 
-173,11 +211,10 @@ class AoPass { context.cmd_list.Gfx(ao_pipeline, context.bdls, param) .Draw( "AO Pass", - context.textures.ao_output.GetRect2D(), + tex_set.ao_only.GetRect2D(), std::move(RasterTool::GetFullScreenDrawDatas()), - ColorAttachment(context.textures.ao_output.tex), - ColorAttachment(ao_only.tex), - ColorAttachment(context.textures.camera_motion_vector.tex) + ColorAttachment(tex_set.ao_only.tex), + ColorAttachment(tex_set.camera_mv.tex) ); } @@ -186,42 +223,40 @@ class AoPass { const RasterConfig& ui_config, const Camera& camera, uint64 frame_idx, - TextureWithHandle ao_only + AoTextureSet tex_set ) { - RtaoPipelineBindlessParam param; param.clip2world = Transpose(camera.GetViewProjectionMatrixInv()); - param.camera_pos = camera.GetPosition(); param.frame_idx = frame_idx; - param.resolution = float2(context.textures.ao_output.GetSize()); - param.inv_resolution = float2(1.0) / float2(context.textures.ao_output.GetSize()); - param.input_image = context.textures.lighting_output.hdl; param.normal_tex = context.textures.normal.hdl; param.depth_tex = context.textures.depth_nearest_sampler.hdl; - param.ao_mode = static_cast(ui_config.ao_mode); - param.sample_mode = static_cast(ui_config.rtao_sample_mode); param.spp = ui_config.rtao_spp; + param.resolution = float2(tex_set.ao_only.GetSize()); + param.inv_resolution = float2(1.0) / param.resolution; param.ray_trace_distance = ui_config.rtao_ray_trace_distance; param.intensity = ui_config.rtao_intensity; UpdateMotionVectorData(context, camera); param.camera_mv_data_handle = camera_mv_data_in_gpu.hdl; param.noise_tex = context.textures.noise_tex.hdl; - - auto& active_rtao_pipeline = (ui_config.rtao_sample_mode == ERtaoSampleMode::COSINE_WEIGHTED) - ? 
rtao_pipeline_cosine - : rtao_pipeline_uniform; - - context.cmd_list.Gfx(active_rtao_pipeline, context.rt_scene()->GetTlas(), context.bdls, param) - .Draw( - "RTAO Pass", - context.textures.ao_output.GetRect2D(), - std::move(RasterTool::GetFullScreenDrawDatas()), - ColorAttachment(context.textures.ao_output.tex), - ColorAttachment(ao_only.tex), - ColorAttachment(context.textures.camera_motion_vector.tex) - ); + param.depth_tex_resolution = float2(context.textures.depth_nearest_sampler.GetSize()); + + auto& active_rtao_pipeline = (ui_config.rtao_sample_mode == ERtaoSampleMode::COSINE_WEIGHTED) ? + rtao_pipeline_cosine : + rtao_pipeline_uniform; + + uint2 res = uint2(tex_set.ao_only.GetSize()); + context.cmd_list + .Compute( + active_rtao_pipeline, + param, + tex_set.ao_only.tex, + tex_set.camera_mv.tex, + context.rt_scene()->GetTlas(), + context.bdls + ) + .Dispatch(uint3((res.x + 7u) / 8u, (res.y + 7u) / 8u, 1), "RTAO Compute Pass"); } void ProcessSsdo( @@ -229,12 +264,12 @@ class AoPass { const RasterConfig& ui_config, const Camera& camera, uint64 frame_idx, - TextureWithHandle ao_only + AoTextureSet tex_set ) { SsdoPipelineBindlessParam param; param.clip2world = Transpose(camera.GetViewProjectionMatrixInv()); - param.inv_resolution = float2(1.0f) / float2(context.textures.ao_output.GetSize()); + param.inv_resolution = float2(1.0f) / float2(tex_set.ao_only.GetSize()); param.ssdo_sample_count = ui_config.ssao_spp; param.ssdo_radius = ui_config.ssdo_sample_radius; param.ssdo_max_distance = ui_config.ssdo_max_distance; @@ -255,19 +290,19 @@ class AoPass { context.cmd_list.Gfx(ssdo_pipeline, context.bdls, param) .Draw( "SSDO Pass", - context.textures.ao_output.GetRect2D(), + tex_set.ao_only.GetRect2D(), std::move(RasterTool::GetFullScreenDrawDatas()), - ColorAttachment(context.textures.ao_output.tex), - ColorAttachment(ao_only.tex), - ColorAttachment(context.textures.camera_motion_vector.tex) + ColorAttachment(tex_set.ao_only.tex), + 
ColorAttachment(tex_set.camera_mv.tex) ); } private: - AoPipeline ao_pipeline; - RtaoPipeline rtao_pipeline_uniform; - RtaoPipeline rtao_pipeline_cosine; - SsdoPipeline ssdo_pipeline; + AoPipeline ao_pipeline; + RtaoPipeline rtao_pipeline_uniform; + RtaoPipeline rtao_pipeline_cosine; + SsdoPipeline ssdo_pipeline; + AoCompositePipeline ao_composite_pipeline; CameraMotionVectorData camera_mv_data_in_cpu; // mv: motion vector BufferWithHandle camera_mv_data_in_gpu; // mv: motion vector diff --git a/source/runtime/render/renderer/raster/AssetTool.h b/source/runtime/render/renderer/raster/AssetTool.h index b432a0d7f..8af387af6 100644 --- a/source/runtime/render/renderer/raster/AssetTool.h +++ b/source/runtime/render/renderer/raster/AssetTool.h @@ -36,7 +36,7 @@ struct TexConfig { void* alias_ptr = nullptr; bool is_asset = false; bool b_create_mip_views = false; - bool b_super_resolution = false; + bool b_downsampled = false; //为Depth设计,共用一张纹理 template @@ -107,8 +107,8 @@ struct TexConfig { return *this; } - TexConfig& SR(bool b) { - b_super_resolution = b; + TexConfig& DownSampled(bool b = true) { + b_downsampled = b; return *this; } @@ -237,11 +237,7 @@ class AssetTool { cfg.dim = ETextureDimension::TEX_2D; target.tex = device.CreateDepthBuffer( - name, - (cfg.b_super_resolution ? Extent2D(size.x / 2, size.y / 2) : Extent2D(size.x, size.y)), - cfg.format, - 1, - cfg.usage + name, Extent2D(size.x, size.y), cfg.format, 1, cfg.usage ); } else if constexpr (std::is_same_v) { cfg.type = TexType::TEX_TYPE_CUBE; @@ -249,34 +245,21 @@ class AssetTool { cfg.size = {0, 0, 6}; target.tex = device.CreateCubeMap( - name, - (cfg.b_super_resolution ? 
Extent2D(size.x / 2, size.y / 2) : Extent2D(size.x, size.y)), - cfg.format, - cfg.usage, - cfg.mip_cnt + name, Extent2D(size.x, size.y), cfg.format, cfg.usage, cfg.mip_cnt ); } else if constexpr (std::is_same_v) { cfg.type = TexType::TEX_TYPE_2D; cfg.dim = ETextureDimension::TEX_2D; target.tex = device.CreateTexture( - name, - (cfg.b_super_resolution ? Extent2D(size.x / 2, size.y / 2) : Extent2D(size.x, size.y)), - cfg.format, - cfg.usage, - cfg.mip_cnt + name, Extent2D(size.x, size.y), cfg.format, cfg.usage, cfg.mip_cnt ); } else { static_assert(always_false, "Unsupported Tex IntentTag"); } if (is_verbose) { - LOG_DEBUG( - "tex {}, size {} x {}", - name, - (cfg.b_super_resolution ? size.x / 2 : size.x), - (cfg.b_super_resolution ? size.y / 2 : size.y) - ); + LOG_DEBUG("tex {}, size {} x {}", name, size.x, size.y); } } } diff --git a/source/runtime/render/renderer/raster/RasterConfig.h b/source/runtime/render/renderer/raster/RasterConfig.h index 7e13a5579..758d93446 100644 --- a/source/runtime/render/renderer/raster/RasterConfig.h +++ b/source/runtime/render/renderer/raster/RasterConfig.h @@ -171,10 +171,12 @@ struct RasterConfig { // MARK: AO EAoMode ao_mode = EAoMode::RTAO; - float ssao_intensity = 1.0f; - int ssao_spp = 16; - int ssao_sample_radius = 16; - float ssao_max_distance = 1.0f; + bool ao_half_resolution = true; + + float ssao_intensity = 1.0f; + int ssao_spp = 16; + int ssao_sample_radius = 16; + float ssao_max_distance = 1.0f; ERtaoSampleMode rtao_sample_mode = ERtaoSampleMode::COSINE_WEIGHTED; float rtao_intensity = 1.0f; diff --git a/source/runtime/render/renderer/raster/RasterRenderer.cpp b/source/runtime/render/renderer/raster/RasterRenderer.cpp index bbfa8960e..93f760223 100644 --- a/source/runtime/render/renderer/raster/RasterRenderer.cpp +++ b/source/runtime/render/renderer/raster/RasterRenderer.cpp @@ -22,7 +22,6 @@ #if WITH_CUDA #include "CudaPass.h" #include "TensorRTPass.h" -#include "UpsamplePass.h" #endif namespace Moer::Render::Raster { 
@@ -71,7 +70,6 @@ RasterRenderer::RasterRenderer( raster_context.textures.camera_motion_vector.tex, raster_context.textures.ao_output_ambient_only_1.tex ); - upsample_pass = MakeUnique(raster_context); #endif cmd_list.UpdateBindlessArray(bindless_array); @@ -274,18 +272,17 @@ bool RasterRenderer::RunSingle(const SharedPtr editor_config, cons // Post Process Passes // - Ambient Occlusion - auto ao_result = ao_pass->Process(raster_context, raster_config, camera, time); - TextureWithHandle processing_image = ao_result.ao_with_color; - uint ao_only_idx = ao_result.ao_only_idx; + auto ao_result = ao_pass->Process(raster_context, raster_config, camera, time); - rtao_denoiser_pass->ProcessInPlace(raster_context, raster_config, ao_only_idx); + rtao_denoiser_pass->ProcessInPlace(raster_context, raster_config, ao_result.ao_only_idx); + ao_pass->CompositeAo(raster_context, raster_config, ao_result.ao_only); + + TextureWithHandle processing_image = raster_context.textures.ao_output; // - CUDA Pass #if WITH_CUDA if (raster_config.ai_is_cuda_enabled) { - processing_image = tensor_rt_pass->Process( - raster_context, raster_config, ao_only_idx - ); //如果开启了该Pass,Ao结果会被替换成TensorRT的结果(在纹理context.textures.lighting_output上执行),后续在该纹理上处理。否则,在纹理ao_with_color上处理 + processing_image = tensor_rt_pass->Process(raster_context, raster_config, ao_result.ao_only_idx); } #endif @@ -298,12 +295,7 @@ bool RasterRenderer::RunSingle(const SharedPtr editor_config, cons // - Anti-aliasing processing_image = aa_pass->Process(raster_context, raster_config, camera, processing_image); -#if WITH_CUDA && SUPER_RESOLUTION_ENABLED - // - Upsample Pass - processing_image = upsample_pass->Process(raster_context, raster_config, processing_image); -#endif - - //Bloom Pass + // - Bloom Pass processing_image = bloom_pass->Process(raster_context, raster_config, processing_image); // - Tonemapping Pass diff --git a/source/runtime/render/renderer/raster/RasterRenderer.h 
b/source/runtime/render/renderer/raster/RasterRenderer.h index e192a2540..aa994a9c1 100644 --- a/source/runtime/render/renderer/raster/RasterRenderer.h +++ b/source/runtime/render/renderer/raster/RasterRenderer.h @@ -22,7 +22,6 @@ class TonemappingPass; #if WITH_CUDA class CudaPass; class TensorRTPass; -class UpsamplePass; #endif /** @@ -80,7 +79,6 @@ class RENDER_API RasterRenderer : public Renderer { #if WITH_CUDA UniquePtr cuda_pass; UniquePtr tensor_rt_pass; - UniquePtr upsample_pass; #endif // Other vars diff --git a/source/runtime/render/renderer/raster/RasterResource.h b/source/runtime/render/renderer/raster/RasterResource.h index 78567eedb..5441a41a5 100644 --- a/source/runtime/render/renderer/raster/RasterResource.h +++ b/source/runtime/render/renderer/raster/RasterResource.h @@ -42,16 +42,7 @@ struct RasterContext { float frame_time; - // 超分Pass前的分辨率 - uint2 GetResolutionBeforeSR() { -#if WITH_CUDA && SUPER_RESOLUTION_ENABLED - return uint2(resolution.x / 2.0f, resolution.y / 2.0f); -#else - return uint2(resolution.x, resolution.y); -#endif - } - // 超分Pass后的分辨率(原始分辨率) - uint2 GetResolutionOriginal() { + uint2 GetResolution() { return uint2(resolution.x, resolution.y); } diff --git a/source/runtime/render/renderer/raster/RasterTextures.h b/source/runtime/render/renderer/raster/RasterTextures.h index 3eec2fd9e..a9d28b63e 100644 --- a/source/runtime/render/renderer/raster/RasterTextures.h +++ b/source/runtime/render/renderer/raster/RasterTextures.h @@ -13,183 +13,186 @@ namespace Moer::Render::Raster { #define CUSTOMIZED_SIZE(x, y) Extent2D(x, y) -// 启用超分 -// #define SUPER_RESOLUTION_ENABLED WITH_CUDA -// 关闭超分 -#define SUPER_RESOLUTION_ENABLED 0 - -#if WITH_CUDA && SUPER_RESOLUTION_ENABLED -// 超分标记 -#define SR_TAG_true true -#define SR_TAG_false false -#else -// 超分标记 -#define SR_TAG_true false -#define SR_TAG_false false -#endif - -#define RASTER_TEXTURES_TABLE_CONFIG \ - X(TexHandle, vbuffer, Tex2DTag, 
TexConfig::Default(PF_R32_UINT).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - normal, \ - Tex2DTag, \ - TexConfig::Default(PF_A2R10G10B10_UNORM_PACK32).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - tangent, \ - Tex2DTag, \ - TexConfig::Default(PF_A2R10G10B10_UNORM_PACK32).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, uv, Tex2DTag, TexConfig::Default(PF_R32G32_SFLOAT).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - shadow_mask, \ - Tex2DTag, \ - TexConfig::Default(PF_R8_UNORM).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - lighting_output, \ - Tex2DTag, \ - TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - ao_output, \ - Tex2DTag, \ - TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - ao_output_ambient_only, \ - Tex2DTag, \ - TexConfig::Default(PF_R8_UNORM).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - ao_output_ambient_only_1, \ - Tex2DTag, \ - TexConfig::Default(PF_R8_UNORM).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - ao_denoiser_accumulate, \ - Tex2DTag, \ - TexConfig::Default(PF_R8_UNORM).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - ao_denoiser_accumulate_1, \ - Tex2DTag, \ - TexConfig::Default(PF_R8_UNORM).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - camera_motion_vector, \ - Tex2DTag, \ - TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - denoiser_output, \ - Tex2DTag, \ - TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR).SR(SR_TAG_true)) \ - X(TexHandle, \ - upsample_output, \ - Tex2DTag, \ - TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ - X(TexHandle, ssr_output, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ - X(TexHandle, aa_texture_1, Tex2DTag, 
TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ - X(TexHandle, aa_texture_2, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ - X(TexHandle, aa_texture_3, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ - X(TexHandle, aa_texture_4, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ - X(TexHandle, aa_output, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ - X(TexHandle, \ - bloom_downsample_chain, \ - Tex2DTag, \ - TexConfig::Default(PF_B10G11R11_UFLOAT_PACK32) \ - .Usage(E_SAMPLED_COLOR) \ - .Mips(6) \ - .IndivisualMips() \ - .SamplerConfig(SF_LINEAR, SAM_CLAMP_TO_EDGE)) \ - X(TexHandle, \ - bloom_upsample_chain, \ - Tex2DTag, \ - TexConfig::Default(PF_B10G11R11_UFLOAT_PACK32) \ - .Usage(E_SAMPLED_COLOR) \ - .Mips(6) \ - .IndivisualMips() \ - .SamplerConfig(SF_LINEAR, SAM_CLAMP_TO_EDGE)) \ - X(TexHandle, tonemapping_output, Tex2DTag, TexConfig::Default(PF_R8G8B8A8_UNORM).Usage(E_SAMPLED_COLOR)) \ - X(TexHandle, ui_frame_buffer, Tex2DTag, TexConfig::Default(PF_R8G8B8A8_UNORM).Usage(E_SAMPLED_COLOR)) \ - X(TexHandle, output, Tex2DTag, TexConfig::Default(PF_R8G8B8A8_SRGB).Usage(E_C_ATTACH)) \ - X(DepthBufferWithHandle, \ - depth_linear_sampler, \ - TexDepthTag, \ - TexConfig::Default(WITH_CUDA ? PF_D32_SFLOAT : PF_D32_SFLOAT_S8_UINT) \ - .Usage(E_S_DEPTH) \ - .SR(SR_TAG_true) \ - .SamplerConfig(SF_LINEAR, SAM_CLAMP_TO_EDGE)) \ - X(DepthBufferWithHandle, \ - depth_nearest_sampler, \ - TexDepthTag, \ - TexConfig::Default(WITH_CUDA ? 
PF_D32_SFLOAT : PF_D32_SFLOAT_S8_UINT) \ - .Usage(E_S_DEPTH) \ - .SR(SR_TAG_true) \ - .SamplerConfig(SF_NEAREST, SAM_CLAMP_TO_EDGE) \ - .From(depth_linear_sampler)) \ - X(TexHandle, \ - noise_tex, \ - Tex2DTag, \ - TexConfig::Asset("noise_256x256.png") \ - .Format(PF_R8G8B8A8_UNORM) \ - .Usage(E_S_TRANSFER) \ - .SamplerConfig(SF_LINEAR, SAM_REPEAT)) \ - X(TexHandle, \ - lut_ggx_emu, \ - Tex2DTag, \ - TexConfig::Asset("LUT/GGX_E_LUT.png") \ - .Format(PF_R8G8B8A8_UNORM) \ - .Usage(E_S_TRANSFER) \ - .SamplerConfig(SF_LINEAR, SAM_REPEAT)) \ - X(TexHandle, \ - lut_ggx_eavg, \ - Tex2DTag, \ - TexConfig::Asset("LUT/GGX_Eavg_LUT.png") \ - .Format(PF_R8G8B8A8_UNORM) \ - .Usage(E_S_TRANSFER) \ - .SamplerConfig(SF_LINEAR, SAM_REPEAT)) \ - X(TexHandle, \ - cubemap_tex, \ - TexCubeTag, \ - TexConfig::Asset("Skybox/WaterScene") \ - .Format(PF_R8G8B8A8_UNORM) \ - .Usage(E_S_TRANSFER) \ +// Full-resolution only textures +#define RASTER_TEXTURES_TABLE \ + X(TexHandle, vbuffer, Tex2DTag, TexConfig::Default(PF_R32_UINT).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, normal, Tex2DTag, TexConfig::Default(PF_A2R10G10B10_UNORM_PACK32).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, tangent, Tex2DTag, TexConfig::Default(PF_A2R10G10B10_UNORM_PACK32).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, uv, Tex2DTag, TexConfig::Default(PF_R32G32_SFLOAT).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, shadow_mask, Tex2DTag, TexConfig::Default(PF_R8_UNORM).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, lighting_output, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, \ + ao_output, \ + Tex2DTag, \ + TexConfig::Default(PF_R16G16B16A16_SFLOAT) \ + .Usage(E_SAMPLED_COLOR | ETextureUsageFlags::UNORDERED_ACCESS)) \ + X(TexHandle, denoiser_output, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, upsample_output, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, ssr_output, Tex2DTag, 
TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, aa_texture_1, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, aa_texture_2, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, aa_texture_3, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, aa_texture_4, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, aa_output, Tex2DTag, TexConfig::Default(PF_R16G16B16A16_SFLOAT).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, \ + bloom_downsample_chain, \ + Tex2DTag, \ + TexConfig::Default(PF_B10G11R11_UFLOAT_PACK32) \ + .Usage(E_SAMPLED_COLOR) \ + .Mips(6) \ + .IndivisualMips() \ + .SamplerConfig(SF_LINEAR, SAM_CLAMP_TO_EDGE)) \ + X(TexHandle, \ + bloom_upsample_chain, \ + Tex2DTag, \ + TexConfig::Default(PF_B10G11R11_UFLOAT_PACK32) \ + .Usage(E_SAMPLED_COLOR) \ + .Mips(6) \ + .IndivisualMips() \ + .SamplerConfig(SF_LINEAR, SAM_CLAMP_TO_EDGE)) \ + X(TexHandle, tonemapping_output, Tex2DTag, TexConfig::Default(PF_R8G8B8A8_UNORM).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, ui_frame_buffer, Tex2DTag, TexConfig::Default(PF_R8G8B8A8_UNORM).Usage(E_SAMPLED_COLOR)) \ + X(TexHandle, output, Tex2DTag, TexConfig::Default(PF_R8G8B8A8_SRGB).Usage(E_C_ATTACH)) \ + X(DepthBufferWithHandle, \ + depth_linear_sampler, \ + TexDepthTag, \ + TexConfig::Default(WITH_CUDA ? PF_D32_SFLOAT : PF_D32_SFLOAT_S8_UINT) \ + .Usage(E_S_DEPTH) \ + .SamplerConfig(SF_LINEAR, SAM_CLAMP_TO_EDGE)) \ + X(DepthBufferWithHandle, \ + depth_nearest_sampler, \ + TexDepthTag, \ + TexConfig::Default(WITH_CUDA ? 
PF_D32_SFLOAT : PF_D32_SFLOAT_S8_UINT) \ + .Usage(E_S_DEPTH) \ + .SamplerConfig(SF_NEAREST, SAM_CLAMP_TO_EDGE) \ + .From(depth_linear_sampler)) \ + X(TexHandle, \ + noise_tex, \ + Tex2DTag, \ + TexConfig::Asset("noise_256x256.png") \ + .Format(PF_R8G8B8A8_UNORM) \ + .Usage(E_S_TRANSFER) \ + .SamplerConfig(SF_LINEAR, SAM_REPEAT)) \ + X(TexHandle, \ + lut_ggx_emu, \ + Tex2DTag, \ + TexConfig::Asset("LUT/GGX_E_LUT.png") \ + .Format(PF_R8G8B8A8_UNORM) \ + .Usage(E_S_TRANSFER) \ + .SamplerConfig(SF_LINEAR, SAM_REPEAT)) \ + X(TexHandle, \ + lut_ggx_eavg, \ + Tex2DTag, \ + TexConfig::Asset("LUT/GGX_Eavg_LUT.png") \ + .Format(PF_R8G8B8A8_UNORM) \ + .Usage(E_S_TRANSFER) \ + .SamplerConfig(SF_LINEAR, SAM_REPEAT)) \ + X(TexHandle, \ + cubemap_tex, \ + TexCubeTag, \ + TexConfig::Asset("Skybox/WaterScene") \ + .Format(PF_R8G8B8A8_UNORM) \ + .Usage(E_S_TRANSFER) \ .SamplerConfig(SF_LINEAR, SAM_REPEAT)) +// Textures that also have a half-resolution (size/2) variant (NAME_half) +// Used for AO half-resolution mode +#define RASTER_TEXTURES_TABLE_DOWNSAMPLED \ + X(TexHandle, \ + ao_output_ambient_only, \ + Tex2DTag, \ + TexConfig::Default(PF_R8_UNORM) \ + .Usage(E_SAMPLED_COLOR | ETextureUsageFlags::UNORDERED_ACCESS).DownSampled()) \ + X(TexHandle, \ + ao_output_ambient_only_1, \ + Tex2DTag, \ + TexConfig::Default(PF_R8_UNORM) \ + .Usage(E_SAMPLED_COLOR | ETextureUsageFlags::UNORDERED_ACCESS).DownSampled()) \ + X(TexHandle, \ + ao_denoiser_accumulate, \ + Tex2DTag, \ + TexConfig::Default(PF_R8_UNORM).Usage(E_SAMPLED_COLOR).DownSampled()) \ + X(TexHandle, \ + ao_denoiser_accumulate_1, \ + Tex2DTag, \ + TexConfig::Default(PF_R8_UNORM).Usage(E_SAMPLED_COLOR).DownSampled()) \ + X(TexHandle, \ + camera_motion_vector, \ + Tex2DTag, \ + TexConfig::Default(PF_R16G16_SFLOAT) \ + .Usage(E_SAMPLED_COLOR | ETextureUsageFlags::UNORDERED_ACCESS).DownSampled()) + struct RasterTextures { - // 批量生成 + // Full-resolution textures #define X(TYPE, NAME, TEXTYPE, CONFIG) TYPE NAME; - 
RASTER_TEXTURES_TABLE_CONFIG + RASTER_TEXTURES_TABLE + RASTER_TEXTURES_TABLE_DOWNSAMPLED +#undef X + + // Auto-generated half-resolution variants for downsampled textures +#define X(TYPE, NAME, TEXTYPE, CONFIG) TYPE NAME##_half; + RASTER_TEXTURES_TABLE_DOWNSAMPLED #undef X void CreateFrameBuffers(RenderDevice& device, const uint2& size) { - // 批量生成 + // Full-resolution textures #define X(TYPE, NAME, TEXTYPE, CONFIG) \ { \ TexConfig cfg = (CONFIG); \ AssetTool::CreateRasterResource(this->NAME, device, #NAME, size, cfg); \ } - RASTER_TEXTURES_TABLE_CONFIG + RASTER_TEXTURES_TABLE + RASTER_TEXTURES_TABLE_DOWNSAMPLED #undef X + + // Half-resolution variants + { + uint2 half_size = uint2(std::max(1u, size.x / 2), std::max(1u, size.y / 2)); +#define X(TYPE, NAME, TEXTYPE, CONFIG) \ + { \ + TexConfig cfg = (CONFIG); \ + AssetTool::CreateRasterResource(this->NAME##_half, device, #NAME "_half", half_size, cfg); \ + } + RASTER_TEXTURES_TABLE_DOWNSAMPLED +#undef X + } } void LoadAndUploadAssets(RenderDevice& device, CommandList& cmd_list) { - // 批量生成 #define X(TYPE, NAME, TEXTYPE, CONFIG) \ { \ TexConfig cfg = (CONFIG); \ - if ((CONFIG).is_asset) { \ + if (cfg.is_asset) { \ AssetTool::LoadTexture(device, cmd_list, NAME.tex, cfg, #NAME); \ } \ } - RASTER_TEXTURES_TABLE_CONFIG + RASTER_TEXTURES_TABLE + RASTER_TEXTURES_TABLE_DOWNSAMPLED #undef X } void AllocateFrameBuffers(CommandList& cmd_list, BindlessArrayRef& bindless_array) { - // 批量生成 + // Full-resolution textures #define X(TYPE, NAME, TEXTYPE, CONFIG) \ AssetTool::AllocateRasterResourceHandle(bindless_array, NAME, (CONFIG)); - RASTER_TEXTURES_TABLE_CONFIG + RASTER_TEXTURES_TABLE + RASTER_TEXTURES_TABLE_DOWNSAMPLED #undef X - // 提交 + + // Half-resolution variants (LINEAR sampler for bilinear upsampling) +#define X(TYPE, NAME, TEXTYPE, CONFIG) \ + { \ + TexConfig half_cfg; \ + half_cfg.sampler = {ESamplerFilter::SF_LINEAR, ESamplerAddressMode::SAM_CLAMP_TO_EDGE};\ + AssetTool::AllocateRasterResourceHandle(bindless_array, 
NAME##_half, half_cfg); \ + } + RASTER_TEXTURES_TABLE_DOWNSAMPLED +#undef X + cmd_list.UpdateBindlessArray(bindless_array); } - void FreeFrameBuffers(BindlessArrayRef& bindless_array, bool is_free_external_assets){ - // 批量生成 + void FreeFrameBuffers(BindlessArrayRef& bindless_array, bool is_free_external_assets) { #define X(TYPE, NAME, TEXTYPE, CONFIG) \ { \ TexConfig cfg = (CONFIG); \ @@ -197,17 +200,24 @@ struct RasterTextures { AssetTool::FreeRasterResourceHandle(bindless_array, NAME); \ } \ } - RASTER_TEXTURES_TABLE_CONFIG + RASTER_TEXTURES_TABLE + RASTER_TEXTURES_TABLE_DOWNSAMPLED +#undef X + + // Half-resolution variants +#define X(TYPE, NAME, TEXTYPE, CONFIG) \ + AssetTool::FreeRasterResourceHandle(bindless_array, NAME##_half); + RASTER_TEXTURES_TABLE_DOWNSAMPLED #undef X } Array GetDisplayableFrameBuffersView() { Array views; - // 批量生成 #define X(TYPE, NAME, TEXTYPE, CONFIG) \ assert(this->NAME.tex != nullptr && "There is an empty FrameBuffer!"); \ views.emplace_back(this->NAME.tex->GetView()); - RASTER_TEXTURES_TABLE_CONFIG + RASTER_TEXTURES_TABLE + RASTER_TEXTURES_TABLE_DOWNSAMPLED #undef X // 去除 output bool b_has_erased = false; @@ -219,19 +229,19 @@ struct RasterTextures { } } assert(b_has_erased && "output not found in views"); - // 返回 return views; } -}; // namespace Moer::Render::Raster +}; -#undef RASTER_TEXTURES_TABLE_CONFIG +#undef RASTER_TEXTURES_TABLE +#undef RASTER_TEXTURES_TABLE_DOWNSAMPLED #undef CUSTOMIZED_SIZE #undef TexHandle #undef E_SAMPLED_COLOR #undef E_C_ATTACH -#undef E_D_S_ATTACH #undef E_S_DEPTH +#undef E_S_TRANSFER } // namespace Moer::Render::Raster diff --git a/source/runtime/render/renderer/raster/RtaoDenoiserPass.h b/source/runtime/render/renderer/raster/RtaoDenoiserPass.h index f8ef673ea..4509a056c 100644 --- a/source/runtime/render/renderer/raster/RtaoDenoiserPass.h +++ b/source/runtime/render/renderer/raster/RtaoDenoiserPass.h @@ -1,4 +1,4 @@ -#pragma once +#pragma once #include "math/Function.h" #include 
"scene/camera/Camera.h" @@ -31,8 +31,8 @@ class CopyPassPipeline : public RasterPipeline { /** * MARK: RtaoDenoiser Pass * - * 这个Pass主要是为了解决TensorRT Pass网络产生的噪点; - * 注意,这个pass重用了ao_output作为输出 + * 时序降噪:读取当前帧 ao_only 和历史 accumulate, + * 输出降噪后的 accumulate,再通过 Copy Pass 写回 ao_only。 */ class RtaoDenoiserPass { public: @@ -43,8 +43,7 @@ class RtaoDenoiserPass { GfxPsoCreateInfo pso_full_screen_info( RHIRasterizeInfo::Preset(), {}, - {RHIColorAttachmentInfo::Preset(img_denoiser_history_write.tex->GetFormat()), - RHIColorAttachmentInfo::Preset(context.textures.ao_output.tex->GetFormat())} + {RHIColorAttachmentInfo::Preset(img_denoiser_history_write.tex->GetFormat())} ); rtao_denoiser_pso = context.manager.Raster() @@ -72,24 +71,36 @@ class RtaoDenoiserPass { if (!ui_config.rtao_denoiser_enable) return ao_only_idx; + const bool half_res = ui_config.ao_half_resolution; + if (ao_only_idx == 0) { - img_denoiser_history_read = context.textures.ao_denoiser_accumulate_1; - img_denoiser_history_write = context.textures.ao_denoiser_accumulate; - img_ao_only = context.textures.ao_output_ambient_only; - img_ao_only_prev = context.textures.ao_output_ambient_only_1; + img_denoiser_history_read = half_res ? context.textures.ao_denoiser_accumulate_1_half : + context.textures.ao_denoiser_accumulate_1; + img_denoiser_history_write = half_res ? context.textures.ao_denoiser_accumulate_half : + context.textures.ao_denoiser_accumulate; + img_ao_only = half_res ? context.textures.ao_output_ambient_only_half : + context.textures.ao_output_ambient_only; + img_ao_only_prev = half_res ? context.textures.ao_output_ambient_only_1_half : + context.textures.ao_output_ambient_only_1; } else { - img_denoiser_history_read = context.textures.ao_denoiser_accumulate; - img_denoiser_history_write = context.textures.ao_denoiser_accumulate_1; - img_ao_only = context.textures.ao_output_ambient_only_1; - img_ao_only_prev = context.textures.ao_output_ambient_only; + img_denoiser_history_read = half_res ? 
context.textures.ao_denoiser_accumulate_half : + context.textures.ao_denoiser_accumulate; + img_denoiser_history_write = half_res ? context.textures.ao_denoiser_accumulate_1_half : + context.textures.ao_denoiser_accumulate_1; + img_ao_only = half_res ? context.textures.ao_output_ambient_only_1_half : + context.textures.ao_output_ambient_only_1; + img_ao_only_prev = half_res ? context.textures.ao_output_ambient_only_half : + context.textures.ao_output_ambient_only; } - // Pass 1 + TextureWithHandle camera_mv_target = + half_res ? context.textures.camera_motion_vector_half : context.textures.camera_motion_vector; + + // Pass 1: temporal accumulation RtaoDenoiserPassBindlessParam param; param.history_ao_tex = img_denoiser_history_read.hdl; param.curr_ao_tex = img_ao_only.hdl; - param.color_tex = context.textures.lighting_output.hdl; - param.motion_vector_tex = context.textures.camera_motion_vector.hdl; + param.motion_vector_tex = camera_mv_target.hdl; param.depth_tex = context.textures.depth_nearest_sampler.hdl; param.normal_tex = context.textures.normal.hdl; @@ -97,7 +108,6 @@ class RtaoDenoiserPass { param.valid_depth_threshold = ui_config.rtao_denoiser_valid_depth_threshold; param.valid_normal_threshold = ui_config.rtao_denoiser_valid_normal_threshold; - param.is_rtao_ao_only = (ui_config.ao_mode == EAoMode::RTAO_AO_ONLY) ? 
1 : 0; param.is_reprojection_enable = ui_config.rtao_denoiser_reprojection_enable; param.is_validation_enable = ui_config.rtao_denoiser_validation_enable; @@ -106,8 +116,7 @@ class RtaoDenoiserPass { "RtaoDenoiser Pass", img_denoiser_history_write.GetRect2D(), std::move(RasterTool::GetFullScreenDrawDatas()), - ColorAttachment(img_denoiser_history_write.tex), - ColorAttachment(context.textures.ao_output.tex) + ColorAttachment(img_denoiser_history_write.tex) ); // Pass 2 @@ -122,7 +131,6 @@ class RtaoDenoiserPass { ColorAttachment(img_ao_only.tex) ); - // return output_image.hdl; return ao_only_idx ^ 1; // 0 <-> 1 } diff --git a/source/runtime/render/renderer/raster/UpsamplePass.h b/source/runtime/render/renderer/raster/UpsamplePass.h deleted file mode 100644 index 7ec260b4b..000000000 --- a/source/runtime/render/renderer/raster/UpsamplePass.h +++ /dev/null @@ -1,74 +0,0 @@ -/** - * 此文件应该只有在宏 WITH_CUDA 被设置的情况下使用 -*/ -#pragma once - -#if !defined(WITH_CUDA) -#error "This header requires WITH_CUDA=1" -#endif - -#include "shader/ShaderPipeline.h" -#include "shaderheaders/shared/raster/post_process/ShaderParameters.h" - -#include "RasterConfig.h" -#include "RasterResource.h" -#include "RasterTool.h" - -namespace Moer::Render::Raster { - -class UpsamplePipeline : public RasterPipeline { -public: - DEFINE_RASTER_PIPELINE_CLASS(UpsamplePipeline); - DEFINE_SHADER_CONSTANT_STRUCT(UpsamplePipelineBindlessParam, param); - DEFINE_SHADER_BINDLESS_ARRAY(bdls); - DEFINE_SHADER_ARGS(bdls, param); -}; - -class UpsamplePass { -public: - UpsamplePass(RasterContext& context) { - GfxPsoCreateInfo pso_info( - RHIRasterizeInfo::Preset(), - {}, - {RHIColorAttachmentInfo::Preset(context.textures.upsample_output.tex->GetFormat())} - ); - - upsample_pipeline = context.manager.Raster() - .Vertex("core/utils/FullScreenQuad.hlsl") - .Pixel("pipelines/postprocess/common/Upsample.hlsl") - .Build(std::move(pso_info)); - } - - TextureWithHandle - Process(RasterContext& context, const RasterConfig& 
ui_config, TextureWithHandle input_image) { - UpsamplePipelineBindlessParam param; - - //param.low_res_tex = low_res_tex; - param.upsample_mode = static_cast(ui_config.upsample_mode); - param.outSize = ui_config.outSize_x; - param.inSize = ui_config.inSize_x; - param.input_image = input_image.hdl; - //param.high_res_depth = context.textures.position.hdl; // 可用 position/深度图作为引导 - //param.inv_low_res = float2(1.0f / ui_config.render_res.x, 1.0f / ui_config.render_res.y); - //param.inv_high_res = float2(1.0f / ui_config.display_res.x, 1.0f / ui_config.display_res.y); - //param.scale_ratio = float2( - //ui_config.display_res.x / ui_config.render_res.x, - //ui_config.display_res.y / ui_config.render_res.y - //); - //param.sharpness = ui_config.upsample_sharpness; - - context.cmd_list.Gfx(upsample_pipeline, context.bdls, param) - .Draw( - "Upsample Pass", - context.textures.upsample_output.GetRect2D(), - RasterTool::GetFullScreenDrawDatas(), - ColorAttachment(context.textures.upsample_output.tex) - ); - - return context.textures.upsample_output; - } - -private: - UpsamplePipeline upsample_pipeline; -}; -} // namespace Moer::Render::Raster diff --git a/source/runtime/render/rhi/RHIResource.cpp b/source/runtime/render/rhi/RHIResource.cpp index 05acf682d..3d4f537f7 100644 --- a/source/runtime/render/rhi/RHIResource.cpp +++ b/source/runtime/render/rhi/RHIResource.cpp @@ -32,6 +32,10 @@ uint TextureWithHandle::GetMipHandle(uint mip) { return hdl; } +uint2 DepthBufferWithHandle::GetSize() { + return uint2(tex->GetWidth(), tex->GetHeight()); +} + TextureView::TextureView(Texture* _texture) : texture(_texture), offset(0), diff --git a/source/runtime/render/rhi/RHIResource.h b/source/runtime/render/rhi/RHIResource.h index db11fa500..80c4ef484 100644 --- a/source/runtime/render/rhi/RHIResource.h +++ b/source/runtime/render/rhi/RHIResource.h @@ -166,6 +166,8 @@ struct TextureWithHandle { struct DepthBufferWithHandle { DepthBufferRef tex; uint hdl = 0; + + uint2 GetSize(); }; struct 
BufferWithHandle { diff --git a/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h b/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h index b82eabb3b..bf3f6ab08 100644 --- a/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h +++ b/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h @@ -37,9 +37,8 @@ struct AoPipelineBindlessParam { uint ssao_sample_count; uint ssao_radius; uint ao_mode; - uint input_image; - uint normal_tex; + uint depth_tex; uint noise_tex; // linear & repeat sampler uint camera_mv_data_handle; // for camera motion vector @@ -71,45 +70,46 @@ struct SsdoPipelineBindlessParam { struct RtaoPipelineBindlessParam { float4x4 clip2world; - float3 camera_pos; uint frame_idx; + uint normal_tex; + uint depth_tex; + uint spp; float2 resolution; float2 inv_resolution; - uint input_image; - uint normal_tex; - uint depth_tex; - uint ao_mode; - - uint sample_mode; - uint spp; float ray_trace_distance; float intensity; + uint camera_mv_data_handle; + uint noise_tex; - uint camera_mv_data_handle; // for camera motion vector - uint noise_tex; // blue noise texture for spatially coherent sampling - uint padding0; - uint padding1; + float2 depth_tex_resolution; }; struct RtaoDenoiserPassBindlessParam { uint history_ao_tex; uint curr_ao_tex; - uint color_tex; uint motion_vector_tex; - - uint depth_tex; - uint normal_tex; - uint is_rtao_ao_only; uint is_reprojection_enable; + uint depth_tex; + uint normal_tex; uint is_validation_enable; float history_ratio; + float valid_depth_threshold; float valid_normal_threshold; }; +struct AoCompositeParam { + uint ao_tex; + uint color_tex; + uint ao_mode; + uint padding0; + float2 full_resolution; + float2 inv_full_resolution; +}; + struct BilateralFilterDenoiserPipelineBindlessParam { float2 inv_resolution; float spatial_sigma_square; From bc458a6b8462f0a44f56e88b8e1fdc16125a4daa Mon Sep 17 00:00:00 2001 
From: YXHXianYu <2943003@qq.com> Date: Tue, 31 Mar 2026 19:48:38 +0800 Subject: [PATCH 16/17] =?UTF-8?q?feat(raster):=20=E7=BB=991/2=20DownSample?= =?UTF-8?q?=E7=9A=84RTAO=E6=B7=BB=E5=8A=A0=E4=BA=86=E5=8F=8C=E8=BE=B9?= =?UTF-8?q?=E6=BB=A4=E6=B3=A2Upsample=EF=BC=9B=E9=BB=98=E8=AE=A4=E5=90=AF?= =?UTF-8?q?=E7=94=A8DownSample=E4=B8=94SPP=E4=BB=8E4=E6=94=B9=E4=B8=BA8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../lighting_effects/AoComposite.hlsl | 62 ++++++++++++++++++- .../runtime/render/renderer/raster/AoPass.h | 10 +++ .../render/renderer/raster/RasterConfig.h | 2 +- .../raster/post_process/ShaderParameters.h | 9 ++- 4 files changed, 78 insertions(+), 5 deletions(-) diff --git a/shaders/pipelines/postprocess/lighting_effects/AoComposite.hlsl b/shaders/pipelines/postprocess/lighting_effects/AoComposite.hlsl index 734fbc5a4..9e1d5be3b 100644 --- a/shaders/pipelines/postprocess/lighting_effects/AoComposite.hlsl +++ b/shaders/pipelines/postprocess/lighting_effects/AoComposite.hlsl @@ -7,12 +7,72 @@ BINDLESS_BINDINGS(3, 2, 4, 5) [[vk::push_constant]] ConstantBuffer param; [[vk::binding(0, 0)]] RWTexture2D rw_output; +// Joint Bilateral Upsample:用全分辨率深度+法线引导,对半分辨率AO进行边缘保持上采样 +float JointBilateralUpsampleAO(float2 uv) { + float2 ao_res = param.ao_resolution; + float2 inv_ao_res = 1.0 / ao_res; + + // 全分辨率像素处的深度和法线 + float depth_center = TextureHandle(param.depth_tex).SampleLevel(uv); + float3 normal_center = Raster::UnpackNormal(TextureHandle(param.normal_tex).SampleLevel(uv)); + + // 低分辨率纹理空间中的亚像素位置 + float2 low_pos = uv * ao_res - 0.5; + int2 base_texel = int2(floor(low_pos)); + float2 frac_pos = frac(low_pos); + + static const float kDepthScale = 100.0; // 深度差异灵敏度(相对深度) + static const float kNormalPower = 32.0; // 法线差异灵敏度 + + float ao_sum = 0.0; + float w_sum = 0.0; + + [unroll] + for (int y = 0; y <= 1; y++) { + [unroll] + for (int x = 0; x <= 1; x++) { + int2 tap = base_texel + int2(x, y); + float2 tap_uv = 
(float2(tap) + 0.5) * inv_ao_res; + + float ao_sample = TextureHandle(param.ao_tex).SampleLevel(tap_uv); + + // 在低分辨率像素中心位置采样全分辨率深度和法线(用于边缘判断) + float depth_sample = TextureHandle(param.depth_tex).SampleLevel(tap_uv); + float3 normal_sample = Raster::UnpackNormal(TextureHandle(param.normal_tex).SampleLevel(tap_uv)); + + // 空间权重(双线性插值权重) + float w_spatial = (x == 0 ? (1.0 - frac_pos.x) : frac_pos.x) + * (y == 0 ? (1.0 - frac_pos.y) : frac_pos.y); + + // 深度权重:相对深度差越大,权重越低 + float depth_diff = abs(depth_center - depth_sample) / max(depth_center, 1e-6); + float w_depth = exp(-depth_diff * kDepthScale); + + // 法线权重:法线夹角越大,权重越低 + float w_normal = pow(max(dot(normal_center, normal_sample), 0.0), kNormalPower); + + float w = w_spatial * w_depth * w_normal; + ao_sum += ao_sample * w; + w_sum += w; + } + } + + // 所有权重都很小时回退到双线性采样(天空等无几何区域) + return w_sum > 1e-6 ? ao_sum / w_sum : TextureHandle(param.ao_tex).SampleLevel(uv); +} + [numthreads(8, 8, 1)] void main(uint2 pixel_pos : SV_DispatchThreadID) { if (pixel_pos.x >= uint(param.full_resolution.x) || pixel_pos.y >= uint(param.full_resolution.y)) return; float2 uv = (float2(pixel_pos) + 0.5) * param.inv_full_resolution; - float ao = TextureHandle(param.ao_tex).SampleLevel(uv); + + float ao; + if (param.is_half_resolution) { + ao = JointBilateralUpsampleAO(uv); + } else { + ao = TextureHandle(param.ao_tex).SampleLevel(uv); + } if (param.ao_mode == Moer::EAoMode::RTAO_AO_ONLY || param.ao_mode == Moer::EAoMode::SSAO_AO_ONLY || param.ao_mode == Moer::EAoMode::SSDO_AO_ONLY) { diff --git a/source/runtime/render/renderer/raster/AoPass.h b/source/runtime/render/renderer/raster/AoPass.h index 46ac19ab5..a6dad31bd 100644 --- a/source/runtime/render/renderer/raster/AoPass.h +++ b/source/runtime/render/renderer/raster/AoPass.h @@ -177,8 +177,18 @@ class AoPass { param.ao_tex = ao_only_hdl; param.color_tex = context.textures.lighting_output.hdl; param.ao_mode = static_cast(ui_config.ao_mode); + param.is_half_resolution = 
ui_config.ao_half_resolution ? 1u : 0u; param.full_resolution = float2(context.textures.ao_output.GetSize()); param.inv_full_resolution = float2(1.0f) / param.full_resolution; + param.depth_tex = context.textures.depth_nearest_sampler.hdl; + param.normal_tex = context.textures.normal.hdl; + + if (ui_config.ao_half_resolution) { + uint2 full = uint2(context.textures.ao_output.GetSize()); + param.ao_resolution = float2(std::max(1u, full.x / 2), std::max(1u, full.y / 2)); + } else { + param.ao_resolution = param.full_resolution; + } uint2 res = uint2(context.textures.ao_output.GetSize()); context.cmd_list.Compute(ao_composite_pipeline, param, context.textures.ao_output.tex, context.bdls) diff --git a/source/runtime/render/renderer/raster/RasterConfig.h b/source/runtime/render/renderer/raster/RasterConfig.h index 758d93446..0dd24a338 100644 --- a/source/runtime/render/renderer/raster/RasterConfig.h +++ b/source/runtime/render/renderer/raster/RasterConfig.h @@ -181,7 +181,7 @@ struct RasterConfig { ERtaoSampleMode rtao_sample_mode = ERtaoSampleMode::COSINE_WEIGHTED; float rtao_intensity = 1.0f; float rtao_ray_trace_distance = 1.0f; - int rtao_spp = 4; + int rtao_spp = 8; bool rtao_denoiser_enable = true; bool rtao_denoiser_reprojection_enable = true; diff --git a/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h b/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h index bf3f6ab08..2e25d2b4b 100644 --- a/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h +++ b/source/runtime/render/shaderheaders/shared/raster/post_process/ShaderParameters.h @@ -102,12 +102,15 @@ struct RtaoDenoiserPassBindlessParam { }; struct AoCompositeParam { + float2 full_resolution; + float2 inv_full_resolution; + float2 ao_resolution; uint ao_tex; uint color_tex; uint ao_mode; - uint padding0; - float2 full_resolution; - float2 inv_full_resolution; + uint is_half_resolution; + uint depth_tex; + uint 
normal_tex; }; struct BilateralFilterDenoiserPipelineBindlessParam { From c8c880f6f640a15b5cc43d625782a01c60f8b4f7 Mon Sep 17 00:00:00 2001 From: YXHXianYu <2943003@qq.com> Date: Tue, 31 Mar 2026 20:15:28 +0800 Subject: [PATCH 17/17] =?UTF-8?q?fix(raster):=20=E4=BF=AE=E5=A4=8D?= =?UTF-8?q?=E4=BA=86Instance=20Culling=E5=AF=BC=E8=87=B4=E5=AF=B9Shadow?= =?UTF-8?q?=E4=BA=A7=E7=94=9F=E8=B4=A1=E7=8C=AE=E7=9A=84Mesh=E4=B8=A2?= =?UTF-8?q?=E5=A4=B1=E9=97=AE=E9=A2=98=EF=BC=9B=E4=B8=B4=E6=97=B6=E7=A6=81?= =?UTF-8?q?=E7=94=A8Instance=20Culling=EF=BC=8C=E5=9B=A0=E4=B8=BA=E7=9B=AE?= =?UTF-8?q?=E5=89=8D=E5=90=AF=E7=94=A8=E4=BC=9A=E5=AF=BC=E8=87=B4=E6=80=A7?= =?UTF-8?q?=E8=83=BD=E6=9B=B4=E5=B7=AE=C2=B7=C2=B7=C2=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- source/runtime/render/renderer/raster/RasterConfig.h | 2 +- source/runtime/render/renderer/raster/RasterRenderer.cpp | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/source/runtime/render/renderer/raster/RasterConfig.h b/source/runtime/render/renderer/raster/RasterConfig.h index 0dd24a338..804e1026f 100644 --- a/source/runtime/render/renderer/raster/RasterConfig.h +++ b/source/runtime/render/renderer/raster/RasterConfig.h @@ -113,7 +113,7 @@ struct RasterConfig { bool geometry_enable_alpha_test = true; float geometry_alpha_test_blend_pixel_cutoff = 0.5f; // 当AlphaMode为BLEND时,低于该值的像素会被丢弃 - bool enable_frustum_culling = true; // GPU视锥剔除 + bool enable_frustum_culling = false; // GPU视锥剔除 // MARK: Culling Statistics (只读,由GPU更新) struct CullingStats { diff --git a/source/runtime/render/renderer/raster/RasterRenderer.cpp b/source/runtime/render/renderer/raster/RasterRenderer.cpp index 93f760223..8fc860ef0 100644 --- a/source/runtime/render/renderer/raster/RasterRenderer.cpp +++ b/source/runtime/render/renderer/raster/RasterRenderer.cpp @@ -252,6 +252,12 @@ bool RasterRenderer::RunSingle(const SharedPtr editor_config, cons // FIXME: 统一update scene // 
scene.GetGpuScene().UpdateRaytracingScene(cmd_list); + // 当启用视锥剔除时,先恢复完整的 draw commands, + // 确保 ShadowDepthPass 使用未被上一帧剔除的完整场景数据 + if (raster_config.enable_frustum_culling) { + raster_context.scene.RestoreDrawCommands(raster_context.cmd_list); + } + // Shadow Depth Pass shadow_depth_pass->Process(raster_context, raster_config, camera);