diff --git a/framework/api_vulkan_sample.h b/framework/api_vulkan_sample.h index 1cda25340b..2fbfe57a0e 100644 --- a/framework/api_vulkan_sample.h +++ b/framework/api_vulkan_sample.h @@ -294,7 +294,7 @@ class ApiVulkanSample : public vkb::VulkanSampleC /** * @brief Creates a new (graphics) command pool object storing command buffers */ - void create_command_pool(); + virtual void create_command_pool(); /** * @brief Setup default depth and stencil views diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index a11b4c054b..2624459689 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -48,99 +48,90 @@ endforeach () # Order of the sample ids set(ORDER_LIST - #API Samples - "hello_triangle" - "dynamic_uniform_buffers" - "texture_loading" - "hdr" - "instancing" - "compute_nbody" - "terrain_tessellation" - "hlsl" - "oit_linked_lists" - "oit_depth_peeling" - - #Extension Samples - "dynamic_rendering" - "conservative_rasterization" - "fragment_shading_rate" - "fragment_shading_rate_dynamic" - "full_screen_exclusive" - "calibrated_timestamps" - "graphics_pipeline_library" - "memory_budget" - "mesh_shader_culling" - "push_descriptors" - "ray_queries" - "ray_tracing_basic" - "ray_tracing_extended" - "ray_tracing_reflection" - "timeline_semaphore" - "shader_object" - "shader_debugprintf" - "synchronization_2" - "buffer_device_address" - "descriptor_indexing" - "portability" - "vertex_dynamic_state" - "extended_dynamic_state2" - "logic_op_dynamic_state" - "patch_control_points" - "fragment_shader_barycentric" - "gshader_to_mshader" - "color_write_enable" - "sparse_image" - "dynamic_primitive_clipping" - - #Performance Samples - "swapchain_images" - "surface_rotation" - "pipeline_cache" - "descriptor_management" - "constant_data" - "render_passes" - "msaa" - "subpasses" - "pipeline_barriers" - "wait_idle" - "layout_transitions" - "specialization_constants" - "command_buffer_usage" - "multithreading_render_passes" - "afbc" - "16bit_storage_input_output" - 
"16bit_arithmetic" - "async_compute" - "multi_draw_indirect" - "texture_compression_comparison" - - #Tooling samples - "profiles" - - #HPP API Samples - "hpp_compute_nbody" - "hpp_dynamic_uniform_buffers" - "hpp_hdr" - "hpp_hello_triangle" - "hpp_hlsl_shaders" - "hpp_instancing" - "hpp_oit_depth_peeling" - "hpp_oit_linked_lists" - "hpp_separate_image_sampler" - "hpp_terrain_tessellation" - "hpp_texture_loading" - "hpp_texture_mipmap_generation" - - #HPP Extension Samples - "hpp_mesh_shading" - - #HPP Performance Samples - "hpp_pipeline_cache" - "hpp_swapchain_images" - "hpp_texture_compression_comparison" - - #General Samples - "mobile_nerf") + #API Samples + "hello_triangle" + "dynamic_uniform_buffers" + "texture_loading" + "hdr" + "instancing" + "compute_nbody" + "terrain_tessellation" + "hlsl" + + #Extension Samples + "dynamic_rendering" + "conservative_rasterization" + "fragment_shading_rate" + "fragment_shading_rate_dynamic" + "full_screen_exclusive" + "calibrated_timestamps" + "graphics_pipeline_library" + "memory_budget" + "mesh_shader_culling" + "push_descriptors" + "ray_queries" + "ray_tracing_basic" + "ray_tracing_extended" + "ray_tracing_reflection" + "timeline_semaphore" + "shader_object" + "synchronization_2" + "buffer_device_address" + "descriptor_indexing" + "portability" + "vertex_dynamic_state" + "extended_dynamic_state2" + "logic_op_dynamic_state" + "patch_control_points" + "fragment_shader_barycentric" + "gshader_to_mshader" + "color_write_enable" + "subgroups_operations" + + #Performance Samples + "swapchain_images" + "surface_rotation" + "pipeline_cache" + "descriptor_management" + "constant_data" + "render_passes" + "msaa" + "subpasses" + "pipeline_barriers" + "wait_idle" + "layout_transitions" + "specialization_constants" + "command_buffer_usage" + "multithreading_render_passes" + "afbc" + "16bit_storage_input_output" + "16bit_arithmetic" + "async_compute" + "multi_draw_indirect" + "texture_compression_comparison" + + #Tooling samples + 
"profiles" + + #HPP API Samples + "hpp_compute_nbody" + "hpp_dynamic_uniform_buffers" + "hpp_hdr" + "hpp_hello_triangle" + "hpp_hlsl_shaders" + "hpp_instancing" + "hpp_separate_image_sampler" + "hpp_terrain_tessellation" + "hpp_texture_loading" + "hpp_texture_mipmap_generation" + + #HPP Performance Samples + "hpp_pipeline_cache" + "hpp_swapchain_images" + "hpp_texture_compression_comparison" + + #General Samples + "mobile_nerf") # Orders the sample ids by the order list above set(ORDERED_LIST) diff --git a/samples/extensions/subgroups_operations/CMakeLists.txt b/samples/extensions/subgroups_operations/CMakeLists.txt new file mode 100644 index 0000000000..49197c1fdb --- /dev/null +++ b/samples/extensions/subgroups_operations/CMakeLists.txt @@ -0,0 +1,43 @@ + # Copyright (c) 2024, Mobica Limited + # + # SPDX-License-Identifier: Apache-2.0 + # + # Licensed under the Apache License, Version 2.0 the "License"; + # you may not use this file except in compliance with the License. + # You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. 
+ + get_filename_component(FOLDER_NAME ${CMAKE_CURRENT_LIST_DIR} NAME) + get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} PATH) + get_filename_component(CATEGORY_NAME ${PARENT_DIR} NAME) + +add_sample( + ID ${FOLDER_NAME} + CATEGORY ${CATEGORY_NAME} + AUTHOR "Mobica" + NAME "subgroups_operations" + DESCRIPTION "Demonstrates the use of a subgroups operations feature" + SHADER_FILES_GLSL + "subgroups_operations/ocean.vert" + "subgroups_operations/ocean.frag" + "subgroups_operations/ocean.tesc" + "subgroups_operations/ocean.tese" + "subgroups_operations/fft_invert.comp" + "subgroups_operations/butterfly_precomp.comp" + "subgroups_operations/fft_tilde_h.comp" + "subgroups_operations/fft_tilde_h0.comp" + "subgroups_operations/fft_normal_map.comp" + "subgroups_operations/fft.comp" + "subgroups_operations/fft_invert_subgroups_off.comp" + "subgroups_operations/butterfly_precomp_subgroups_off.comp" + "subgroups_operations/fft_tilde_h_subgroups_off.comp" + "subgroups_operations/fft_tilde_h0_subgroups_off.comp" + "subgroups_operations/fft_normal_map_subgroups_off.comp" + "subgroups_operations/fft_subgroups_off.comp") \ No newline at end of file diff --git a/samples/extensions/subgroups_operations/README.adoc b/samples/extensions/subgroups_operations/README.adoc new file mode 100644 index 0000000000..e4cef2918b --- /dev/null +++ b/samples/extensions/subgroups_operations/README.adoc @@ -0,0 +1,98 @@ +//// +Copyright (c) 2024, Mobica Limited + +SPDX-License-Identifier: Apache-2.0 + +Licensed under the Apache License, Version 2.0 the "License"; +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +//// + += Subgroups Operations + +== Overview + +The example demonstrates the use of the feature subgroups operations. An ocean animation based on the FFT is implemented to demonstrate this feature (Fast Fourier Transform https://en.wikipedia.org/wiki/Fast_Fourier_transform). +image:image/image.png[] + +Subgroups operation feature introduces a mechanism to share data between the invocations that run in parallel on a single compute unit. + + +== GLSL Shaders + +To make full use of the feature subgroups operation, one of the following extensions must be enabled: + +* `#extension GL_KHR_shader_subgroup_basic` + +* `#extension GL_KHR_shader_subgroup_vote` + +* `#extension GL_KHR_shader_subgroup_ballot` + +* `#extension GL_KHR_shader_subgroup_arithmetic` + +* `#extension GL_KHR_shader_subgroup_shuffle` + +* `#extension GL_KHR_shader_subgroup_shuffle_relative` + +* `#extension GL_KHR_shader_subgroup_clustered` + +* `#extension GL_KHR_shader_subgroup_quad` + + +This sample focuses on the `GL_KHR_shader_subgroup_basic` extension. +You can read more about subgroups in this article: https://www.khronos.org/blog/vulkan-subgroup-tutorial. + + +== Enabling the Feature + +In order to use subgroups operations, the required extensions must be enabled, an instance of the Vulkan API must be created with a minimum of version 1.1 and SPIR-V 1.4 must be used. + +The VkDevice must be created with the `VK_EXT_subgroup_size_control` extension. +It is also required to enable `VK_KHR_spirv_1_4` and `VK_KHR_shader_float_controls` in addition (`VK_KHR_spirv_1_4` requires this).
+ + +To get the properties of the supported subgroups, the following should be taken: +[,cpp] +---- +VkPhysicalDeviceSubgroupProperties subgroups_properties; +subgroups_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; +subgroups_properties.pNext = VK_NULL_HANDLE; + +VkPhysicalDeviceProperties2 device_properties2 = {}; +device_properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; +device_properties2.pNext = &subgroups_properties; +vkGetPhysicalDeviceProperties2(gpu.get_handle(), &device_properties2); +---- + + +== Documentation + +* https://docs.vulkan.org/guide/latest/subgroups.html + +* https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkPhysicalDeviceSubgroupProperties.html + +* https://www.khronos.org/blog/vulkan-subgroup-tutorial + + +== Sources +The implementation was based on the following sources: + +* https://tore.tuhh.de/entities/publication/1cd390d3-732b-41c1-aa2b-07b71a64edd2 +* https://people.computing.clemson.edu/~jtessen/reports/papers_files/coursenotes2004.pdf +* https://github.com/achalpandeyy/OceanFFT +* https://github.com/deiss/fftocean +* https://github.com/iamyoukou/fftWater + +== Authors +* Patryk Jastrzębski +* Krzysztof Dmitruk +* Seweryn Zielas +* Piotr Plenański diff --git a/samples/extensions/subgroups_operations/image/image.png b/samples/extensions/subgroups_operations/image/image.png new file mode 100644 index 0000000000..b7c94452e7 Binary files /dev/null and b/samples/extensions/subgroups_operations/image/image.png differ diff --git a/samples/extensions/subgroups_operations/subgroups_operations.cpp b/samples/extensions/subgroups_operations/subgroups_operations.cpp new file mode 100644 index 0000000000..4a683a704e --- /dev/null +++ b/samples/extensions/subgroups_operations/subgroups_operations.cpp @@ -0,0 +1,1750 @@ +/* Copyright (c) 2024, Mobica Limited + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not 
use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "subgroups_operations.h" +#include + +#include + +void SubgroupsOperations::Pipeline::destroy(VkDevice device) const +{ + if (pipeline != VK_NULL_HANDLE) + { + vkDestroyPipeline(device, pipeline, nullptr); + } + + if (pipeline_layout != VK_NULL_HANDLE) + { + vkDestroyPipelineLayout(device, pipeline_layout, nullptr); + } +} + +SubgroupsOperations::SubgroupsOperations() +{ + // SPIRV 1.4 requires Vulkan 1.1 + set_api_version(VK_API_VERSION_1_1); + + // Subgroup size control extensions required by this sample + add_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME); + + // Required for VK_EXT_subgroup_size_control + add_device_extension(VK_KHR_SPIRV_1_4_EXTENSION_NAME); + + // Required by VK_KHR_spirv_1_4 + add_device_extension(VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME); + + // Targeting SPIR-V version + vkb::GLSLCompiler::set_target_environment(glslang::EShTargetSpv, glslang::EShTargetSpv_1_4); + + title = "Subgroups operations"; + camera.type = vkb::CameraType::FirstPerson; + camera.translation_speed = 15.0f; + + camera.set_perspective(60.0f, static_cast(width) / static_cast(height), 0.01f, 256.0f); + camera.set_position({0.0f, 5.0f, 0.0f}); + camera.translation_speed = {15.0f}; + + grid_size = 256U; +} + +SubgroupsOperations::~SubgroupsOperations() +{ + if (has_device()) + { + fft_buffers.fft_tilde_h_kt_dx->destroy(get_device().get_handle()); + fft_buffers.fft_tilde_h_kt_dy->destroy(get_device().get_handle()); + 
fft_buffers.fft_tilde_h_kt_dz->destroy(get_device().get_handle()); + fft_buffers.fft_displacement->destroy(get_device().get_handle()); + fft_buffers.fft_normal_map->destroy(get_device().get_handle()); + + fft_buffers.fft_input_htilde0->destroy(get_device().get_handle()); + fft_buffers.fft_input_htilde0_conj->destroy(get_device().get_handle()); + butterfly_precomp.destroy(get_device().get_handle()); + + precompute.pipeline.destroy(get_device().get_handle()); +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + precompute.pipeline_subgroups_off.destroy(get_device().get_handle()); +#endif + vkDestroyDescriptorSetLayout(get_device().get_handle(), precompute.descriptor_set_layout, nullptr); + + tildes.pipeline.destroy(get_device().get_handle()); +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + tildes.pipeline_subgroups_off.destroy(get_device().get_handle()); +#endif + vkDestroyDescriptorSetLayout(get_device().get_handle(), tildes.descriptor_set_layout, nullptr); + + initial_tildes.pipeline.destroy(get_device().get_handle()); +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + initial_tildes.pipeline_subgroups_off.destroy(get_device().get_handle()); +#endif + vkDestroyDescriptorSetLayout(get_device().get_handle(), initial_tildes.descriptor_set_layout, nullptr); + + fft_inversion.pipeline.destroy(get_device().get_handle()); +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + fft_inversion.pipeline_subgroups_off.destroy(get_device().get_handle()); +#endif + vkDestroyDescriptorSetLayout(get_device().get_handle(), fft_inversion.descriptor_set_layout, nullptr); + + fft_normal_map.pipeline.destroy(get_device().get_handle()); +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + fft_normal_map.pipeline_subgroups_off.destroy(get_device().get_handle()); +#endif + vkDestroyDescriptorSetLayout(get_device().get_handle(), fft_normal_map.descriptor_set_layout, nullptr); + + fft.tilde_axis_x->destroy(get_device().get_handle()); + fft.tilde_axis_y->destroy(get_device().get_handle()); + fft.tilde_axis_z->destroy(get_device().get_handle()); + 
fft.pipelines.horizontal.destroy(get_device().get_handle()); + fft.pipelines.vertical.destroy(get_device().get_handle()); +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + fft.pipelines.horizontal_subgroups_off.destroy(get_device().get_handle()); + fft.pipelines.vertical_subgroups_off.destroy(get_device().get_handle()); +#endif + vkDestroyDescriptorSetLayout(get_device().get_handle(), fft.descriptor_set_layout, nullptr); + + vkDestroySemaphore(get_device().get_handle(), compute.semaphore, nullptr); + vkDestroyCommandPool(get_device().get_handle(), compute.command_pool, nullptr); + + ocean.pipelines._default.destroy(get_device().get_handle()); + ocean.pipelines.wireframe.destroy(get_device().get_handle()); + vkDestroyDescriptorSetLayout(get_device().get_handle(), ocean.descriptor_set_layout, nullptr); + vkDestroySemaphore(get_device().get_handle(), ocean.semaphore, nullptr); + } +} + +bool SubgroupsOperations::prepare(const vkb::ApplicationOptions &options) +{ + if (!ApiVulkanSample::prepare(options)) + { + return false; + } + + ocean.graphics_queue_family_index = get_device().get_queue_by_flags(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0).get_family_index(); + + load_assets(); + setup_descriptor_pool(); + prepare_uniform_buffers(); + prepare_compute(); + + // prepare grpahics pipeline + create_semaphore(); + create_descriptor_set_layout(); + + create_initial_tides(); + create_tildas(); + create_butterfly_texture(); + create_fft(); + create_fft_inversion(); + create_fft_normal_map(); + + create_descriptor_set(); + create_pipelines(); + + build_compute_command_buffer(); + + build_command_buffers(); + + // signal semaphore + VkSubmitInfo submit_info = vkb::initializers::submit_info(); + submit_info.signalSemaphoreCount = 1u; + submit_info.pSignalSemaphores = &ocean.semaphore; + VK_CHECK(vkQueueSubmit(queue, 1u, &submit_info, VK_NULL_HANDLE)); + get_device().wait_idle(); + + prepared = true; + return true; +} + +void SubgroupsOperations::prepare_compute() +{ + 
create_compute_queue(); + create_compute_command_pool(); + create_compute_command_buffer(); +} + +void SubgroupsOperations::create_compute_queue() +{ + // create compute queue and get family index + compute.queue_family_index = get_device().get_queue_by_flags(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0).get_family_index(); + vkGetDeviceQueue(get_device().get_handle(), compute.queue_family_index, 0u, &compute.queue); +} + +void SubgroupsOperations::create_compute_command_pool() +{ + VkCommandPoolCreateInfo command_pool_create_info = {}; + command_pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + command_pool_create_info.queueFamilyIndex = compute.queue_family_index; + command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + VK_CHECK(vkCreateCommandPool(get_device().get_handle(), &command_pool_create_info, nullptr, &compute.command_pool)); +} + +void SubgroupsOperations::create_compute_command_buffer() +{ + // Create a command buffer for compute operations + VkCommandBufferAllocateInfo command_buffer_allocate_info = + vkb::initializers::command_buffer_allocate_info(compute.command_pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, 1u); + + VK_CHECK(vkAllocateCommandBuffers(get_device().get_handle(), &command_buffer_allocate_info, &compute.command_buffer)); +} + +void SubgroupsOperations::build_compute_command_buffer() +{ + // record compute command + VkCommandBufferBeginInfo begin_info = vkb::initializers::command_buffer_begin_info(); + VK_CHECK(vkBeginCommandBuffer(compute.command_buffer, &begin_info)); + + { + VkBufferMemoryBarrier memory_barrier = vkb::initializers::buffer_memory_barrier(); + memory_barrier.buffer = bit_reverse_buffer->get_handle(); + memory_barrier.offset = 0u; + memory_barrier.size = bit_reverse_buffer->get_size(); + memory_barrier.srcAccessMask = VK_ACCESS_MEMORY_READ_BIT; + memory_barrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + memory_barrier.srcQueueFamilyIndex = ocean.graphics_queue_family_index; 
+ memory_barrier.dstQueueFamilyIndex = compute.queue_family_index; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 1u, &memory_barrier, 0u, nullptr); + } + + // butterfly texture + { +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui.subgroups_enabled ? precompute.pipeline.pipeline : precompute.pipeline_subgroups_off.pipeline); +#else + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, precompute.pipeline.pipeline); +#endif + vkCmdBindDescriptorSets(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, precompute.pipeline.pipeline_layout, 0u, 1u, &precompute.descriptor_set, 0u, nullptr); + + vkCmdDispatch(compute.command_buffer, 1u, grid_size, 1u); + + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = butterfly_precomp.image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + + // initial tildes textures + { +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui.subgroups_enabled ? 
initial_tildes.pipeline.pipeline : initial_tildes.pipeline_subgroups_off.pipeline); +#else + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, initial_tildes.pipeline.pipeline); +#endif + vkCmdBindDescriptorSets(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, initial_tildes.pipeline.pipeline_layout, 0u, 1u, &initial_tildes.descriptor_set, 0u, nullptr); + + vkCmdDispatch(compute.command_buffer, grid_size / 32u, grid_size, 1u); + { + VkBufferMemoryBarrier memory_barrier = vkb::initializers::buffer_memory_barrier(); + memory_barrier.buffer = fft_buffers.fft_input_random->get_handle(); + memory_barrier.offset = 0u; + memory_barrier.size = fft_buffers.fft_input_random->get_size(); + memory_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + memory_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + ; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 1u, &memory_barrier, 0u, nullptr); + } + + { + VkMemoryBarrier memory_barrier = vkb::initializers::memory_barrier(); + memory_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 1u, &memory_barrier, 0u, nullptr, 0u, nullptr); + } + } + + // tildes textures + { +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui.subgroups_enabled ? 
tildes.pipeline.pipeline : tildes.pipeline_subgroups_off.pipeline); +#else + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, tildes.pipeline.pipeline); +#endif + vkCmdBindDescriptorSets(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, tildes.pipeline.pipeline_layout, 0u, 1u, &tildes.descriptor_set, 0u, nullptr); + + vkCmdDispatch(compute.command_buffer, grid_size / 8u, grid_size, 1u); + + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft_buffers.fft_input_htilde0->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + + // fft horizontal; for Y axis + // shader: fft.comp + // layout (binding = 0, rgba32f) readonly uniform image2D u_butterfly_precomp; -> image_descriptor_butterfly + // layout (binding = 1, rgba32f) uniform image2D u_pingpong0; -> image_descriptor_tilda_y + // layout (binding = 2, rgba32f) uniform image2D u_pingpong1; -> image_descriptor_tilde_axis_y + { +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui.subgroups_enabled ? 
fft.pipelines.horizontal.pipeline : fft.pipelines.horizontal_subgroups_off.pipeline); +#else + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.horizontal.pipeline); +#endif + vkCmdBindDescriptorSets(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.horizontal.pipeline_layout, 0u, 1u, &fft.descriptor_set_axis_y, 0u, nullptr); + + for (uint32_t i = 0; i < log_2_N; ++i) + { + vkCmdPushConstants(compute.command_buffer, fft.pipelines.horizontal.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &i); + vkCmdDispatch(compute.command_buffer, grid_size / 32u, grid_size, 1u); + + { + VkImageMemoryBarrier img_barrier[2] = {vkb::initializers::image_memory_barrier(), vkb::initializers::image_memory_barrier()}; + img_barrier[0].image = fft_buffers.fft_tilde_h_kt_dy->image; + img_barrier[0].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier[0].subresourceRange.baseMipLevel = 0u; + img_barrier[0].subresourceRange.levelCount = 1u; + img_barrier[0].subresourceRange.baseArrayLayer = 0u; + img_barrier[0].subresourceRange.layerCount = 1u; + img_barrier[0].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier[0].newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier[0].srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier[0].dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + img_barrier[1].image = fft.tilde_axis_y->image; + img_barrier[1].subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier[1].subresourceRange.baseMipLevel = 0u; + img_barrier[1].subresourceRange.levelCount = 1u; + img_barrier[1].subresourceRange.baseArrayLayer = 0u; + img_barrier[1].subresourceRange.layerCount = 1u; + img_barrier[1].oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier[1].newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier[1].srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier[1].dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 2u, img_barrier); + } + } + } + + // fft horizontal; for X axis + // shader: fft.comp + // layout (binding = 0, rgba32f) readonly uniform image2D u_butterfly_precomp; -> image_descriptor_butterfly + // layout (binding = 1, rgba32f) uniform image2D u_pingpong0; -> image_descriptor_tilda_x + // layout (binding = 2, rgba32f) uniform image2D u_pingpong1; -> image_descriptor_tilde_axis_x + { +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui.subgroups_enabled ? fft.pipelines.horizontal.pipeline : fft.pipelines.horizontal_subgroups_off.pipeline); +#else + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.horizontal.pipeline); +#endif + vkCmdBindDescriptorSets(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.horizontal.pipeline_layout, 0u, 1u, &fft.descriptor_set_axis_x, 0u, nullptr); + + for (uint32_t i = 0; i < log_2_N; ++i) + { + vkCmdPushConstants(compute.command_buffer, fft.pipelines.horizontal.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &i); + vkCmdDispatch(compute.command_buffer, grid_size / 32u, grid_size, 1u); + + if ((i % 2) == 0) + { + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft_buffers.fft_tilde_h_kt_dx->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + } + else + { + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft.tilde_axis_x->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + } + } + } + + // fft horizontal; for Z axis + { +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui.subgroups_enabled ? 
fft.pipelines.horizontal.pipeline : fft.pipelines.horizontal_subgroups_off.pipeline); +#else + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.horizontal.pipeline); +#endif + vkCmdBindDescriptorSets(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.horizontal.pipeline_layout, 0u, 1u, &fft.descriptor_set_axis_z, 0u, nullptr); + + for (uint32_t i = 0; i < log_2_N; ++i) + { + vkCmdPushConstants(compute.command_buffer, fft.pipelines.horizontal.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &i); + vkCmdDispatch(compute.command_buffer, grid_size / 32u, grid_size, 1u); + + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft_buffers.fft_tilde_h_kt_dz->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft.tilde_axis_z->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = 
VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + } + } + + // fft vertical; for Y axis + { +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui.subgroups_enabled ? fft.pipelines.vertical.pipeline : fft.pipelines.vertical_subgroups_off.pipeline); +#else + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.vertical.pipeline); +#endif + vkCmdBindDescriptorSets(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.vertical.pipeline_layout, 0u, 1u, &fft.descriptor_set_axis_y, 0u, nullptr); + + for (uint32_t i = 0; i < log_2_N; ++i) + { + vkCmdPushConstants(compute.command_buffer, fft.pipelines.vertical.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &i); + vkCmdDispatch(compute.command_buffer, grid_size / 32u, grid_size, 1u); + + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft_buffers.fft_tilde_h_kt_dy->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image 
= fft.tilde_axis_y->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + } + } + + // fft vertical; for X axis + { +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui.subgroups_enabled ? fft.pipelines.vertical.pipeline : fft.pipelines.vertical_subgroups_off.pipeline); +#else + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.vertical.pipeline); +#endif + vkCmdBindDescriptorSets(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.vertical.pipeline_layout, 0u, 1u, &fft.descriptor_set_axis_x, 0u, nullptr); + + for (uint32_t i = 0; i < log_2_N; ++i) + { + vkCmdPushConstants(compute.command_buffer, fft.pipelines.vertical.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &i); + vkCmdDispatch(compute.command_buffer, grid_size / 32u, grid_size, 1u); + + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft_buffers.fft_tilde_h_kt_dx->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + 
img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft.tilde_axis_x->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + } + } + + // fft vertical; for Z axis + { +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui.subgroups_enabled ? 
fft.pipelines.vertical.pipeline : fft.pipelines.vertical_subgroups_off.pipeline); +#else + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.vertical.pipeline); +#endif + vkCmdBindDescriptorSets(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft.pipelines.vertical.pipeline_layout, 0u, 1u, &fft.descriptor_set_axis_z, 0u, nullptr); + + for (uint32_t i = 0; i < log_2_N; ++i) + { + vkCmdPushConstants(compute.command_buffer, fft.pipelines.vertical.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(uint32_t), &i); + vkCmdDispatch(compute.command_buffer, grid_size / 32u, grid_size, 1u); + + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft_buffers.fft_tilde_h_kt_dz->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft.tilde_axis_z->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = 
VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + } + } + + { + VkMemoryBarrier memory_barrier = vkb::initializers::memory_barrier(); + memory_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 1u, &memory_barrier, 0u, nullptr, 0u, nullptr); + } + + // fft inverse + { +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui.subgroups_enabled ? fft_inversion.pipeline.pipeline : fft_inversion.pipeline_subgroups_off.pipeline); +#else + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft_inversion.pipeline.pipeline); +#endif + vkCmdBindDescriptorSets(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft_inversion.pipeline.pipeline_layout, 0u, 1u, &fft_inversion.descriptor_set, 0u, nullptr); + vkCmdDispatch(compute.command_buffer, grid_size / 32u, grid_size, 1u); + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft_buffers.fft_displacement->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + } + + { + VkMemoryBarrier memory_barrier = vkb::initializers::memory_barrier(); + memory_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + memory_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 1u, &memory_barrier, 0u, nullptr, 0u, nullptr); + } + + // fft normal map + { +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, ui.subgroups_enabled ? fft_normal_map.pipeline.pipeline : fft_normal_map.pipeline_subgroups_off.pipeline); +#else + vkCmdBindPipeline(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft_normal_map.pipeline.pipeline); +#endif + vkCmdBindDescriptorSets(compute.command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, fft_normal_map.pipeline.pipeline_layout, 0u, 1u, &fft_normal_map.descriptor_set, 0u, nullptr); + vkCmdDispatch(compute.command_buffer, grid_size / 32u, grid_size, 1u); + } + if (ocean.graphics_queue_family_index != compute.queue_family_index) + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft_buffers.fft_normal_map->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcQueueFamilyIndex = compute.queue_family_index; + img_barrier.dstQueueFamilyIndex = ocean.graphics_queue_family_index; + img_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + + vkCmdPipelineBarrier(compute.command_buffer, 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + + vkCmdPipelineBarrier(compute.command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0u, 0u, nullptr, 0u, nullptr, 0u, nullptr); + + VK_CHECK(vkEndCommandBuffer(compute.command_buffer)); +} + +void SubgroupsOperations::request_gpu_features(vkb::PhysicalDevice &gpu) +{ + if (gpu.get_features().samplerAnisotropy) + { + gpu.get_mutable_requested_features().samplerAnisotropy = VK_TRUE; + } + + if (gpu.get_features().fillModeNonSolid) + { + gpu.get_mutable_requested_features().fillModeNonSolid = VK_TRUE; + } + + if (gpu.get_features().vertexPipelineStoresAndAtomics) + { + gpu.get_mutable_requested_features().vertexPipelineStoresAndAtomics = VK_TRUE; + } + + if (gpu.get_features().tessellationShader) + { + gpu.get_mutable_requested_features().tessellationShader = VK_TRUE; + } + else + { + throw vkb::VulkanException(VK_ERROR_FEATURE_NOT_PRESENT, "Selected GPU does not support tessellation shaders!"); + } + + subgroups_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; + subgroups_properties.pNext = VK_NULL_HANDLE; + + VkPhysicalDeviceProperties2 device_properties2 = {}; + device_properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + device_properties2.pNext = &subgroups_properties; + vkGetPhysicalDeviceProperties2(gpu.get_handle(), &device_properties2); + + if (subgroups_properties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) + ui.supported_features.push_back("VK_SUBGROUP_FEATURE_BASIC_BIT"); + if (subgroups_properties.supportedOperations & VK_SUBGROUP_FEATURE_VOTE_BIT) + ui.supported_features.push_back("VK_SUBGROUP_FEATURE_VOTE_BIT"); + if (subgroups_properties.supportedOperations & VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) + ui.supported_features.push_back("VK_SUBGROUP_FEATURE_ARITHMETIC_BIT"); + if (subgroups_properties.supportedOperations & 
VK_SUBGROUP_FEATURE_BALLOT_BIT) + ui.supported_features.push_back("VK_SUBGROUP_FEATURE_BALLOT_BIT"); + if (subgroups_properties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT) + ui.supported_features.push_back("VK_SUBGROUP_FEATURE_SHUFFLE_BIT"); + if (subgroups_properties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) + ui.supported_features.push_back("VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT"); + if (subgroups_properties.supportedOperations & VK_SUBGROUP_FEATURE_CLUSTERED_BIT) + ui.supported_features.push_back("VK_SUBGROUP_FEATURE_CLUSTERED_BIT"); + if (subgroups_properties.supportedOperations & VK_SUBGROUP_FEATURE_QUAD_BIT) + ui.supported_features.push_back("VK_SUBGROUP_FEATURE_QUAD_BIT"); + if (subgroups_properties.supportedOperations & VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV) + ui.supported_features.push_back("VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV"); + if (subgroups_properties.supportedOperations & VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR) + ui.supported_features.push_back("VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR"); + if (subgroups_properties.supportedOperations & VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR) + ui.supported_features.push_back("VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR"); +} + +void SubgroupsOperations::create_initial_tides() +{ + std::vector set_layout_bindings = { + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 0u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 1u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 2u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 3u)}; + + VkDescriptorSetLayoutCreateInfo descriptor_layout = vkb::initializers::descriptor_set_layout_create_info(set_layout_bindings); + VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), 
&descriptor_layout, nullptr, &initial_tildes.descriptor_set_layout)); + + VkDescriptorSetAllocateInfo alloc_info = vkb::initializers::descriptor_set_allocate_info(descriptor_pool, &initial_tildes.descriptor_set_layout, 1u); + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &alloc_info, &initial_tildes.descriptor_set)); + + VkPipelineLayoutCreateInfo compute_pipeline_layout_info = vkb::initializers::pipeline_layout_create_info(&initial_tildes.descriptor_set_layout); + + VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &compute_pipeline_layout_info, nullptr, &initial_tildes.pipeline.pipeline_layout)); + + VkComputePipelineCreateInfo computeInfo = vkb::initializers::compute_pipeline_create_info(initial_tildes.pipeline.pipeline_layout); + computeInfo.stage = load_shader("subgroups_operations/fft_tilde_h0.comp", VK_SHADER_STAGE_COMPUTE_BIT); + + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &initial_tildes.pipeline.pipeline)); + +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + computeInfo.stage = load_shader("subgroups_operations/fft_tilde_h0_subgroups_off.comp", VK_SHADER_STAGE_COMPUTE_BIT); + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &initial_tildes.pipeline_subgroups_off.pipeline)); +#endif + + fft_buffers.fft_input_htilde0 = std::make_unique(); + fft_buffers.fft_input_htilde0_conj = std::make_unique(); + + create_image_attachement(VK_FORMAT_R32G32B32A32_SFLOAT, grid_size, grid_size, *fft_buffers.fft_input_htilde0); + create_image_attachement(VK_FORMAT_R32G32B32A32_SFLOAT, grid_size, grid_size, *fft_buffers.fft_input_htilde0_conj); + + VkDescriptorImageInfo htilde_0_descriptor = create_ia_descriptor(*fft_buffers.fft_input_htilde0); + VkDescriptorImageInfo htilde_conj_0_descriptor = create_ia_descriptor(*fft_buffers.fft_input_htilde0_conj); + VkDescriptorBufferInfo input_random_descriptor = 
create_descriptor(*fft_buffers.fft_input_random); + VkDescriptorBufferInfo fft_params_ubo_buffer = create_descriptor(*fft_params_ubo); + + std::vector write_descriptor_sets = { + vkb::initializers::write_descriptor_set(initial_tildes.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0u, &htilde_0_descriptor), + vkb::initializers::write_descriptor_set(initial_tildes.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u, &htilde_conj_0_descriptor), + vkb::initializers::write_descriptor_set(initial_tildes.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u, &input_random_descriptor), + vkb::initializers::write_descriptor_set(initial_tildes.descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 3u, &fft_params_ubo_buffer)}; + vkUpdateDescriptorSets(get_device().get_handle(), static_cast(write_descriptor_sets.size()), write_descriptor_sets.data(), 0u, nullptr); +} + +void SubgroupsOperations::create_tildas() +{ + fft_buffers.fft_tilde_h_kt_dx = std::make_unique(); + fft_buffers.fft_tilde_h_kt_dy = std::make_unique(); + fft_buffers.fft_tilde_h_kt_dz = std::make_unique(); + + create_image_attachement(VK_FORMAT_R32G32B32A32_SFLOAT, grid_size, grid_size, *fft_buffers.fft_tilde_h_kt_dx); + create_image_attachement(VK_FORMAT_R32G32B32A32_SFLOAT, grid_size, grid_size, *fft_buffers.fft_tilde_h_kt_dy); + create_image_attachement(VK_FORMAT_R32G32B32A32_SFLOAT, grid_size, grid_size, *fft_buffers.fft_tilde_h_kt_dz); + + std::vector set_layout_bindings = { + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 0u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 1u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 2u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 3u), + 
vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 4u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 5u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 6u)}; + VkDescriptorSetLayoutCreateInfo descriptor_layout = vkb::initializers::descriptor_set_layout_create_info(set_layout_bindings); + VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &descriptor_layout, nullptr, &tildes.descriptor_set_layout)); + + VkDescriptorSetAllocateInfo alloc_info = vkb::initializers::descriptor_set_allocate_info(descriptor_pool, &tildes.descriptor_set_layout, 1u); + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &alloc_info, &tildes.descriptor_set)); + + VkPipelineLayoutCreateInfo compute_pipeline_layout_info = vkb::initializers::pipeline_layout_create_info(&tildes.descriptor_set_layout); + + VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &compute_pipeline_layout_info, nullptr, &tildes.pipeline.pipeline_layout)); + + VkComputePipelineCreateInfo computeInfo = vkb::initializers::compute_pipeline_create_info(tildes.pipeline.pipeline_layout); + computeInfo.stage = load_shader("subgroups_operations/fft_tilde_h.comp", VK_SHADER_STAGE_COMPUTE_BIT); + + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &tildes.pipeline.pipeline)); + + +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + computeInfo.stage = load_shader("subgroups_operations/fft_tilde_h_subgroups_off.comp", VK_SHADER_STAGE_COMPUTE_BIT); + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &tildes.pipeline_subgroups_off.pipeline)); +#endif + + VkDescriptorImageInfo htilde_0_descriptor = create_ia_descriptor(*fft_buffers.fft_input_htilde0); + VkDescriptorImageInfo htilde_conj_0_descriptor = 
create_ia_descriptor(*fft_buffers.fft_input_htilde0_conj); + + VkDescriptorImageInfo image_dx_descriptor = create_ia_descriptor(*fft_buffers.fft_tilde_h_kt_dx); + VkDescriptorImageInfo image_dy_descriptor = create_ia_descriptor(*fft_buffers.fft_tilde_h_kt_dy); + VkDescriptorImageInfo image_dz_descriptor = create_ia_descriptor(*fft_buffers.fft_tilde_h_kt_dz); + + VkDescriptorBufferInfo fft_params_ubo_buffer = create_descriptor(*fft_params_ubo); + VkDescriptorBufferInfo fft_time_ubo_buffer = create_descriptor(*fft_time_ubo); + + std::vector write_descriptor_sets = { + vkb::initializers::write_descriptor_set(tildes.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0u, &htilde_0_descriptor), + vkb::initializers::write_descriptor_set(tildes.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u, &htilde_conj_0_descriptor), + vkb::initializers::write_descriptor_set(tildes.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u, &image_dx_descriptor), + vkb::initializers::write_descriptor_set(tildes.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u, &image_dy_descriptor), + vkb::initializers::write_descriptor_set(tildes.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 4u, &image_dz_descriptor), + vkb::initializers::write_descriptor_set(tildes.descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 5u, &fft_params_ubo_buffer), + vkb::initializers::write_descriptor_set(tildes.descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 6u, &fft_time_ubo_buffer)}; + vkUpdateDescriptorSets(get_device().get_handle(), static_cast(write_descriptor_sets.size()), write_descriptor_sets.data(), 0u, nullptr); +} + +void SubgroupsOperations::load_assets() +{ + generate_plane(); + ui.wind.recalc(); + + log_2_N = static_cast(glm::log2(static_cast(grid_size))); + + input_random.clear(); + + for (uint32_t m = 0; m < grid_size; ++m) + { + for (uint32_t n = 0; n < grid_size; ++n) + { + glm::vec2 rnd1 = rndGaussian(); + glm::vec2 rnd2 = rndGaussian(); + input_random.emplace_back(rnd1.x, rnd1.y, rnd2.x, 
rnd2.y); + } + } + + auto input_random_size = static_cast(input_random.size() * sizeof(glm::vec4)); + fft_buffers.fft_input_random = std::make_unique(get_device(), + input_random_size, + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); + + fft_buffers.fft_input_random->update(input_random.data(), input_random_size); +} + +glm::vec2 SubgroupsOperations::rndGaussian() +{ + float x1, x2, w; + auto rndVal = []() -> float { + std::random_device rndDevice; + std::mt19937 mt(rndDevice()); + std::uniform_real_distribution dis(0.0f, 1.0f); + return dis(mt); + }; + do + { + x1 = 2.0f * rndVal() - 1.0f; + x2 = 2.0f * rndVal() - 1.0f; + w = x1 * x1 + x2 * x2; + } while (w >= 1.0f); + w = glm::sqrt((-2.0f * glm::log(w)) / w); + return glm::vec2{x1 * w, x2 * w}; +} + +void SubgroupsOperations::prepare_uniform_buffers() +{ + camera_ubo = std::make_unique(get_device(), sizeof(CameraUbo), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); + camera_position_ubo = std::make_unique(get_device(), sizeof(CameraPositionUbo), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); + fft_params_ubo = std::make_unique(get_device(), sizeof(FFTParametersUbo), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); + fft_time_ubo = std::make_unique(get_device(), sizeof(TimeUbo), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); + invert_fft_ubo = std::make_unique(get_device(), sizeof(FFTInvertUbo), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); + tessellation_params_ubo = std::make_unique(get_device(), sizeof(TessellationParamsUbo), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); + ocean_params_ubo = std::make_unique(get_device(), sizeof(OceanParamsUbo), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); + + update_uniform_buffers(); +} + +void SubgroupsOperations::generate_plane() +{ + uint32_t dim_gird = grid_size; + uint32_t vertex_count = dim_gird + 1u; + std::vector 
plane_vertices; + const auto tex_coord_scale = float(grid_size); + std::vector indices; + auto half_grid_size = static_cast(dim_gird / 2); + + for (int32_t z = -half_grid_size; z <= half_grid_size; ++z) + { + for (int32_t x = -half_grid_size; x <= half_grid_size; ++x) + { + float u = static_cast(x) / static_cast(dim_gird) + 0.5f; + float v = static_cast(z) / static_cast(dim_gird) + 0.5f; + Vertex vert; + vert.pos = glm::vec3(float(x), 0.0f, float(z)); + vert.uv = glm::vec2(u, v) * tex_coord_scale; + vert.normal = glm::vec3(0.0f); + + plane_vertices.push_back(vert); + } + } + + for (uint32_t y = 0u; y < dim_gird; ++y) + { + for (uint32_t x = 0u; x < dim_gird; ++x) + { + // tris 1 + indices.push_back((vertex_count * y) + x); + indices.push_back((vertex_count * (y + 1u)) + x); + indices.push_back((vertex_count * y) + x + 1u); + + // tris 2 + indices.push_back((vertex_count * y) + x + 1u); + indices.push_back((vertex_count * (y + 1u)) + x); + indices.push_back((vertex_count * (y + 1u)) + x + 1u); + } + } + + auto vertex_buffer_size = vkb::to_u32(plane_vertices.size() * sizeof(Vertex)); + auto index_buffer_size = vkb::to_u32(indices.size() * sizeof(uint32_t)); + ocean.grid.index_count = vkb::to_u32(indices.size()); + + ocean.grid.vertex = std::make_unique(get_device(), + vertex_buffer_size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); + + ocean.grid.index = std::make_unique(get_device(), + index_buffer_size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VMA_MEMORY_USAGE_CPU_TO_GPU); + + ocean.grid.vertex->update(plane_vertices.data(), vertex_buffer_size); + ocean.grid.index->update(indices.data(), index_buffer_size); +} + +void SubgroupsOperations::create_semaphore() +{ + // Semaphore for graphics queue + { + VkSemaphoreCreateInfo semaphore_create_info = vkb::initializers::semaphore_create_info(); + VK_CHECK(vkCreateSemaphore(get_device().get_handle(), &semaphore_create_info, nullptr, 
&ocean.semaphore)); + } + + // Semaphore for compute & graphics sync + { + VkSemaphoreCreateInfo semaphore_create_info = vkb::initializers::semaphore_create_info(); + VK_CHECK(vkCreateSemaphore(get_device().get_handle(), &semaphore_create_info, nullptr, &compute.semaphore)); + } +} + +void SubgroupsOperations::setup_descriptor_pool() +{ + std::vector pool_sizes = { + vkb::initializers::descriptor_pool_size(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 20u), + vkb::initializers::descriptor_pool_size(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 20u), + vkb::initializers::descriptor_pool_size(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 20u)}; + VkDescriptorPoolCreateInfo descriptor_pool_create_info = + vkb::initializers::descriptor_pool_create_info(static_cast(pool_sizes.size()), pool_sizes.data(), 15u); + VK_CHECK(vkCreateDescriptorPool(get_device().get_handle(), &descriptor_pool_create_info, nullptr, &descriptor_pool)); +} + +void SubgroupsOperations::create_descriptor_set_layout() +{ + std::vector set_layout_bindings = { + vkb::initializers::descriptor_set_layout_binding( + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, + 0u), + vkb::initializers::descriptor_set_layout_binding( + VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, + 1u), + vkb::initializers::descriptor_set_layout_binding( + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, + 2u), + vkb::initializers::descriptor_set_layout_binding( + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, + 3u), + 
vkb::initializers::descriptor_set_layout_binding( + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, + 4u), + vkb::initializers::descriptor_set_layout_binding( + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, + 5u), + vkb::initializers::descriptor_set_layout_binding( + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_FRAGMENT_BIT, + 6u)}; + + VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info = vkb::initializers::descriptor_set_layout_create_info(set_layout_bindings); + VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &descriptor_set_layout_create_info, nullptr, &ocean.descriptor_set_layout)); + + VkPipelineLayoutCreateInfo pipeline_layout_create_info = vkb::initializers::pipeline_layout_create_info(&ocean.descriptor_set_layout); + VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &pipeline_layout_create_info, nullptr, &ocean.pipelines._default.pipeline_layout)); +} + +void SubgroupsOperations::create_descriptor_set() +{ + VkDescriptorSetAllocateInfo alloc_info = vkb::initializers::descriptor_set_allocate_info(descriptor_pool, &ocean.descriptor_set_layout, 1u); + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &alloc_info, &ocean.descriptor_set)); + + VkDescriptorBufferInfo buffer_descriptor = create_descriptor(*camera_ubo); + VkDescriptorImageInfo displacement_descriptor = create_ia_descriptor(*fft_buffers.fft_displacement); + VkDescriptorBufferInfo tessellation_params_descriptor = create_descriptor(*tessellation_params_ubo); + VkDescriptorBufferInfo camera_pos_buffer_descriptor = create_descriptor(*camera_position_ubo); + VkDescriptorImageInfo normal_map_descriptor = create_ia_descriptor(*fft_buffers.fft_normal_map); + 
VkDescriptorBufferInfo ocean_params_buffer_descriptor = create_descriptor(*ocean_params_ubo); + + std::vector write_descriptor_sets = { + vkb::initializers::write_descriptor_set(ocean.descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0u, &buffer_descriptor), + vkb::initializers::write_descriptor_set(ocean.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u, &displacement_descriptor), + vkb::initializers::write_descriptor_set(ocean.descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2u, &tessellation_params_descriptor), + vkb::initializers::write_descriptor_set(ocean.descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 3u, &camera_pos_buffer_descriptor), + vkb::initializers::write_descriptor_set(ocean.descriptor_set, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 4u, &normal_map_descriptor), + vkb::initializers::write_descriptor_set(ocean.descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 5u, &ocean_params_buffer_descriptor)}; + + vkUpdateDescriptorSets(get_device().get_handle(), static_cast(write_descriptor_sets.size()), write_descriptor_sets.data(), 0u, nullptr); +} + +void SubgroupsOperations::create_pipelines() +{ + VkPipelineInputAssemblyStateCreateInfo input_assembly_state = + vkb::initializers::pipeline_input_assembly_state_create_info( + VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, + 0u, + VK_FALSE); + VkPipelineRasterizationStateCreateInfo rasterization_state = + vkb::initializers::pipeline_rasterization_state_create_info( + VK_POLYGON_MODE_FILL, + VK_CULL_MODE_NONE, + VK_FRONT_FACE_COUNTER_CLOCKWISE, + 0u); + VkPipelineColorBlendAttachmentState blend_attachment_state = + vkb::initializers::pipeline_color_blend_attachment_state( + 0xf, + VK_FALSE); + + VkPipelineColorBlendStateCreateInfo color_blend_state = + vkb::initializers::pipeline_color_blend_state_create_info( + 1u, + &blend_attachment_state); + VkPipelineDepthStencilStateCreateInfo depth_stencil_state = + vkb::initializers::pipeline_depth_stencil_state_create_info( + VK_TRUE, + VK_TRUE, + VK_COMPARE_OP_GREATER); + 
VkPipelineViewportStateCreateInfo viewport_state = + vkb::initializers::pipeline_viewport_state_create_info(1u, 1u, 0u); + VkPipelineMultisampleStateCreateInfo multisample_state = + vkb::initializers::pipeline_multisample_state_create_info( + VK_SAMPLE_COUNT_1_BIT, + 0u); + std::vector dynamic_state_enables = { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR}; + VkPipelineDynamicStateCreateInfo dynamic_state = + vkb::initializers::pipeline_dynamic_state_create_info( + dynamic_state_enables.data(), + static_cast(dynamic_state_enables.size()), + 0u); + + VkPipelineTessellationStateCreateInfo tessellation_state = vkb::initializers::pipeline_tessellation_state_create_info(3u); + + std::array shader_stages = { + load_shader("subgroups_operations/ocean.vert", VK_SHADER_STAGE_VERTEX_BIT), + load_shader("subgroups_operations/ocean.frag", VK_SHADER_STAGE_FRAGMENT_BIT), + load_shader("subgroups_operations/ocean.tesc", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT), + load_shader("subgroups_operations/ocean.tese", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)}; + + const std::vector vertex_input_bindings = { + vkb::initializers::vertex_input_binding_description(0u, sizeof(Vertex), VK_VERTEX_INPUT_RATE_VERTEX)}; + const std::vector vertex_input_attributes = { + vkb::initializers::vertex_input_attribute_description(0u, 0u, VK_FORMAT_R32G32B32_SFLOAT, offsetof(Vertex, pos)), + vkb::initializers::vertex_input_attribute_description(0u, 1u, VK_FORMAT_R32G32_SFLOAT, offsetof(Vertex, uv))}; + VkPipelineVertexInputStateCreateInfo vertex_input_state = vkb::initializers::pipeline_vertex_input_state_create_info(); + vertex_input_state.vertexBindingDescriptionCount = static_cast(vertex_input_bindings.size()); + vertex_input_state.pVertexBindingDescriptions = vertex_input_bindings.data(); + vertex_input_state.vertexAttributeDescriptionCount = static_cast(vertex_input_attributes.size()); + vertex_input_state.pVertexAttributeDescriptions = vertex_input_attributes.data(); + + 
VkGraphicsPipelineCreateInfo pipeline_create_info = vkb::initializers::pipeline_create_info(ocean.pipelines._default.pipeline_layout, render_pass, 0u); + pipeline_create_info.pVertexInputState = &vertex_input_state; + pipeline_create_info.pInputAssemblyState = &input_assembly_state; + pipeline_create_info.pRasterizationState = &rasterization_state; + pipeline_create_info.pColorBlendState = &color_blend_state; + pipeline_create_info.pMultisampleState = &multisample_state; + pipeline_create_info.pViewportState = &viewport_state; + pipeline_create_info.pDepthStencilState = &depth_stencil_state; + pipeline_create_info.pDynamicState = &dynamic_state; + pipeline_create_info.pTessellationState = &tessellation_state; + pipeline_create_info.stageCount = static_cast(shader_stages.size()); + pipeline_create_info.pStages = shader_stages.data(); + VK_CHECK(vkCreateGraphicsPipelines(get_device().get_handle(), pipeline_cache, 1u, &pipeline_create_info, nullptr, &ocean.pipelines._default.pipeline)); + + if (get_device().get_gpu().get_features().fillModeNonSolid) + { + rasterization_state.polygonMode = VK_POLYGON_MODE_LINE; + VK_CHECK(vkCreateGraphicsPipelines(get_device().get_handle(), pipeline_cache, 1u, &pipeline_create_info, nullptr, &ocean.pipelines.wireframe.pipeline)); + } +} + +void SubgroupsOperations::update_uniform_buffers() +{ + CameraUbo ubo = {}; + ubo.model = glm::mat4(1.0f); + ubo.view = camera.matrices.view; + ubo.projection = camera.matrices.perspective; + + CameraPositionUbo cam_pos = {}; + cam_pos.position = glm::vec4(camera.position, 0.0f); + + FFTParametersUbo fft_ubo = {}; + fft_ubo.amplitude = ui.amplitude; + fft_ubo.grid_size = grid_size; + fft_ubo.length = ui.length; + fft_ubo.wind = ui.wind.vec; + + FFTInvertUbo invert_fft = {}; + invert_fft.page_idx = static_cast(log_2_N % 2); + invert_fft.grid_size = grid_size; + + TessellationParamsUbo tess_params = {}; + tess_params.displacement_scale = ui.displacement_scale; + tess_params.choppines = ui.choppines; + 
+ TimeUbo t = {}; + t.time = static_cast(timer.elapsed()); + + OceanParamsUbo ocean_params = {}; + ocean_params.light_color = ui.light_color; + ocean_params.light_position = ui.light_pos; + ocean_params.ocean_color = ui.ocean_color; + + ocean_params_ubo->convert_and_update(ocean_params); + fft_time_ubo->convert_and_update(t); + camera_ubo->convert_and_update(ubo); + fft_params_ubo->convert_and_update(fft_ubo); + invert_fft_ubo->convert_and_update(invert_fft); + tessellation_params_ubo->convert_and_update(tess_params); + camera_position_ubo->convert_and_update(cam_pos); +} + +void SubgroupsOperations::build_command_buffers() +{ + VkCommandBufferBeginInfo command_buffer_begin_info = vkb::initializers::command_buffer_begin_info(); + + std::array clear_values = {}; + clear_values[0].color = {{0.0f, 0.0f, 0.0f, 0.0f}}; + clear_values[1].depthStencil = {0.0f, 0u}; + + VkRenderPassBeginInfo render_pass_begin_info = vkb::initializers::render_pass_begin_info(); + render_pass_begin_info.renderPass = render_pass; + render_pass_begin_info.renderArea.extent.width = width; + render_pass_begin_info.renderArea.extent.height = height; + render_pass_begin_info.clearValueCount = static_cast(clear_values.size()); + render_pass_begin_info.pClearValues = clear_values.data(); + + for (uint32_t i = 0u; i < draw_cmd_buffers.size(); ++i) + { + render_pass_begin_info.framebuffer = framebuffers[i]; + auto &cmd_buff = draw_cmd_buffers[i]; + + VK_CHECK(vkBeginCommandBuffer(cmd_buff, &command_buffer_begin_info)); + + if (ocean.graphics_queue_family_index != compute.queue_family_index) + { + VkMemoryBarrier memory_barrier = vkb::initializers::memory_barrier(); + memory_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + memory_barrier.dstAccessMask = VK_ACCESS_INDEX_READ_BIT; + + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft_buffers.fft_normal_map->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + 
img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcQueueFamilyIndex = compute.queue_family_index; + img_barrier.dstQueueFamilyIndex = ocean.graphics_queue_family_index; + img_barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; + img_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + vkCmdPipelineBarrier(cmd_buff, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0u, 1u, &memory_barrier, 0u, nullptr, 1u, &img_barrier); + } + + vkCmdBeginRenderPass(cmd_buff, &render_pass_begin_info, VK_SUBPASS_CONTENTS_INLINE); + + VkViewport viewport = vkb::initializers::viewport(static_cast(width), static_cast(height), 0.0f, 1.0f); + vkCmdSetViewport(cmd_buff, 0u, 1u, &viewport); + + VkRect2D scissor = vkb::initializers::rect2D(static_cast(width), static_cast(height), 0, 0); + vkCmdSetScissor(cmd_buff, 0u, 1u, &scissor); + + // draw ocean + { + vkCmdBindDescriptorSets(cmd_buff, VK_PIPELINE_BIND_POINT_GRAPHICS, ocean.pipelines._default.pipeline_layout, 0u, 1u, &ocean.descriptor_set, 0u, nullptr); + vkCmdBindPipeline(cmd_buff, VK_PIPELINE_BIND_POINT_GRAPHICS, ui.wireframe ? 
ocean.pipelines.wireframe.pipeline : ocean.pipelines._default.pipeline); + + VkDeviceSize offset[] = {0u}; + vkCmdBindVertexBuffers(cmd_buff, 0u, 1u, ocean.grid.vertex->get(), offset); + vkCmdBindIndexBuffer(cmd_buff, ocean.grid.index->get_handle(), VkDeviceSize(0), VK_INDEX_TYPE_UINT32); + + vkCmdDrawIndexed(cmd_buff, ocean.grid.index_count, 1u, 0u, 0u, 0u); + } + + draw_ui(cmd_buff); + + vkCmdEndRenderPass(cmd_buff); + + if (ocean.graphics_queue_family_index != compute.queue_family_index) + { + VkImageMemoryBarrier img_barrier = vkb::initializers::image_memory_barrier(); + img_barrier.image = fft_buffers.fft_normal_map->image; + img_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + img_barrier.subresourceRange.baseMipLevel = 0u; + img_barrier.subresourceRange.levelCount = 1u; + img_barrier.subresourceRange.baseArrayLayer = 0u; + img_barrier.subresourceRange.layerCount = 1u; + img_barrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + img_barrier.srcQueueFamilyIndex = ocean.graphics_queue_family_index; + img_barrier.dstQueueFamilyIndex = compute.queue_family_index; + img_barrier.dstAccessMask = 0u; + img_barrier.srcAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + + vkCmdPipelineBarrier(cmd_buff, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &img_barrier); + } + + VK_CHECK(vkEndCommandBuffer(cmd_buff)); + } +} + +void SubgroupsOperations::draw() +{ + VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; + // Submit compute commands + VkSubmitInfo compute_submit_info = vkb::initializers::submit_info(); + compute_submit_info.commandBufferCount = 1; + compute_submit_info.pCommandBuffers = &compute.command_buffer; + compute_submit_info.waitSemaphoreCount = 1u; + compute_submit_info.pWaitSemaphores = &ocean.semaphore; + compute_submit_info.pWaitDstStageMask = &wait_stage_mask; + compute_submit_info.signalSemaphoreCount = 1u; 
+ compute_submit_info.pSignalSemaphores = &compute.semaphore; + + VK_CHECK(vkQueueSubmit(compute.queue, 1u, &compute_submit_info, VK_NULL_HANDLE)); + + VkPipelineStageFlags graphics_wait_stage_masks[] = {VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT}; + VkSemaphore graphics_wait_semaphores[] = {compute.semaphore, semaphores.acquired_image_ready}; + VkSemaphore graphics_signal_semaphores[] = {ocean.semaphore, semaphores.render_complete}; + + ApiVulkanSample::prepare_frame(); + submit_info.commandBufferCount = 1u; + submit_info.pCommandBuffers = &draw_cmd_buffers[current_buffer]; + submit_info.waitSemaphoreCount = 2u; + submit_info.pWaitSemaphores = graphics_wait_semaphores; + submit_info.pWaitDstStageMask = graphics_wait_stage_masks; + submit_info.signalSemaphoreCount = 2u; + submit_info.pSignalSemaphores = graphics_signal_semaphores; + + VK_CHECK(vkQueueSubmit(queue, 1u, &submit_info, VK_NULL_HANDLE)); + + ApiVulkanSample::submit_frame(); +} + +void SubgroupsOperations::on_update_ui_overlay(vkb::Drawer &drawer) +{ + if (drawer.header("Subgroups operations properties")) + { + + + drawer.text("Subgroups size %d", subgroups_properties.subgroupSize); + drawer.text("Supported operations:"); + + for (auto &s : ui.supported_features) + drawer.text(" %s", s.c_str()); + } + + if (drawer.header("Settings")) + { + if (get_device().get_gpu().get_features().fillModeNonSolid) + { + if (drawer.checkbox("Wireframe", &ui.wireframe)) + { + build_command_buffers(); + } + } + + if (drawer.checkbox("Subgroups enable", &ui.subgroups_enabled)) + { + build_compute_command_buffer(); + } + + if (drawer.header("Light")) + { + drawer.slider_float("Position x", &ui.light_pos.x, -1000.0f, 1000.0f); + drawer.slider_float("Position y", &ui.light_pos.y, -1000.0f, 1000.0f); + drawer.slider_float("Position z", &ui.light_pos.z, -1000.0f, 1000.0f); + + std::array colors = {ui.light_color.r, ui.light_color.g, ui.light_color.b}; + drawer.color_op("Light color", colors, 0, + 
ImGuiColorEditFlags_NoSidePreview | + ImGuiColorEditFlags_NoSmallPreview | + ImGuiColorEditFlags_Float | + ImGuiColorEditFlags_HDR); + ui.light_color.r = colors[0]; + ui.light_color.g = colors[1]; + ui.light_color.b = colors[2]; + } + } + + if (drawer.header("Ocean settings")) + { + drawer.input_float("Amplitude", &ui.amplitude, 1.f, "%.3f"); + drawer.input_float("Length", &ui.length, 10.f, "%.1f"); + drawer.slider_float("Choppines", &ui.choppines, 0.0f, 1.0f); + drawer.slider_float("Displacement scale", &ui.displacement_scale, 0.0f, 1.0f); + + if (drawer.header("Wind")) + { + drawer.slider_float("Angle", &ui.wind.angle, 0.0f, 360.0f); + drawer.slider_float("Force", &ui.wind.force, 0.1f, 50.0f); + + ui.wind.recalc(); + } + + std::array colors = {ui.ocean_color.r, ui.ocean_color.g, ui.ocean_color.b}; + drawer.color_op("Ocean color", colors, 0, + ImGuiColorEditFlags_NoSidePreview | + ImGuiColorEditFlags_NoSmallPreview | + ImGuiColorEditFlags_Float | + ImGuiColorEditFlags_HDR); + ui.ocean_color.r = colors[0]; + ui.ocean_color.g = colors[1]; + ui.ocean_color.b = colors[2]; + } +} + +bool SubgroupsOperations::resize(const uint32_t width, const uint32_t height) +{ + if (!ApiVulkanSample::resize(width, height)) + return false; + update_uniform_buffers(); + build_compute_command_buffer(); + build_command_buffers(); + return true; +} + +void SubgroupsOperations::render(float delta_time) +{ + if (!prepared) + { + return; + } + if (!timer.is_running()) + timer.start(); + + update_uniform_buffers(); + draw(); +} + +void SubgroupsOperations::create_image_attachement(VkFormat format, uint32_t width, uint32_t height, ImageAttachment &attachment) +{ + attachment.format = format; + + VkImageCreateInfo image = vkb::initializers::image_create_info(); + image.imageType = VK_IMAGE_TYPE_2D; + image.format = format; + image.extent.width = width; + image.extent.height = height; + image.extent.depth = 1u; + image.mipLevels = 1u; + image.arrayLayers = 1u; + image.samples = 
VK_SAMPLE_COUNT_1_BIT; + image.tiling = VK_IMAGE_TILING_OPTIMAL; + image.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT; + image.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkMemoryAllocateInfo memory_allocate_info = vkb::initializers::memory_allocate_info(); + VkMemoryRequirements memory_requirements; + + VK_CHECK(vkCreateImage(get_device().get_handle(), &image, nullptr, &attachment.image)); + vkGetImageMemoryRequirements(get_device().get_handle(), attachment.image, &memory_requirements); + memory_allocate_info.allocationSize = memory_requirements.size; + memory_allocate_info.memoryTypeIndex = get_device().get_memory_type(memory_requirements.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + VK_CHECK(vkAllocateMemory(get_device().get_handle(), &memory_allocate_info, nullptr, &attachment.memory)); + VK_CHECK(vkBindImageMemory(get_device().get_handle(), attachment.image, attachment.memory, 0)); + + VkImageViewCreateInfo image_view_create_info = vkb::initializers::image_view_create_info(); + image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D; + image_view_create_info.format = format; + image_view_create_info.subresourceRange = {}; + image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + image_view_create_info.subresourceRange.baseMipLevel = 0u; + image_view_create_info.subresourceRange.levelCount = 1u; + image_view_create_info.subresourceRange.baseArrayLayer = 0u; + image_view_create_info.subresourceRange.layerCount = 1u; + image_view_create_info.image = attachment.image; + VK_CHECK(vkCreateImageView(get_device().get_handle(), &image_view_create_info, nullptr, &attachment.view)); + + VkSamplerCreateInfo sampler_info = vkb::initializers::sampler_create_info(); + sampler_info.magFilter = VK_FILTER_LINEAR; + sampler_info.minFilter = VK_FILTER_LINEAR; + sampler_info.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.addressModeV 
= VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + sampler_info.anisotropyEnable = VK_TRUE; + sampler_info.maxAnisotropy = get_device().get_gpu().get_properties().limits.maxSamplerAnisotropy; + sampler_info.compareEnable = VK_FALSE; + sampler_info.compareOp = VK_COMPARE_OP_NEVER; + sampler_info.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK; + sampler_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR; + + VK_CHECK(vkCreateSampler(get_device().get_handle(), &sampler_info, nullptr, &attachment.sampler)); + + VkImageMemoryBarrier imgMemBarrier = vkb::initializers::image_memory_barrier(); + imgMemBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imgMemBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + imgMemBarrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imgMemBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imgMemBarrier.image = attachment.image; + imgMemBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imgMemBarrier.subresourceRange.baseArrayLayer = 0u; + imgMemBarrier.subresourceRange.levelCount = 1u; + imgMemBarrier.subresourceRange.baseArrayLayer = 0u; + imgMemBarrier.subresourceRange.layerCount = 1u; + imgMemBarrier.srcAccessMask = 0u; + imgMemBarrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + + VkPipelineStageFlagBits srcStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + VkPipelineStageFlagBits dstStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + VkCommandBuffer cmd = get_device().create_command_buffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true); + vkCmdPipelineBarrier(cmd, srcStage, dstStage, 0u, 0u, nullptr, 0u, nullptr, 1u, &imgMemBarrier); + get_device().flush_command_buffer(cmd, queue, true); +} + +uint32_t SubgroupsOperations::reverse(uint32_t i) const +{ + uint32_t res = 0; + for (int j = 0; j < log_2_N; j++) + { + res = (res << 1) + (i & 1); + i >>= 1; + } + return res; +} + +void SubgroupsOperations::create_butterfly_texture() +{ + std::vector set_layout_bindings = { + 
vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 0u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 1u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 2u)}; + + VkDescriptorSetLayoutCreateInfo descriptor_layout = vkb::initializers::descriptor_set_layout_create_info(set_layout_bindings); + VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &descriptor_layout, nullptr, &precompute.descriptor_set_layout)); + + VkDescriptorSetAllocateInfo alloc_info = vkb::initializers::descriptor_set_allocate_info(descriptor_pool, &precompute.descriptor_set_layout, 1u); + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &alloc_info, &precompute.descriptor_set)); + + VkPipelineLayoutCreateInfo compute_pipeline_layout_info = vkb::initializers::pipeline_layout_create_info(&precompute.descriptor_set_layout); + + VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &compute_pipeline_layout_info, nullptr, &precompute.pipeline.pipeline_layout)); + + VkComputePipelineCreateInfo computeInfo = {}; + computeInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + computeInfo.layout = precompute.pipeline.pipeline_layout; + computeInfo.stage = load_shader("subgroups_operations/butterfly_precomp.comp", VK_SHADER_STAGE_COMPUTE_BIT); + + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &precompute.pipeline.pipeline)); + +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + computeInfo.stage = load_shader("subgroups_operations/butterfly_precomp_subgroups_off.comp", VK_SHADER_STAGE_COMPUTE_BIT); + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &precompute.pipeline_subgroups_off.pipeline)); +#endif + + create_image_attachement(VK_FORMAT_R32G32B32A32_SFLOAT, log_2_N, 
grid_size, butterfly_precomp); + + std::vector bit_reverse_arr; + for (uint32_t i = 0; i < grid_size; ++i) + bit_reverse_arr.push_back(reverse(i)); + + bit_reverse_buffer = std::make_unique(get_device(), sizeof(uint32_t) * bit_reverse_arr.size(), VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VMA_MEMORY_USAGE_CPU_TO_GPU); + bit_reverse_buffer->update(bit_reverse_arr.data(), sizeof(uint32_t) * bit_reverse_arr.size()); + + VkDescriptorBufferInfo bit_reverse_descriptor = create_descriptor(*bit_reverse_buffer); + VkDescriptorImageInfo image_descriptor = create_ia_descriptor(butterfly_precomp); + VkDescriptorBufferInfo fft_params_ubo_buffer = create_descriptor(*fft_params_ubo); + + std::vector write_descriptor_sets = { + vkb::initializers::write_descriptor_set(precompute.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0u, &image_descriptor), + vkb::initializers::write_descriptor_set(precompute.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u, &bit_reverse_descriptor), + vkb::initializers::write_descriptor_set(precompute.descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2u, &fft_params_ubo_buffer)}; + vkUpdateDescriptorSets(get_device().get_handle(), static_cast(write_descriptor_sets.size()), write_descriptor_sets.data(), 0u, nullptr); +} + +void SubgroupsOperations::create_fft() +{ + std::vector set_layout_bindings = { + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 0u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 1u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 2u)}; + VkDescriptorSetLayoutCreateInfo descriptor_layout = vkb::initializers::descriptor_set_layout_create_info(set_layout_bindings); + VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &descriptor_layout, nullptr, &fft.descriptor_set_layout)); + + VkDescriptorSetAllocateInfo alloc_info = 
vkb::initializers::descriptor_set_allocate_info(descriptor_pool, &fft.descriptor_set_layout, 1u); + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &alloc_info, &fft.descriptor_set_axis_y)); + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &alloc_info, &fft.descriptor_set_axis_x)); + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &alloc_info, &fft.descriptor_set_axis_z)); + + VkPushConstantRange push_constant_range = vkb::initializers::push_constant_range(VK_SHADER_STAGE_COMPUTE_BIT, sizeof(int32_t), 0); + + VkPipelineLayoutCreateInfo compute_pipeline_layout_info = {}; + compute_pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + compute_pipeline_layout_info.setLayoutCount = 1u; + compute_pipeline_layout_info.pSetLayouts = &fft.descriptor_set_layout; + compute_pipeline_layout_info.pushConstantRangeCount = 1u; + compute_pipeline_layout_info.pPushConstantRanges = &push_constant_range; + + VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &compute_pipeline_layout_info, nullptr, &fft.pipelines.horizontal.pipeline_layout)); + VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &compute_pipeline_layout_info, nullptr, &fft.pipelines.vertical.pipeline_layout)); + + VkComputePipelineCreateInfo computeInfo = {}; + computeInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + computeInfo.layout = fft.pipelines.horizontal.pipeline_layout; + computeInfo.stage = load_shader("subgroups_operations/fft.comp", VK_SHADER_STAGE_COMPUTE_BIT); + + std::array specialization_map_entries = {}; + VkSpecializationInfo spec_info; + uint32_t direction = 0u; + specialization_map_entries[0] = vkb::initializers::specialization_map_entry(0u, 0u, sizeof(uint32_t)); + spec_info = vkb::initializers::specialization_info(static_cast(specialization_map_entries.size()), + specialization_map_entries.data(), + sizeof(uint32_t), + &direction); + computeInfo.stage.pSpecializationInfo = &spec_info; + + 
VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &fft.pipelines.horizontal.pipeline)); + +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + computeInfo.stage = load_shader("subgroups_operations/fft_subgroups_off.comp", VK_SHADER_STAGE_COMPUTE_BIT); + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &fft.pipelines.horizontal_subgroups_off.pipeline)); + computeInfo.stage = load_shader("subgroups_operations/fft.comp", VK_SHADER_STAGE_COMPUTE_BIT); +#endif + + direction = 1u; + computeInfo.layout = fft.pipelines.vertical.pipeline_layout; + + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &fft.pipelines.vertical.pipeline)); + +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + computeInfo.stage = load_shader("subgroups_operations/fft_subgroups_off.comp", VK_SHADER_STAGE_COMPUTE_BIT); + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &fft.pipelines.vertical_subgroups_off.pipeline)); +#endif + + fft.tilde_axis_y = std::make_unique(); + fft.tilde_axis_x = std::make_unique(); + fft.tilde_axis_z = std::make_unique(); + create_image_attachement(VK_FORMAT_R32G32B32A32_SFLOAT, grid_size, grid_size, *fft.tilde_axis_y); + create_image_attachement(VK_FORMAT_R32G32B32A32_SFLOAT, grid_size, grid_size, *fft.tilde_axis_x); + create_image_attachement(VK_FORMAT_R32G32B32A32_SFLOAT, grid_size, grid_size, *fft.tilde_axis_z); + + VkDescriptorImageInfo image_descriptor_butterfly = create_ia_descriptor(butterfly_precomp); + VkDescriptorImageInfo image_descriptor_tilda_y = create_ia_descriptor(*fft_buffers.fft_tilde_h_kt_dy); + VkDescriptorImageInfo image_descriptor_tilde_axis_y = create_ia_descriptor(*fft.tilde_axis_y); + + std::vector write_descriptor_sets_axis_y = { + vkb::initializers::write_descriptor_set(fft.descriptor_set_axis_y, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0u, 
&image_descriptor_butterfly), + vkb::initializers::write_descriptor_set(fft.descriptor_set_axis_y, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u, &image_descriptor_tilda_y), + vkb::initializers::write_descriptor_set(fft.descriptor_set_axis_y, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u, &image_descriptor_tilde_axis_y)}; + + vkUpdateDescriptorSets(get_device().get_handle(), static_cast(write_descriptor_sets_axis_y.size()), write_descriptor_sets_axis_y.data(), 0u, nullptr); + + VkDescriptorImageInfo image_descriptor_tilda_x = create_ia_descriptor(*fft_buffers.fft_tilde_h_kt_dx); + VkDescriptorImageInfo image_descriptor_tilde_axis_x = create_ia_descriptor(*fft.tilde_axis_x); + + std::vector write_descriptor_sets_axis_x = { + vkb::initializers::write_descriptor_set(fft.descriptor_set_axis_x, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0u, &image_descriptor_butterfly), + vkb::initializers::write_descriptor_set(fft.descriptor_set_axis_x, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u, &image_descriptor_tilda_x), + vkb::initializers::write_descriptor_set(fft.descriptor_set_axis_x, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u, &image_descriptor_tilde_axis_x)}; + + vkUpdateDescriptorSets(get_device().get_handle(), static_cast(write_descriptor_sets_axis_x.size()), write_descriptor_sets_axis_x.data(), 0u, nullptr); + + VkDescriptorImageInfo image_descriptor_tilda_z = create_ia_descriptor(*fft_buffers.fft_tilde_h_kt_dz); + VkDescriptorImageInfo image_descriptor_tilde_axis_z = create_ia_descriptor(*fft.tilde_axis_z); + + std::vector write_descriptor_sets_axis_z = { + vkb::initializers::write_descriptor_set(fft.descriptor_set_axis_z, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0u, &image_descriptor_butterfly), + vkb::initializers::write_descriptor_set(fft.descriptor_set_axis_z, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u, &image_descriptor_tilda_z), + vkb::initializers::write_descriptor_set(fft.descriptor_set_axis_z, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u, &image_descriptor_tilde_axis_z)}; + + 
vkUpdateDescriptorSets(get_device().get_handle(), static_cast(write_descriptor_sets_axis_z.size()), write_descriptor_sets_axis_z.data(), 0u, nullptr); +} + +void SubgroupsOperations::create_fft_inversion() +{ + std::vector set_layout_bindings = { + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 0u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 1u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 2u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 3u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 4u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 5u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 6u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 7u)}; + + VkDescriptorSetLayoutCreateInfo descriptor_layout = vkb::initializers::descriptor_set_layout_create_info(set_layout_bindings); + VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &descriptor_layout, nullptr, &fft_inversion.descriptor_set_layout)); + + VkDescriptorSetAllocateInfo alloc_info = vkb::initializers::descriptor_set_allocate_info(descriptor_pool, &fft_inversion.descriptor_set_layout, 1u); + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &alloc_info, &fft_inversion.descriptor_set)); + + VkPipelineLayoutCreateInfo compute_pipeline_layout_info = vkb::initializers::pipeline_layout_create_info(&fft_inversion.descriptor_set_layout); + + VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &compute_pipeline_layout_info, nullptr, 
&fft_inversion.pipeline.pipeline_layout)); + + VkComputePipelineCreateInfo computeInfo = {}; + computeInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + computeInfo.layout = fft_inversion.pipeline.pipeline_layout; + computeInfo.stage = load_shader("subgroups_operations/fft_invert.comp", VK_SHADER_STAGE_COMPUTE_BIT); + + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &fft_inversion.pipeline.pipeline)); + +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + computeInfo.stage = load_shader("subgroups_operations/fft_invert_subgroups_off.comp", VK_SHADER_STAGE_COMPUTE_BIT); + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &fft_inversion.pipeline_subgroups_off.pipeline)); + +#endif + + fft_buffers.fft_displacement = std::make_unique(); + + create_image_attachement(VK_FORMAT_R32G32B32A32_SFLOAT, grid_size, grid_size, *fft_buffers.fft_displacement); + + VkDescriptorImageInfo image_descriptor_displacement = create_ia_descriptor(*fft_buffers.fft_displacement); + VkDescriptorImageInfo image_descriptor_pingpong0_axis_y = create_ia_descriptor(*fft_buffers.fft_tilde_h_kt_dy); + VkDescriptorImageInfo image_descriptor_pingpong1_axis_y = create_ia_descriptor(*fft.tilde_axis_y); + VkDescriptorImageInfo image_descriptor_pingpong0_axis_x = create_ia_descriptor(*fft_buffers.fft_tilde_h_kt_dx); + VkDescriptorImageInfo image_descriptor_pingpong1_axis_x = create_ia_descriptor(*fft.tilde_axis_x); + VkDescriptorImageInfo image_descriptor_pingpong0_axis_z = create_ia_descriptor(*fft_buffers.fft_tilde_h_kt_dz); + VkDescriptorImageInfo image_descriptor_pingpong1_axis_z = create_ia_descriptor(*fft.tilde_axis_z); + + auto fft_page_descriptor = create_descriptor(*invert_fft_ubo); + + std::vector write_descriptor_sets = { + vkb::initializers::write_descriptor_set(fft_inversion.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0u, &image_descriptor_displacement), + 
vkb::initializers::write_descriptor_set(fft_inversion.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u, &image_descriptor_pingpong0_axis_y), + vkb::initializers::write_descriptor_set(fft_inversion.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u, &image_descriptor_pingpong1_axis_y), + vkb::initializers::write_descriptor_set(fft_inversion.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u, &image_descriptor_pingpong0_axis_x), + vkb::initializers::write_descriptor_set(fft_inversion.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 4u, &image_descriptor_pingpong1_axis_x), + vkb::initializers::write_descriptor_set(fft_inversion.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 5u, &image_descriptor_pingpong0_axis_z), + vkb::initializers::write_descriptor_set(fft_inversion.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 6u, &image_descriptor_pingpong1_axis_z), + vkb::initializers::write_descriptor_set(fft_inversion.descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 7u, &fft_page_descriptor)}; + vkUpdateDescriptorSets(get_device().get_handle(), static_cast(write_descriptor_sets.size()), write_descriptor_sets.data(), 0u, nullptr); +} + +void SubgroupsOperations::create_fft_normal_map() +{ + std::vector set_layout_bindings = { + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 0u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT, 1u), + vkb::initializers::descriptor_set_layout_binding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT, 2u)}; + + VkDescriptorSetLayoutCreateInfo descriptor_layout = vkb::initializers::descriptor_set_layout_create_info(set_layout_bindings); + VK_CHECK(vkCreateDescriptorSetLayout(get_device().get_handle(), &descriptor_layout, nullptr, &fft_normal_map.descriptor_set_layout)); + + VkDescriptorSetAllocateInfo alloc_info = vkb::initializers::descriptor_set_allocate_info(descriptor_pool, 
&fft_normal_map.descriptor_set_layout, 1u); + VK_CHECK(vkAllocateDescriptorSets(get_device().get_handle(), &alloc_info, &fft_normal_map.descriptor_set)); + + VkPipelineLayoutCreateInfo compute_pipeline_layout_info = vkb::initializers::pipeline_layout_create_info(&fft_normal_map.descriptor_set_layout); + + VK_CHECK(vkCreatePipelineLayout(get_device().get_handle(), &compute_pipeline_layout_info, nullptr, &fft_normal_map.pipeline.pipeline_layout)); + + VkComputePipelineCreateInfo computeInfo = {}; + computeInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + computeInfo.layout = fft_normal_map.pipeline.pipeline_layout; + computeInfo.stage = load_shader("subgroups_operations/fft_normal_map.comp", VK_SHADER_STAGE_COMPUTE_BIT); + + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &fft_normal_map.pipeline.pipeline)); + +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + computeInfo.stage = load_shader("subgroups_operations/fft_normal_map_subgroups_off.comp", VK_SHADER_STAGE_COMPUTE_BIT); + VK_CHECK(vkCreateComputePipelines(get_device().get_handle(), pipeline_cache, 1u, &computeInfo, nullptr, &fft_normal_map.pipeline_subgroups_off.pipeline)); +#endif + + fft_buffers.fft_normal_map = std::make_unique(); + + create_image_attachement(VK_FORMAT_R32G32B32A32_SFLOAT, grid_size, grid_size, *fft_buffers.fft_normal_map); + + VkDescriptorImageInfo image_descriptor_normal_map = create_ia_descriptor(*fft_buffers.fft_normal_map); + VkDescriptorImageInfo image_descriptor_displacement = create_ia_descriptor(*fft_buffers.fft_displacement); + VkDescriptorBufferInfo fft_page_descriptor = create_descriptor(*invert_fft_ubo); + + std::vector write_descriptor_sets = { + vkb::initializers::write_descriptor_set(fft_normal_map.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 0u, &image_descriptor_normal_map), + vkb::initializers::write_descriptor_set(fft_normal_map.descriptor_set, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u, 
&image_descriptor_displacement), + vkb::initializers::write_descriptor_set(fft_normal_map.descriptor_set, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2u, &fft_page_descriptor)}; + vkUpdateDescriptorSets(get_device().get_handle(), static_cast(write_descriptor_sets.size()), write_descriptor_sets.data(), 0u, nullptr); +} + +VkDescriptorImageInfo SubgroupsOperations::create_ia_descriptor(ImageAttachment &attachment) +{ + VkDescriptorImageInfo image_descriptor = {}; + image_descriptor.imageView = attachment.view; + image_descriptor.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + image_descriptor.sampler = attachment.sampler; + return image_descriptor; +} + +void SubgroupsOperations::Wind::recalc() +{ + float rad = angle * glm::pi() / 180.0f; + vec.x = force * glm::cos(rad); + vec.y = force * glm::sin(rad); +} + +std::unique_ptr create_subgroups_operations() +{ + return std::make_unique(); +} + +void SubgroupsOperations::create_command_pool() +{ + VkCommandPoolCreateInfo command_pool_info = {}; + command_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + command_pool_info.queueFamilyIndex = get_device().get_queue_by_flags(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0).get_family_index(); + command_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + VK_CHECK(vkCreateCommandPool(get_device().get_handle(), &command_pool_info, nullptr, &cmd_pool)); +} \ No newline at end of file diff --git a/samples/extensions/subgroups_operations/subgroups_operations.h b/samples/extensions/subgroups_operations/subgroups_operations.h new file mode 100644 index 0000000000..c73a65bd45 --- /dev/null +++ b/samples/extensions/subgroups_operations/subgroups_operations.h @@ -0,0 +1,306 @@ +/* Copyright (c) 2024, Mobica Limited + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "api_vulkan_sample.h" + +#define DEBUG_SUBGROUPS_SWITCH_ENABLE + +class SubgroupsOperations : public ApiVulkanSample +{ + public: + SubgroupsOperations(); + ~SubgroupsOperations(); + + bool prepare(const vkb::ApplicationOptions &options) override; + void request_gpu_features(vkb::PhysicalDevice &gpu) override; + void build_command_buffers() override; + void render(float delta_time) override; + bool resize(const uint32_t width, const uint32_t height) override; + void on_update_ui_overlay(vkb::Drawer &drawer) override; + void create_command_pool() override; + + void draw(); + void load_assets(); + + void prepare_compute(); + void create_compute_queue(); + void create_compute_command_pool(); + void create_compute_command_buffer(); + + void build_compute_command_buffer(); + + void generate_plane(); + void prepare_uniform_buffers(); + void setup_descriptor_pool(); + void create_semaphore(); + void create_descriptor_set_layout(); + void create_descriptor_set(); + void create_pipelines(); + + void create_initial_tides(); + void create_tildas(); + void create_butterfly_texture(); + void create_fft(); + void create_fft_inversion(); + void create_fft_normal_map(); + + void update_uniform_buffers(); + + static glm::vec2 rndGaussian(); + uint32_t grid_size; + + struct Pipeline + { + void destroy(VkDevice device) const; + + VkPipeline pipeline = {VK_NULL_HANDLE}; + VkPipelineLayout pipeline_layout = {VK_NULL_HANDLE}; + }; + + struct GridBuffers + { + std::unique_ptr vertex = {VK_NULL_HANDLE}; + std::unique_ptr index = 
{VK_NULL_HANDLE}; + uint32_t index_count = {0u}; + }; + + struct CameraUbo + { + alignas(16) glm::mat4 projection; + alignas(16) glm::mat4 view; + alignas(16) glm::mat4 model; + }; + + struct OceanParamsUbo + { + alignas(16) glm::vec3 light_color; + alignas(16) glm::vec3 light_position; + alignas(16) glm::vec3 ocean_color; + }; + + struct CameraPositionUbo + { + alignas(16) glm::vec4 position; + }; + + struct TessellationParamsUbo + { + alignas(4) float choppines; + alignas(4) float displacement_scale; + }; + + struct FFTParametersUbo + { + alignas(4) float amplitude; + alignas(4) float length; + alignas(4) uint32_t grid_size; + alignas(8) glm::vec2 wind; + }; + + struct FFTInvertUbo + { + alignas(4) int32_t page_idx = {-1}; + alignas(4) uint32_t grid_size = {256u}; + }; + + struct TimeUbo + { + alignas(4) float time = {0.0f}; + }; + + struct Wind + { + Wind() + { + recalc(); + } + void recalc(); + glm::vec2 vec; + float angle = {180.0f}; + float force = {25.0f}; + }; + + struct GuiConfig + { + bool wireframe = {true}; + bool subgroups_enabled = {true}; + float choppines = {0.1f}; + float displacement_scale = {0.5f}; + float amplitude = {32.0f}; + float length = {1900.0f}; + Wind wind{}; + + glm::vec3 light_pos = {100.0f, 15.0f, 10.0f}; + glm::vec3 light_color = {1.0f, 1.0f, 1.0f}; + glm::vec3 ocean_color = {0.0f, 0.2423423f, 0.434335435f}; + std::vector supported_features; + } ui; + + std::unique_ptr skybox_ubo = {VK_NULL_HANDLE}; + std::unique_ptr ocean_params_ubo = {VK_NULL_HANDLE}; + std::unique_ptr camera_position_ubo = {VK_NULL_HANDLE}; + std::unique_ptr camera_ubo = {VK_NULL_HANDLE}; + std::unique_ptr tessellation_params_ubo = {VK_NULL_HANDLE}; + std::unique_ptr fft_params_ubo = {VK_NULL_HANDLE}; + std::unique_ptr fft_time_ubo = {VK_NULL_HANDLE}; + std::unique_ptr invert_fft_ubo = {VK_NULL_HANDLE}; + std::unique_ptr bit_reverse_buffer = {VK_NULL_HANDLE}; + + std::vector input_random; + + struct ImageAttachment + { + VkImage image; + VkDeviceMemory memory; + 
VkImageView view; + VkFormat format; + VkSampler sampler; + void destroy(VkDevice device) const + { + vkDestroySampler(device, sampler, nullptr); + vkDestroyImageView(device, view, nullptr); + vkDestroyImage(device, image, nullptr); + vkFreeMemory(device, memory, nullptr); + }; + }; + + ImageAttachment butterfly_precomp{}; + + uint32_t log_2_N{}; + vkb::Timer timer; + + struct + { + std::unique_ptr fft_input_random; + std::unique_ptr fft_input_htilde0; + std::unique_ptr fft_input_htilde0_conj; + + std::unique_ptr fft_tilde_h_kt_dx; + std::unique_ptr fft_tilde_h_kt_dy; + std::unique_ptr fft_tilde_h_kt_dz; + std::unique_ptr fft_displacement; + std::unique_ptr fft_normal_map; + } fft_buffers; + + struct + { + VkQueue queue = {VK_NULL_HANDLE}; + VkCommandPool command_pool = {VK_NULL_HANDLE}; + VkCommandBuffer command_buffer = {VK_NULL_HANDLE}; + VkSemaphore semaphore = {VK_NULL_HANDLE}; + uint32_t queue_family_index = {-1u}; + } compute; + + struct + { + VkDescriptorSetLayout descriptor_set_layout = {VK_NULL_HANDLE}; + VkDescriptorSet descriptor_set_axis_y = {VK_NULL_HANDLE}; + VkDescriptorSet descriptor_set_axis_x = {VK_NULL_HANDLE}; + VkDescriptorSet descriptor_set_axis_z = {VK_NULL_HANDLE}; + + struct + { + Pipeline horizontal; // fft.comp + Pipeline vertical; // fft.comp +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + Pipeline horizontal_subgroups_off; // fft_subgroups_off.comp + Pipeline vertical_subgroups_off; // fft_subgroups_off.comp +#endif + } pipelines; + + std::unique_ptr tilde_axis_y = {VK_NULL_HANDLE}; + std::unique_ptr tilde_axis_x = {VK_NULL_HANDLE}; + std::unique_ptr tilde_axis_z = {VK_NULL_HANDLE}; + } fft; + + struct + { + VkDescriptorSetLayout descriptor_set_layout = {VK_NULL_HANDLE}; + VkDescriptorSet descriptor_set = {VK_NULL_HANDLE}; + Pipeline pipeline; // fft_invert.comp +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + Pipeline pipeline_subgroups_off; // fft_invert_subgroups_off.comp +#endif + + } fft_inversion; + + struct + { + VkDescriptorSetLayout 
descriptor_set_layout = {VK_NULL_HANDLE}; + VkDescriptorSet descriptor_set = {VK_NULL_HANDLE}; + Pipeline pipeline; // fft_normal_map.comp +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + Pipeline pipeline_subgroups_off; // fft_normal_map_subgroups_off.comp +#endif + } fft_normal_map; + + struct + { + VkDescriptorSetLayout descriptor_set_layout = {VK_NULL_HANDLE}; + VkDescriptorSet descriptor_set = {VK_NULL_HANDLE}; + Pipeline pipeline; // fft_tilde_h0.comp +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + Pipeline pipeline_subgroups_off; // fft_tilde_h0_subgroups_off.comp +#endif + } initial_tildes; + + struct + { + VkDescriptorSetLayout descriptor_set_layout = {VK_NULL_HANDLE}; + VkDescriptorSet descriptor_set = {VK_NULL_HANDLE}; + Pipeline pipeline; // fft_tilde_h.comp +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + Pipeline pipeline_subgroups_off; // fft_tilde_h_subgroups_off.comp +#endif + } tildes; + + struct + { + VkDescriptorSetLayout descriptor_set_layout = {VK_NULL_HANDLE}; + VkDescriptorSet descriptor_set = {VK_NULL_HANDLE}; + Pipeline pipeline; // butterfly_precomp.comp +#ifdef DEBUG_SUBGROUPS_SWITCH_ENABLE + Pipeline pipeline_subgroups_off; // butterfly_precomp_subgroups_off.comp +#endif + } precompute; + + struct + { + GridBuffers grid; + uint32_t graphics_queue_family_index = {-1u}; + VkDescriptorSetLayout descriptor_set_layout = {VK_NULL_HANDLE}; + VkDescriptorSet descriptor_set = {VK_NULL_HANDLE}; + VkSemaphore semaphore = {VK_NULL_HANDLE}; + + struct + { + Pipeline _default; // ocean.* + Pipeline wireframe; // ocean.* + } pipelines; + } ocean; + + VkPhysicalDeviceSubgroupProperties subgroups_properties{}; + + private: + uint32_t reverse(uint32_t i) const; + VkDescriptorImageInfo create_ia_descriptor(ImageAttachment &attachment); + void create_image_attachement(VkFormat format, uint32_t width, uint32_t height, ImageAttachment &result); +}; + +std::unique_ptr create_subgroups_operations(); \ No newline at end of file diff --git 
a/shaders/subgroups_operations/butterfly_precomp.comp b/shaders/subgroups_operations/butterfly_precomp.comp new file mode 100644 index 0000000000..9ecc1d1385 --- /dev/null +++ b/shaders/subgroups_operations/butterfly_precomp.comp @@ -0,0 +1,88 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : enable +/* Copyright (c) 2024, Mobica Limited + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define PI_32F 3.14159265358979f + +layout (local_size_x = 8, local_size_y = 1, local_size_z = 1) in; + +layout (binding = 0, rgba32f) writeonly uniform image2D u_butterfly_precomp; + +layout (std430, binding = 1) buffer indices +{ + int data[]; +} bit_reversed_indices; + +layout (binding = 2) uniform FFTParametersUbo +{ + float amplitude; + float len; + uint grid_size; + vec2 wind; +} fftUbo; + +void main() +{ + vec2 pos = gl_GlobalInvocationID.xy; + uint N = fftUbo.grid_size; + + // Twiddle factor exponent, Thesis, Section 4.2.6, Eq 4.6 + float k = mod(pos.y * (float(N) / pow(2, pos.x + 1)), N); + + // Thesis, Section 4.2.6, Eq 4.7 + int butterfly_span = int(pow(2, pos.x)); + int butterfly_wing = 0; + + if ((mod(pos.y, pow(2, pos.x + 1))) < butterfly_span) butterfly_wing = 1; + + ivec2 pixel_pos = ivec2(gl_GlobalInvocationID.xy); + + vec4 result = vec4(0.0f); + result.x = cos(2.f * PI_32F * k / float(N)); // Twiddle factor real part + result.y = sin(2.f * PI_32F * k / float(N)); // Twiddle factor imaginary part + + 
// Store the sample indices for the next stage + if (pos.x == 0) + { + if (butterfly_wing == 1) + { + result.z = bit_reversed_indices.data[int(pos.y)]; + result.w = bit_reversed_indices.data[int(pos.y + 1)]; + } + else + { + result.z = bit_reversed_indices.data[int(pos.y - 1)]; + result.w = bit_reversed_indices.data[int(pos.y)]; + } + } + else + { + if (butterfly_wing == 1) + { + result.z = pos.y; + result.w = pos.y + butterfly_span; + } + else + { + result.z = pos.y - butterfly_span; + result.w = pos.y; + } + } + + imageStore(u_butterfly_precomp, pixel_pos, result); +} \ No newline at end of file diff --git a/shaders/subgroups_operations/butterfly_precomp_subgroups_off.comp b/shaders/subgroups_operations/butterfly_precomp_subgroups_off.comp new file mode 100644 index 0000000000..df2016a867 --- /dev/null +++ b/shaders/subgroups_operations/butterfly_precomp_subgroups_off.comp @@ -0,0 +1,87 @@ +#version 450 +/* Copyright (c) 2024, Mobica Limited + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#define PI_32F 3.14159265358979f + +layout (local_size_x = 8, local_size_y = 1, local_size_z = 1) in; + +layout (binding = 0, rgba32f) writeonly uniform image2D u_butterfly_precomp; + +layout (std430, binding = 1) buffer indices +{ + int data[]; +} bit_reversed_indices; + +layout (binding = 2) uniform FFTParametersUbo +{ + float amplitude; + float len; + uint grid_size; + vec2 wind; +} fftUbo; + +void main() +{ + vec2 pos = gl_GlobalInvocationID.xy; + uint N = fftUbo.grid_size; + + // Twiddle factor exponent, Thesis, Section 4.2.6, Eq 4.6 + float k = mod(pos.y * (float(N) / pow(2, pos.x + 1)), N); + + // Thesis, Section 4.2.6, Eq 4.7 + int butterfly_span = int(pow(2, pos.x)); + int butterfly_wing = 0; + + if ((mod(pos.y, pow(2, pos.x + 1))) < butterfly_span) butterfly_wing = 1; + + ivec2 pixel_pos = ivec2(gl_GlobalInvocationID.xy); + + vec4 result = vec4(0.0f); + result.x = cos(2.f * PI_32F * k / float(N)); // Twiddle factor real part + result.y = sin(2.f * PI_32F * k / float(N)); // Twiddle factor imaginary part + + // Store the sample indices for the next stage + if (pos.x == 0) + { + if (butterfly_wing == 1) + { + result.z = bit_reversed_indices.data[int(pos.y)]; + result.w = bit_reversed_indices.data[int(pos.y + 1)]; + } + else + { + result.z = bit_reversed_indices.data[int(pos.y - 1)]; + result.w = bit_reversed_indices.data[int(pos.y)]; + } + } + else + { + if (butterfly_wing == 1) + { + result.z = pos.y; + result.w = pos.y + butterfly_span; + } + else + { + result.z = pos.y - butterfly_span; + result.w = pos.y; + } + } + + imageStore(u_butterfly_precomp, pixel_pos, result); +} \ No newline at end of file diff --git a/shaders/subgroups_operations/fft.comp b/shaders/subgroups_operations/fft.comp new file mode 100644 index 0000000000..381d84ca11 --- /dev/null +++ b/shaders/subgroups_operations/fft.comp @@ -0,0 +1,121 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : enable +/* Copyright (c) 2024, Mobica Limited + * + * SPDX-License-Identifier: 
Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in; + +layout (constant_id = 0) const int direction = 0; + +layout (binding = 0, rgba32f) readonly uniform image2D u_butterfly_precomp; +layout (binding = 1, rgba32f) uniform image2D u_pingpong0; +layout (binding = 2, rgba32f) uniform image2D u_pingpong1; + +layout( push_constant ) uniform Push_Constants{ + uint i; +} step; + +struct Complex +{ + float real; + float imag; +}; + +Complex complex_add(Complex c1, Complex c2) +{ + Complex res; + res.real = c1.real + c2.real; + res.imag = c1.imag + c2.imag; + return res; +} + +Complex complex_multiply(Complex c1, Complex c2) +{ + Complex res; + res.real = c1.real * c2.real - c1.imag * c2.imag; + res.imag = c1.real * c2.imag + c1.imag * c2.real; + return res; +} + +void HorizontalButterflies(in ivec2 pixel_pos) +{ + vec4 butterfly_precomp = imageLoad(u_butterfly_precomp, ivec2(step.i, pixel_pos.x)); + if ((step.i % 2) == 0) + { + subgroupMemoryBarrierImage(); + vec2 a_ = imageLoad(u_pingpong0, ivec2(butterfly_precomp.z, pixel_pos.y)).rg; + vec2 b_ = imageLoad(u_pingpong0, ivec2(butterfly_precomp.w, pixel_pos.y)).rg; + + Complex a = Complex(a_.x, a_.y); + Complex b = Complex(b_.x, b_.y); + Complex twiddle_factor = Complex(butterfly_precomp.x, butterfly_precomp.y); + + Complex result = complex_add(a, complex_multiply(twiddle_factor, b)); + imageStore(u_pingpong1, pixel_pos, 
vec4(result.real, result.imag, 0.f, 1.f)); + } + else + { + subgroupMemoryBarrierImage(); + vec2 a_ = imageLoad(u_pingpong1, ivec2(butterfly_precomp.z, pixel_pos.y)).rg; + vec2 b_ = imageLoad(u_pingpong1, ivec2(butterfly_precomp.w, pixel_pos.y)).rg; + + Complex a = Complex(a_.x, a_.y); + Complex b = Complex(b_.x, b_.y); + Complex twiddle_factor = Complex(butterfly_precomp.x, butterfly_precomp.y); + + Complex result = complex_add(a, complex_multiply(twiddle_factor, b)); + imageStore(u_pingpong0, pixel_pos, vec4(result.real, result.imag, 0.f, 1.f)); + } +} + +void VerticalButterfiles(in ivec2 pixel_pos) +{ + vec4 butterfly_precomp = imageLoad(u_butterfly_precomp, ivec2(step.i, pixel_pos.y)); + if ((step.i % 2) == 0) + { + subgroupMemoryBarrierImage(); + vec2 a_ = imageLoad(u_pingpong0, ivec2(pixel_pos.x, butterfly_precomp.z)).rg; + vec2 b_ = imageLoad(u_pingpong0, ivec2(pixel_pos.x, butterfly_precomp.w)).rg; + + Complex a = Complex(a_.x, a_.y); + Complex b = Complex(b_.x, b_.y); + Complex twiddle_factor = Complex(butterfly_precomp.x, butterfly_precomp.y); + + Complex result = complex_add(a, complex_multiply(twiddle_factor, b)); + imageStore(u_pingpong1, pixel_pos, vec4(result.real, result.imag, 0.f, 1.f)); + } + else + { + subgroupMemoryBarrierImage(); + vec2 a_ = imageLoad(u_pingpong1, ivec2(pixel_pos.x, butterfly_precomp.z)).rg; + vec2 b_ = imageLoad(u_pingpong1, ivec2(pixel_pos.x, butterfly_precomp.w)).rg; + + Complex a = Complex(a_.x, a_.y); + Complex b = Complex(b_.x, b_.y); + Complex twiddle_factor = Complex(butterfly_precomp.x, butterfly_precomp.y); + + Complex result = complex_add(a, complex_multiply(twiddle_factor, b)); + imageStore(u_pingpong0, pixel_pos, vec4(result.real, result.imag, 0.f, 1.f)); + } +} + +void main() +{ + ivec2 uv = ivec2(gl_GlobalInvocationID.xy); + if (direction == 0) HorizontalButterflies(uv); + else if (direction == 1) VerticalButterfiles(uv); +} \ No newline at end of file diff --git a/shaders/subgroups_operations/fft_invert.comp 
b/shaders/subgroups_operations/fft_invert.comp new file mode 100644 index 0000000000..54996e0d91 --- /dev/null +++ b/shaders/subgroups_operations/fft_invert.comp @@ -0,0 +1,62 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic : enable +/* Copyright (c) 2024, Mobica Limited + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in; + +layout (binding = 0, rgba32f) writeonly uniform image2D u_displacement; + +layout (binding = 1, rgba32f) readonly uniform image2D u_pingpong0_y; +layout (binding = 2, rgba32f) readonly uniform image2D u_pingpong1_y; + +layout (binding = 3, rgba32f) readonly uniform image2D u_pingpong0_x; +layout (binding = 4, rgba32f) readonly uniform image2D u_pingpong1_x; + +layout (binding = 5, rgba32f) readonly uniform image2D u_pingpong0_z; +layout (binding = 6, rgba32f) readonly uniform image2D u_pingpong1_z; + +layout (binding = 7) uniform InvertFft +{ + int pong_idx; + uint grid_size; +} fftUbo; + +void main() +{ + uint N = fftUbo.grid_size; + ivec2 pixel_pos = ivec2(gl_GlobalInvocationID.xy); + + float perms[2] = { 1.0f, -1.0f }; + int index = int(mod(pixel_pos.x + pixel_pos.y, 2)); + float perm = perms[index]; + uint pingpong_index = fftUbo.pong_idx; + if (pingpong_index == 0) + { + float h_y = imageLoad(u_pingpong0_y, pixel_pos).r; + float h_x = imageLoad(u_pingpong0_x, pixel_pos).r; + float h_z = imageLoad(u_pingpong0_z, 
pixel_pos).r; + imageStore(u_displacement, pixel_pos, vec4(perm * (h_x / float(N * N)), perm * (h_y / float(N * N)), perm * (h_z / float(N * N)), 1.0f)); + } + else if (pingpong_index == 1) + { + float h_y = imageLoad(u_pingpong1_y, pixel_pos).r; + float h_x = imageLoad(u_pingpong1_x, pixel_pos).r; + float h_z = imageLoad(u_pingpong1_z, pixel_pos).r; + imageStore(u_displacement, pixel_pos, vec4(perm * (h_x / float(N * N)), perm * (h_y / float(N * N)), perm * (h_z / float(N * N)), 1.0f)); + } +} \ No newline at end of file diff --git a/shaders/subgroups_operations/fft_invert_subgroups_off.comp b/shaders/subgroups_operations/fft_invert_subgroups_off.comp new file mode 100644 index 0000000000..736994c814 --- /dev/null +++ b/shaders/subgroups_operations/fft_invert_subgroups_off.comp @@ -0,0 +1,61 @@ +#version 450 +/* Copyright (c) 2024, Mobica Limited + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in; + +layout (binding = 0, rgba32f) writeonly uniform image2D u_displacement; + +layout (binding = 1, rgba32f) readonly uniform image2D u_pingpong0_y; +layout (binding = 2, rgba32f) readonly uniform image2D u_pingpong1_y; + +layout (binding = 3, rgba32f) readonly uniform image2D u_pingpong0_x; +layout (binding = 4, rgba32f) readonly uniform image2D u_pingpong1_x; + +layout (binding = 5, rgba32f) readonly uniform image2D u_pingpong0_z; +layout (binding = 6, rgba32f) readonly uniform image2D u_pingpong1_z; + +layout (binding = 7) uniform InvertFft +{ + int pong_idx; + uint grid_size; +} fftUbo; + +void main() +{ + uint N = fftUbo.grid_size; + ivec2 pixel_pos = ivec2(gl_GlobalInvocationID.xy); + + float perms[2] = { 1.0f, -1.0f }; + int index = int(mod(pixel_pos.x + pixel_pos.y, 2)); + float perm = perms[index]; + uint pingpong_index = fftUbo.pong_idx; + if (pingpong_index == 0) + { + float h_y = imageLoad(u_pingpong0_y, pixel_pos).r; + float h_x = imageLoad(u_pingpong0_x, pixel_pos).r; + float h_z = imageLoad(u_pingpong0_z, pixel_pos).r; + imageStore(u_displacement, pixel_pos, vec4(perm * (h_x / float(N * N)), perm * (h_y / float(N * N)), perm * (h_z / float(N * N)), 1.0f)); + } + else if (pingpong_index == 1) + { + float h_y = imageLoad(u_pingpong1_y, pixel_pos).r; + float h_x = imageLoad(u_pingpong1_x, pixel_pos).r; + float h_z = imageLoad(u_pingpong1_z, pixel_pos).r; + imageStore(u_displacement, pixel_pos, vec4(perm * (h_x / float(N * N)), perm * (h_y / float(N * N)), perm * (h_z / float(N * N)), 1.0f)); + } +} \ No newline at end of file diff --git a/shaders/subgroups_operations/fft_normal_map.comp b/shaders/subgroups_operations/fft_normal_map.comp new file mode 100644 index 0000000000..24b9cf08bf --- /dev/null +++ b/shaders/subgroups_operations/fft_normal_map.comp @@ -0,0 +1,50 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic: enable +/* Copyright (c) 2024, Mobica Limited + * + * 
SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in; + +layout (binding = 0, rgba32f) writeonly uniform image2D fft_normal_map; + +layout (binding = 1, rgba32f) readonly uniform image2D fft_displacement_map; + +layout (binding = 2) uniform InvertFft +{ + int pong_idx; + uint grid_size; +} fftUbo; + +void main() +{ + uint N = fftUbo.grid_size; + ivec2 pixel_pos = ivec2(gl_GlobalInvocationID.xy); + const float offset = 1.0f; + + vec3 v0 = imageLoad(fft_displacement_map, ivec2(pixel_pos + vec2(0.0f, -offset))).rgb; + vec3 v1 = imageLoad(fft_displacement_map, ivec2(pixel_pos + vec2(0.0f, offset))).rgb; + float v01_length = length(v1 - v0); + + vec3 v2 = imageLoad(fft_displacement_map, ivec2(pixel_pos + vec2(-offset, 0.0f))).rgb; + vec3 v3 = imageLoad(fft_displacement_map, ivec2(pixel_pos + vec2(offset, 0.0f))).rgb; + float v23_length = length(v3 - v2); + + vec3 c0 = (v1 - v0) / ( 2.0f * v01_length); + vec3 c1 = (v3 - v2) / ( 2.0f * v23_length); + + vec3 result = (cross(c0, c1) * 0.5f) + 0.5f; + imageStore(fft_normal_map, pixel_pos, vec4(normalize(result), 1.0f)); +} \ No newline at end of file diff --git a/shaders/subgroups_operations/fft_normal_map_subgroups_off.comp b/shaders/subgroups_operations/fft_normal_map_subgroups_off.comp new file mode 100644 index 0000000000..24b9cf08bf --- /dev/null +++ 
b/shaders/subgroups_operations/fft_normal_map_subgroups_off.comp @@ -0,0 +1,50 @@ +#version 450 +#extension GL_KHR_shader_subgroup_basic: enable +/* Copyright (c) 2024, Mobica Limited + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in; + +layout (binding = 0, rgba32f) writeonly uniform image2D fft_normal_map; + +layout (binding = 1, rgba32f) readonly uniform image2D fft_displacement_map; + +layout (binding = 2) uniform InvertFft +{ + int pong_idx; + uint grid_size; +} fftUbo; + +void main() +{ + uint N = fftUbo.grid_size; + ivec2 pixel_pos = ivec2(gl_GlobalInvocationID.xy); + const float offset = 1.0f; + + vec3 v0 = imageLoad(fft_displacement_map, ivec2(pixel_pos + vec2(0.0f, -offset))).rgb; + vec3 v1 = imageLoad(fft_displacement_map, ivec2(pixel_pos + vec2(0.0f, offset))).rgb; + float v01_length = length(v1 - v0); + + vec3 v2 = imageLoad(fft_displacement_map, ivec2(pixel_pos + vec2(-offset, 0.0f))).rgb; + vec3 v3 = imageLoad(fft_displacement_map, ivec2(pixel_pos + vec2(offset, 0.0f))).rgb; + float v23_length = length(v3 - v2); + + vec3 c0 = (v1 - v0) / ( 2.0f * v01_length); + vec3 c1 = (v3 - v2) / ( 2.0f * v23_length); + + vec3 result = (cross(c0, c1) * 0.5f) + 0.5f; + imageStore(fft_normal_map, pixel_pos, vec4(normalize(result), 1.0f)); +} \ No newline at end of file diff --git a/shaders/subgroups_operations/fft_subgroups_off.comp 
b/shaders/subgroups_operations/fft_subgroups_off.comp new file mode 100644 index 0000000000..0f4d9ab585 --- /dev/null +++ b/shaders/subgroups_operations/fft_subgroups_off.comp @@ -0,0 +1,120 @@ +#version 450 +/* Copyright (c) 2024, Mobica Limited + * + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in; + +layout (constant_id = 0) const int direction = 0; + +layout (binding = 0, rgba32f) readonly uniform image2D u_butterfly_precomp; +layout (binding = 1, rgba32f) uniform image2D u_pingpong0; +layout (binding = 2, rgba32f) uniform image2D u_pingpong1; + +layout( push_constant ) uniform Push_Constants{ + uint i; +} step; + +struct Complex +{ + float real; + float imag; +}; + +Complex complex_add(Complex c1, Complex c2) +{ + Complex res; + res.real = c1.real + c2.real; + res.imag = c1.imag + c2.imag; + return res; +} + +Complex complex_multiply(Complex c1, Complex c2) +{ + Complex res; + res.real = c1.real * c2.real - c1.imag * c2.imag; + res.imag = c1.real * c2.imag + c1.imag * c2.real; + return res; +} + +void HorizontalButterflies(in ivec2 pixel_pos) +{ + vec4 butterfly_precomp = imageLoad(u_butterfly_precomp, ivec2(step.i, pixel_pos.x)); + if ((step.i % 2) == 0) + { + memoryBarrier(); + vec2 a_ = imageLoad(u_pingpong0, ivec2(butterfly_precomp.z, pixel_pos.y)).rg; + vec2 b_ = imageLoad(u_pingpong0, ivec2(butterfly_precomp.w, pixel_pos.y)).rg; + + 
Complex a = Complex(a_.x, a_.y); + Complex b = Complex(b_.x, b_.y); + Complex twiddle_factor = Complex(butterfly_precomp.x, butterfly_precomp.y); + + Complex result = complex_add(a, complex_multiply(twiddle_factor, b)); + imageStore(u_pingpong1, pixel_pos, vec4(result.real, result.imag, 0.f, 1.f)); + } + else + { + memoryBarrier(); + vec2 a_ = imageLoad(u_pingpong1, ivec2(butterfly_precomp.z, pixel_pos.y)).rg; + vec2 b_ = imageLoad(u_pingpong1, ivec2(butterfly_precomp.w, pixel_pos.y)).rg; + + Complex a = Complex(a_.x, a_.y); + Complex b = Complex(b_.x, b_.y); + Complex twiddle_factor = Complex(butterfly_precomp.x, butterfly_precomp.y); + + Complex result = complex_add(a, complex_multiply(twiddle_factor, b)); + imageStore(u_pingpong0, pixel_pos, vec4(result.real, result.imag, 0.f, 1.f)); + } +} + +void VerticalButterfiles(in ivec2 pixel_pos) +{ + vec4 butterfly_precomp = imageLoad(u_butterfly_precomp, ivec2(step.i, pixel_pos.y)); + if ((step.i % 2) == 0) + { + memoryBarrier(); + vec2 a_ = imageLoad(u_pingpong0, ivec2(pixel_pos.x, butterfly_precomp.z)).rg; + vec2 b_ = imageLoad(u_pingpong0, ivec2(pixel_pos.x, butterfly_precomp.w)).rg; + + Complex a = Complex(a_.x, a_.y); + Complex b = Complex(b_.x, b_.y); + Complex twiddle_factor = Complex(butterfly_precomp.x, butterfly_precomp.y); + + Complex result = complex_add(a, complex_multiply(twiddle_factor, b)); + imageStore(u_pingpong1, pixel_pos, vec4(result.real, result.imag, 0.f, 1.f)); + } + else + { + memoryBarrier(); + vec2 a_ = imageLoad(u_pingpong1, ivec2(pixel_pos.x, butterfly_precomp.z)).rg; + vec2 b_ = imageLoad(u_pingpong1, ivec2(pixel_pos.x, butterfly_precomp.w)).rg; + + Complex a = Complex(a_.x, a_.y); + Complex b = Complex(b_.x, b_.y); + Complex twiddle_factor = Complex(butterfly_precomp.x, butterfly_precomp.y); + + Complex result = complex_add(a, complex_multiply(twiddle_factor, b)); + imageStore(u_pingpong0, pixel_pos, vec4(result.real, result.imag, 0.f, 1.f)); + } +} + +void main() +{ + ivec2 uv = 
+    ivec2(gl_GlobalInvocationID.xy);
+    if (direction == 0) HorizontalButterflies(uv);
+    else if (direction == 1) VerticalButterfiles(uv);
+}
\ No newline at end of file
diff --git a/shaders/subgroups_operations/fft_tilde_h.comp b/shaders/subgroups_operations/fft_tilde_h.comp
new file mode 100644
index 0000000000..0ae7626c58
--- /dev/null
+++ b/shaders/subgroups_operations/fft_tilde_h.comp
@@ -0,0 +1,109 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : enable
+/* Copyright (c) 2024, Mobica Limited
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define PI 3.14159265358979f
+#define GRAVITY 9.81f
+
+layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
+
+layout (binding = 0, rgba32f) readonly uniform image2D u_tilde_h0_k;
+layout (binding = 1, rgba32f) readonly uniform image2D u_tilde_h0_minus_k;
+
+layout (binding = 2, rgba32f) writeonly uniform image2D tilde_h_kt_dx;
+layout (binding = 3, rgba32f) writeonly uniform image2D tilde_h_kt_dy;
+layout (binding = 4, rgba32f) writeonly uniform image2D tilde_h_kt_dz;
+
+layout (binding = 5) uniform FFTParametersUbo
+{
+    float amplitude;
+    float len;
+    uint grid_size;
+    vec2 wind;
+} fftUbo;
+
+layout (binding = 6) uniform Time
+{
+    float time;
+} t;
+
+struct Complex
+{
+    float real;
+    float imag;
+};
+
+Complex complex_add(Complex c1, Complex c2)
+{
+    Complex res;
+    res.real = c1.real + c2.real;
+    res.imag = c1.imag + c2.imag;
+    return res;
+}
+
+Complex complex_multiply(Complex c1, Complex c2)
+{
+    Complex res;
+    res.real = c1.real * c2.real - c1.imag * c2.imag;
+    res.imag = c1.real * c2.imag + c1.imag * c2.real;
+    return res;
+}
+
+Complex complex_conj(Complex c)
+{
+    Complex res;
+    res.real = c.real;
+    res.imag = -c.imag;
+    return res;
+}
+
+void main()
+{
+    uint N = fftUbo.grid_size;
+    float L = fftUbo.len;
+
+    vec2 pos = vec2(gl_GlobalInvocationID.xy) - (N / 2.0);
+
+    vec2 k = vec2((2.0 * PI * pos.x) / L, (2.0 * PI * pos.y) / L);
+
+    float k_magnitude = length(k);
+    if (k_magnitude < 0.00001f) k_magnitude = 0.00001f;
+
+    float w = sqrt(GRAVITY * k_magnitude); // deep-water dispersion relation
+
+    ivec2 pixel_pos = ivec2(gl_GlobalInvocationID.xy);
+
+    vec2 h0_k = imageLoad(u_tilde_h0_k, pixel_pos).xy;
+    Complex amp = Complex(h0_k.x, h0_k.y);
+    Complex exp_iwt = Complex(cos(w * t.time), sin(w * t.time));
+
+    vec2 h0_minus_k = imageLoad(u_tilde_h0_minus_k, pixel_pos).xy;
+    Complex amp_conj = complex_conj(Complex(h0_minus_k.x, h0_minus_k.y));
+    Complex exp_minus_iwt = Complex(cos(w * t.time), -sin(w * t.time));
+
+    Complex h_k_t_dy = complex_add(complex_multiply(amp, exp_iwt), complex_multiply(amp_conj, exp_minus_iwt));
+    imageStore(tilde_h_kt_dy, pixel_pos, vec4(h_k_t_dy.real, h_k_t_dy.imag, 0.0, 1.0));
+
+    Complex dx = Complex(0.0, -k.x / k_magnitude);
+    Complex h_k_t_dx = complex_multiply(dx, h_k_t_dy);
+    imageStore(tilde_h_kt_dx, pixel_pos, vec4(h_k_t_dx.real, h_k_t_dx.imag, 0.0, 1.0));
+
+    Complex dz = Complex(0.0, -k.y / k_magnitude);
+    Complex h_k_t_dz = complex_multiply(dz, h_k_t_dy);
+    imageStore(tilde_h_kt_dz, pixel_pos, vec4(h_k_t_dz.real, h_k_t_dz.imag, 0.0, 1.0));
+}
\ No newline at end of file
diff --git a/shaders/subgroups_operations/fft_tilde_h0.comp b/shaders/subgroups_operations/fft_tilde_h0.comp
new file mode 100644
index 0000000000..a6edb69ead
--- /dev/null
+++ b/shaders/subgroups_operations/fft_tilde_h0.comp
@@ -0,0 +1,80 @@
+#version 450
+#extension GL_KHR_shader_subgroup_basic : enable
+/* Copyright (c) 2024, Mobica Limited
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define PI 3.14159265358979f
+#define GRAVITY 9.81f
+
+layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
+
+layout (binding = 0, rgba32f) writeonly uniform image2D tilde_h0_k;
+layout (binding = 1, rgba32f) writeonly uniform image2D tilde_h0_minus_k;
+
+layout (std140, binding = 2) readonly buffer InputRandom
+{
+    vec4 data[]; // one vec4 of Gaussian random numbers per grid cell
+} input_random;
+
+layout (std140, binding = 3) uniform FFTParametersUbo
+{
+    float amplitude;
+    float len;
+    uint grid_size;
+    vec2 wind;
+} input_params;
+
+float SuppressionFactor(float k_magnitude_sq)
+{
+    float suppress_length = 0.1;
+    return exp(-k_magnitude_sq * suppress_length * suppress_length);
+}
+
+float PhillipsSpectrum(vec2 k, float k_magnitude_sq, float l_phillips, vec2 wind_direction)
+{
+    return input_params.amplitude
+           * ((exp(-1.0 / (k_magnitude_sq * l_phillips * l_phillips))
+           * pow(abs(dot(normalize(k), wind_direction)), 2)) // abs(): GLSL pow() is undefined for a negative base; |d|^2 == d^2
+           * SuppressionFactor(k_magnitude_sq))
+           / (k_magnitude_sq * k_magnitude_sq);
+}
+
+void main()
+{
+    vec2 wind_direction = normalize(input_params.wind);
+    float wind_speed = length(input_params.wind);
+
+    vec2 pos = vec2(gl_GlobalInvocationID.xy) - (input_params.grid_size / 2.0);
+    ivec2 pixel_pos = ivec2(gl_GlobalInvocationID.xy);
+
+    vec2 k = vec2((2.0 * PI * pos.x) / input_params.len, (2.0 * PI * pos.y) / input_params.len);
+
+    float k_magnitude = length(k);
+    if (k_magnitude < 0.00001)
+        k_magnitude = 0.00001;
+
+    float l_phillips = (wind_speed * wind_speed) / GRAVITY;
+
+    uint idx = pixel_pos.x + pixel_pos.y * input_params.grid_size;
+    vec4 rnd = input_random.data[idx];
+
+    float h0_k = clamp(sqrt(PhillipsSpectrum(k, k_magnitude * k_magnitude, l_phillips, wind_direction) / 2.0), -4000.0, 4000.0);
+    float h0_minus_k = clamp(sqrt(PhillipsSpectrum(-k, k_magnitude * k_magnitude, l_phillips, wind_direction) / 2.0), -4000.0, 4000.0);
+
+    imageStore(tilde_h0_k, pixel_pos, vec4(rnd.xy * h0_k, 0.0, 1.0));
+    imageStore(tilde_h0_minus_k, pixel_pos, vec4(rnd.zw * h0_minus_k, 0.0, 1.0));
+}
\ No newline at end of file
diff --git a/shaders/subgroups_operations/fft_tilde_h0_subgroups_off.comp b/shaders/subgroups_operations/fft_tilde_h0_subgroups_off.comp
new file mode 100644
index 0000000000..fc8e72fc12
--- /dev/null
+++ b/shaders/subgroups_operations/fft_tilde_h0_subgroups_off.comp
@@ -0,0 +1,79 @@
+#version 450
+/* Copyright (c) 2024, Mobica Limited
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define PI 3.14159265358979f
+#define GRAVITY 9.81f
+
+layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
+
+layout (binding = 0, rgba32f) writeonly uniform image2D tilde_h0_k;
+layout (binding = 1, rgba32f) writeonly uniform image2D tilde_h0_minus_k;
+
+layout (std140, binding = 2) readonly buffer InputRandom
+{
+    vec4 data[]; // one vec4 of Gaussian random numbers per grid cell
+} input_random;
+
+layout (std140, binding = 3) uniform FFTParametersUbo
+{
+    float amplitude;
+    float len;
+    uint grid_size;
+    vec2 wind;
+} input_params;
+
+float SuppressionFactor(float k_magnitude_sq)
+{
+    float suppress_length = 0.1;
+    return exp(-k_magnitude_sq * suppress_length * suppress_length);
+}
+
+float PhillipsSpectrum(vec2 k, float k_magnitude_sq, float l_phillips, vec2 wind_direction)
+{
+    return input_params.amplitude
+           * ((exp(-1.0 / (k_magnitude_sq * l_phillips * l_phillips))
+           * pow(abs(dot(normalize(k), wind_direction)), 2)) // abs(): GLSL pow() is undefined for a negative base; |d|^2 == d^2
+           * SuppressionFactor(k_magnitude_sq))
+           / (k_magnitude_sq * k_magnitude_sq);
+}
+
+void main()
+{
+    vec2 wind_direction = normalize(input_params.wind);
+    float wind_speed = length(input_params.wind);
+
+    vec2 pos = vec2(gl_GlobalInvocationID.xy) - (input_params.grid_size / 2.0);
+    ivec2 pixel_pos = ivec2(gl_GlobalInvocationID.xy);
+
+    vec2 k = vec2((2.0 * PI * pos.x) / input_params.len, (2.0 * PI * pos.y) / input_params.len);
+
+    float k_magnitude = length(k);
+    if (k_magnitude < 0.00001)
+        k_magnitude = 0.00001;
+
+    float l_phillips = (wind_speed * wind_speed) / GRAVITY;
+
+    uint idx = pixel_pos.x + pixel_pos.y * input_params.grid_size;
+    vec4 rnd = input_random.data[idx];
+
+    float h0_k = clamp(sqrt(PhillipsSpectrum(k, k_magnitude * k_magnitude, l_phillips, wind_direction) / 2.0), -4000.0, 4000.0);
+    float h0_minus_k = clamp(sqrt(PhillipsSpectrum(-k, k_magnitude * k_magnitude, l_phillips, wind_direction) / 2.0), -4000.0, 4000.0);
+
+    imageStore(tilde_h0_k, pixel_pos, vec4(rnd.xy * h0_k, 0.0, 1.0));
+    imageStore(tilde_h0_minus_k, pixel_pos, vec4(rnd.zw * h0_minus_k, 0.0, 1.0));
+}
\ No newline at end of file
diff --git a/shaders/subgroups_operations/fft_tilde_h_subgroups_off.comp b/shaders/subgroups_operations/fft_tilde_h_subgroups_off.comp
new file mode 100644
index 0000000000..421e817ade
--- /dev/null
+++ b/shaders/subgroups_operations/fft_tilde_h_subgroups_off.comp
@@ -0,0 +1,108 @@
+#version 450
+/* Copyright (c) 2024, Mobica Limited
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define PI 3.14159265358979f
+#define GRAVITY 9.81f
+
+layout (local_size_x = 32, local_size_y = 1, local_size_z = 1) in;
+
+layout (binding = 0, rgba32f) readonly uniform image2D u_tilde_h0_k;
+layout (binding = 1, rgba32f) readonly uniform image2D u_tilde_h0_minus_k;
+
+layout (binding = 2, rgba32f) writeonly uniform image2D tilde_h_kt_dx;
+layout (binding = 3, rgba32f) writeonly uniform image2D tilde_h_kt_dy;
+layout (binding = 4, rgba32f) writeonly uniform image2D tilde_h_kt_dz;
+
+layout (binding = 5) uniform FFTParametersUbo
+{
+    float amplitude;
+    float len;
+    uint grid_size;
+    vec2 wind;
+} fftUbo;
+
+layout (binding = 6) uniform Time
+{
+    float time;
+} t;
+
+struct Complex
+{
+    float real;
+    float imag;
+};
+
+Complex complex_add(Complex c1, Complex c2)
+{
+    Complex res;
+    res.real = c1.real + c2.real;
+    res.imag = c1.imag + c2.imag;
+    return res;
+}
+
+Complex complex_multiply(Complex c1, Complex c2)
+{
+    Complex res;
+    res.real = c1.real * c2.real - c1.imag * c2.imag;
+    res.imag = c1.real * c2.imag + c1.imag * c2.real;
+    return res;
+}
+
+Complex complex_conj(Complex c)
+{
+    Complex res;
+    res.real = c.real;
+    res.imag = -c.imag;
+    return res;
+}
+
+void main()
+{
+    uint N = fftUbo.grid_size;
+    float L = fftUbo.len;
+
+    vec2 pos = vec2(gl_GlobalInvocationID.xy) - (N / 2.0);
+
+    vec2 k = vec2((2.0 * PI * pos.x) / L, (2.0 * PI * pos.y) / L);
+
+    float k_magnitude = length(k);
+    if (k_magnitude < 0.00001f) k_magnitude = 0.00001f;
+
+    float w = sqrt(GRAVITY * k_magnitude); // deep-water dispersion relation
+
+    ivec2 pixel_pos = ivec2(gl_GlobalInvocationID.xy);
+
+    vec2 h0_k = imageLoad(u_tilde_h0_k, pixel_pos).xy;
+    Complex amp = Complex(h0_k.x, h0_k.y);
+    Complex exp_iwt = Complex(cos(w * t.time), sin(w * t.time));
+
+    vec2 h0_minus_k = imageLoad(u_tilde_h0_minus_k, pixel_pos).xy;
+    Complex amp_conj = complex_conj(Complex(h0_minus_k.x, h0_minus_k.y));
+    Complex exp_minus_iwt = Complex(cos(w * t.time), -sin(w * t.time));
+
+    Complex h_k_t_dy = complex_add(complex_multiply(amp, exp_iwt), complex_multiply(amp_conj, exp_minus_iwt));
+    imageStore(tilde_h_kt_dy, pixel_pos, vec4(h_k_t_dy.real, h_k_t_dy.imag, 0.0, 1.0));
+
+    Complex dx = Complex(0.0, -k.x / k_magnitude);
+    Complex h_k_t_dx = complex_multiply(dx, h_k_t_dy);
+    imageStore(tilde_h_kt_dx, pixel_pos, vec4(h_k_t_dx.real, h_k_t_dx.imag, 0.0, 1.0));
+
+    Complex dz = Complex(0.0, -k.y / k_magnitude);
+    Complex h_k_t_dz = complex_multiply(dz, h_k_t_dy);
+    imageStore(tilde_h_kt_dz, pixel_pos, vec4(h_k_t_dz.real, h_k_t_dz.imag, 0.0, 1.0));
+}
\ No newline at end of file
diff --git a/shaders/subgroups_operations/ocean.frag b/shaders/subgroups_operations/ocean.frag
new file mode 100644
index 0000000000..85e69ef2a8
--- /dev/null
+++ b/shaders/subgroups_operations/ocean.frag
@@ -0,0 +1,105 @@
+#version 450
+/* Copyright (c) 2024, Mobica Limited
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+layout (location = 0) in vec4 in_pos;
+layout (location = 1) in vec2 in_uv;
+
+layout (location = 0) out vec4 outFragColor;
+
+layout (binding = 0) uniform Ubo
+{
+    mat4 projection;
+    mat4 view;
+    mat4 model;
+} ubo;
+
+layout (binding = 1, rgba32f) uniform image2D fft_displacement_map;
+
+layout (binding = 3) uniform CameraPos
+{
+    vec4 position;
+} cam;
+
+layout (binding = 4) uniform sampler2D fft_normal_map;
+
+layout (binding = 5) uniform OceanParamsUbo
+{
+    vec3 light_color;
+    vec3 light_position;
+    vec3 ocean_color;
+} ocean_ubo;
+
+const float fresnel_approx_pow_factor = 2.0;
+const float specular_power = 16.0;
+const float specular_scale = 0.75;
+const float dyna_range = 0.8f;
+const vec3 ocean_dark = vec3(0.03, 0.06, 0.135);
+
+void main()
+{
+    vec3 result = vec3(0.0f);
+    ivec2 normal_texture_size = textureSize(fft_normal_map, 0);
+    vec2 offset_scale = vec2(4.0f / normal_texture_size.x, 4.0f / normal_texture_size.y);
+
+    vec3 n0 = texture(fft_normal_map, in_uv + offset_scale).xyz;
+    vec3 n1 = texture(fft_normal_map, in_uv + vec2(-offset_scale.x, offset_scale.y)).xyz;
+    vec3 n2 = texture(fft_normal_map, in_uv - offset_scale).xyz;
+    vec3 n3 = texture(fft_normal_map, in_uv - vec2(-offset_scale.x, offset_scale.y)).xyz;
+
+    float f0 = clamp(abs(dot(n0, n2) * (-0.5f) + 0.5f), 0.0f, 1.0f);
+    float f1 = clamp(abs(dot(n1, n3) * (-0.5f) + 0.5f), 0.0f, 1.0f);
+
+    f0 = pow(f0 * 5.0f, 2.0f);
+    f1 = pow(f1 * 5.0f, 2.0f);
+
+    vec4 normal_map_data = texture(fft_normal_map, in_uv);
+
+    float fac = (normal_map_data.z / 125.0f) * clamp(max(f0, f1), 0.0f, 1.0f); // NOTE(review): computed but never used below — confirm intent
+
+    mat3 normal_matrix = mat3(ubo.model);
+    vec3 normal = normal_matrix * normal_map_data.xyz;
+
+    vec3 light_dir = normalize(normal_matrix * ocean_ubo.light_position);
+    vec3 view_dir = normalize(in_pos.xyz);
+
+    vec3 specular = vec3(0.0f);
+    float n_dot_vp = max(0.0f, dot(normal, light_dir));
+    float n_dot_d = dot(normal, -view_dir);
+    float diffuse = clamp(dot(normal, light_dir), 0.0, 1.0);
+
+    if
+    (n_dot_vp > 0.0f)
+    {
+        vec3 D = -view_dir;
+        vec3 R = normalize(reflect(-light_dir, normal));
+
+        float dir_scale = mix(pow(abs(n_dot_d), 8.0f), 1.0f - pow(abs(1.0f - n_dot_d), 4.0f), n_dot_d);
+        specular = vec3(0.8f) * vec3(pow(max(dot(R, D), 0.0f), specular_power) * specular_scale * dir_scale) * ocean_ubo.light_color;
+    }
+
+    float fresnel = clamp(pow(1.0f + n_dot_d, -fresnel_approx_pow_factor) * dyna_range, 0.0f, 1.0f);
+    vec3 ambient = fresnel * ocean_ubo.ocean_color;
+    vec3 water_color = (1.0f - fresnel) * ocean_ubo.ocean_color * ocean_dark * diffuse;
+    result = ambient + water_color + specular;
+
+    // gamma correction
+    result.r = pow(result.r, 1.0f / 2.2f);
+    result.g = pow(result.g, 1.0f / 2.2f);
+    result.b = pow(result.b, 1.0f / 2.2f);
+
+    outFragColor = vec4(result, 1.0f);
+}
\ No newline at end of file
diff --git a/shaders/subgroups_operations/ocean.tesc b/shaders/subgroups_operations/ocean.tesc
new file mode 100644
index 0000000000..8b2dbeb69c
--- /dev/null
+++ b/shaders/subgroups_operations/ocean.tesc
@@ -0,0 +1,36 @@
+#version 450
+/* Copyright (c) 2024, Mobica Limited
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+layout (vertices = 3) out;
+
+layout(location = 0) in vec2 inUv[];
+
+layout(location = 0) out vec2 outUv[];
+
+void main()
+{
+    gl_TessLevelOuter[0] = 2.0f;
+    gl_TessLevelOuter[1] = 2.0f;
+    gl_TessLevelOuter[2] = 2.0f;
+    // triangle domain (vertices = 3) uses only gl_TessLevelOuter[0..2] and gl_TessLevelInner[0]
+
+    gl_TessLevelInner[0] = 2.0f;
+
+    gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
+    outUv[gl_InvocationID] = inUv[gl_InvocationID];
+}
\ No newline at end of file
diff --git a/shaders/subgroups_operations/ocean.tese b/shaders/subgroups_operations/ocean.tese
new file mode 100644
index 0000000000..52a1ef7085
--- /dev/null
+++ b/shaders/subgroups_operations/ocean.tese
@@ -0,0 +1,74 @@
+#version 450
+/* Copyright (c) 2024, Mobica Limited
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+layout (triangles, equal_spacing, ccw) in;
+
+layout (location = 0) in vec2 inUv[];
+
+layout (location = 0) out vec4 outPos;
+layout (location = 1) out vec2 outUV;
+
+layout (binding = 0) uniform Ubo
+{
+    mat4 projection;
+    mat4 view;
+    mat4 model;
+} ubo;
+
+layout (binding = 1, rgba32f) uniform image2D fft_displacement_map;
+
+layout (binding = 2) uniform TessellationParams
+{
+    float choppines;
+    float displacement_scale;
+} tessParams;
+
+vec2 interpolate_2d(vec2 v0, vec2 v1, vec2 v2)
+{
+    return vec2(gl_TessCoord.x) * v0 + vec2(gl_TessCoord.y) * v1 + vec2(gl_TessCoord.z) * v2;
+}
+
+vec3 interpolate_3d(vec3 v0, vec3 v1, vec3 v2)
+{
+    return vec3(gl_TessCoord.x) * v0 + vec3(gl_TessCoord.y) * v1 + vec3(gl_TessCoord.z) * v2;
+}
+
+vec4 interpolate_4d(vec4 v0, vec4 v1, vec4 v2)
+{
+    return vec4(gl_TessCoord.x) * v0 + vec4(gl_TessCoord.y) * v1 + vec4(gl_TessCoord.z) * v2;
+}
+
+void main()
+{
+    vec3 world_pos = interpolate_3d(gl_in[0].gl_Position.xyz, gl_in[1].gl_Position.xyz, gl_in[2].gl_Position.xyz);
+
+    vec4 fft_texel_at_vertex[3];
+    fft_texel_at_vertex[0] = imageLoad(fft_displacement_map, ivec2(inUv[0]));
+    fft_texel_at_vertex[1] = imageLoad(fft_displacement_map, ivec2(inUv[1]));
+    fft_texel_at_vertex[2] = imageLoad(fft_displacement_map, ivec2(inUv[2]));
+
+    vec4 fft_texel = interpolate_4d(fft_texel_at_vertex[0], fft_texel_at_vertex[1], fft_texel_at_vertex[2]);
+
+    world_pos.y += fft_texel.y * tessParams.displacement_scale;
+    world_pos.x -= fft_texel.x * tessParams.choppines;
+    world_pos.z -= fft_texel.z * tessParams.choppines;
+
+    outUV = interpolate_2d(inUv[0], inUv[1], inUv[2]) / 256; // NOTE(review): 256 presumably mirrors the FFT grid size — confirm against FFTParametersUbo.grid_size
+    outPos = ubo.view * vec4(world_pos, 1.0f);
+    gl_Position = ubo.projection * ubo.view * ubo.model * vec4(world_pos, 1.0f);
+}
\ No newline at end of file
diff --git a/shaders/subgroups_operations/ocean.vert b/shaders/subgroups_operations/ocean.vert
new file mode 100644
index 0000000000..253b125e5d
--- /dev/null
+++ b/shaders/subgroups_operations/ocean.vert
@@ -0,0
+1,28 @@
+#version 450
+/* Copyright (c) 2024, Mobica Limited
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 the "License";
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+layout (location = 0) in vec3 inPos;
+layout (location = 1) in vec2 inUv;
+
+layout (location = 0) out vec2 outUv;
+
+void main()
+{
+    gl_Position = vec4(inPos, 1.0f);
+    outUv = inUv;
+}
\ No newline at end of file