diff --git a/CMakeLists.txt b/CMakeLists.txt index 9f19c7ea..a8433256 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,9 +26,14 @@ include(CMakeDependentOption) option(OPENCL_SDK_BUILD_UTILITY_LIBRARIES "Build utility libraries" ON) cmake_dependent_option(OPENCL_SDK_BUILD_SAMPLES "Build sample code" ON OPENCL_SDK_BUILD_UTILITY_LIBRARIES OFF) cmake_dependent_option(OPENCL_SDK_BUILD_OPENGL_SAMPLES "Build OpenCL-OpenGL interop sample code" OFF OPENCL_SDK_BUILD_SAMPLES OFF) -cmake_dependent_option(OPENCL_SDK_BUILD_VULKAN_SAMPLES "Build OpenCL-Vulkan interop sample code" ON OPENCL_SDK_BUILD_SAMPLES OFF) +cmake_dependent_option(OPENCL_SDK_BUILD_VULKAN_SAMPLES "Build OpenCL-Vulkan interop sample code" OFF OPENCL_SDK_BUILD_SAMPLES OFF) cmake_dependent_option(OPENCL_SDK_TEST_SAMPLES "Add CTest to samples (where applicable)" ON OPENCL_SDK_BUILD_SAMPLES OFF) +find_package(Vulkan) +if(VULKAN_FOUND) + cmake_dependent_option(OPENCL_SDK_BUILD_VULKAN_SAMPLES "Build OpenCL-Vulkan interop sample code" ON OPENCL_SDK_BUILD_SAMPLES OFF) +endif() + option(OPENCL_SDK_BUILD_CLINFO "Build clinfo utility" ON) if (("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") AND ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")) string(APPEND CMAKE_CXX_FLAGS " -stdlib=libstdc++") diff --git a/README.md b/README.md index 00d0df29..e5944141 100644 --- a/README.md +++ b/README.md @@ -61,7 +61,7 @@ If CMake is not provided by your build system or OS package manager, please cons -B ./OpenCL-SDK/build -S ./OpenCL-SDK cmake --build ./OpenCL-SDK/build --target install -Samples that make use of OpenGL interop are disabled by default to reduce +Samples that make use of OpenGL or Vulkan interop are disabled by default to reduce the number of dependencies for most users. They can be enabled using the `OPENCL_SDK_BUILD_OPENGL_SAMPLES` CMake option. diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 06145cb8..45d88286 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -51,6 +51,20 @@ if(OPENCL_SDK_BUILD_SAMPLES) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Dependencies/${DEP}") include(${DEP}) endforeach() + if(OPENCL_SDK_BUILD_VULKAN_SAMPLES AND SFML_VERSION VERSION_GREATER_EQUAL 2.6) + foreach(DEP IN ITEMS X11 SFML) + list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Dependencies/${DEP}") + include(${DEP}) + endforeach() + endif() + else() + if(OPENCL_SDK_BUILD_VULKAN_SAMPLES AND SFML_VERSION VERSION_GREATER_EQUAL 2.6) + foreach(DEP IN ITEMS X11 glm SFML) + list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/Dependencies/${DEP}") + include(${DEP}) + endforeach() + endif() + endif(OPENCL_SDK_BUILD_OPENGL_SAMPLES) if(OPENCL_SDK_BUILD_VULKAN_SAMPLES) diff --git a/cmake/Dependencies/glfw/glfw.cmake b/cmake/Dependencies/glfw/glfw.cmake new file mode 100644 index 00000000..5849a4d1 --- /dev/null +++ b/cmake/Dependencies/glfw/glfw.cmake @@ -0,0 +1,46 @@ +if(NOT DEPENDENCIES_FORCE_DOWNLOAD AND NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/_deps/glfw-external-src") + find_package(glfw3 CONFIG) + # To avoid every test depening on GLFW define their deps using + # + # add_sample( + # LIBS + # $<$:glfw> + # INCLUDES + # $<$>:"${GLFW_INCLUDE_DIRS}"> + # ) + # + # we create the INTERFACE target in case it didn't exist. + if(glfw3_FOUND AND NOT TARGET glfw) + add_library(glfw INTERFACE) + target_include_directories(glfw INTERFACE "${GLFW_INCLUDE_DIRS}") + endif() +endif() + +if(NOT (glfw3_FOUND OR TARGET glfw)) + if(NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/_deps/glfw-external-src") + if(DEPENDENCIES_FORCE_DOWNLOAD) + message(STATUS "DEPENDENCIES_FORCE_DOWNLOAD is ON. Fetching glfw.") + else() + message(STATUS "Fetching glfw.") + endif() + message(STATUS "Adding glfw subproject: ${CMAKE_CURRENT_BINARY_DIR}/_deps/glfw-external-src") + endif() + cmake_minimum_required(VERSION 3.11) + include(FetchContent) + set(GLFW_BUILD_EXAMPLES OFF CACHE BOOL "Build the GLFW example programs.") + set(GLFW_BUILD_TESTS OFF CACHE BOOL "Build the GLFW test programs.") + FetchContent_Declare( + glfw-external + GIT_REPOSITORY https://github.com/glfw/glfw + GIT_TAG 3.3.6 # 7d5a16ce714f0b5f4efa3262de22e4d948851525 + ) + FetchContent_MakeAvailable(glfw-external) + set_target_properties(glfw + PROPERTIES + RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_BINDIR}" + ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}" + LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}" + INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" + FOLDER "Dependencies" + ) +endif() diff --git a/lib/include/CL/Utils/File.hpp b/lib/include/CL/Utils/File.hpp index b34b0bb7..d5dcf382 100644 --- a/lib/include/CL/Utils/File.hpp +++ b/lib/include/CL/Utils/File.hpp @@ -32,5 +32,8 @@ namespace util { std::string UTILSCPP_EXPORT read_exe_relative_text_file( const char* const filename, cl_int* const error = nullptr); + + std::vector UTILSCPP_EXPORT read_exe_relative_binary_file( + const char* const filename, cl_int* const error = nullptr); } } diff --git a/lib/src/Utils/File.cpp b/lib/src/Utils/File.cpp index 77c00c9f..ff4177ec 100644 --- a/lib/src/Utils/File.cpp +++ b/lib/src/Utils/File.cpp @@ -170,3 +170,27 @@ std::string cl::util::read_exe_relative_text_file(const char* const filename, } return result; } + +std::vector +cl::util::read_exe_relative_binary_file(const char* const filename, + cl_int* const error) +{ + std::vector result; + cl_int err = CL_SUCCESS; + std::string exe_folder = executable_folder(&err); + if (err != CL_SUCCESS) + { + detail::errHandler(CL_UTIL_FILE_OPERATION_ERROR, error, + "Failed to query exe folder!"); + return result; + } + result = read_binary_file((exe_folder + "/" + filename).c_str(), &err); + if (err != CL_SUCCESS) + { + result.clear(); + detail::errHandler(CL_UTIL_FILE_OPERATION_ERROR, error, + "Unable to read file!"); + return result; + } + return result; +} diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 2eae373d..a4613504 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -148,3 +148,4 @@ endmacro() add_subdirectory(core) add_subdirectory(extensions) +add_subdirectory(vulkan) diff --git a/samples/vulkan/CMakeLists.txt b/samples/vulkan/CMakeLists.txt new file mode 100755 index 00000000..d015f2b2 --- /dev/null +++ b/samples/vulkan/CMakeLists.txt @@ -0,0 +1,18 @@ +# Copyright (c) 2021 The Khronos Group Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +if(OPENCL_SDK_BUILD_VULKAN_SAMPLES) + add_subdirectory( ocean ) +endif() diff --git a/samples/vulkan/ocean/CMakeLists.txt b/samples/vulkan/ocean/CMakeLists.txt new file mode 100755 index 00000000..dca78181 --- /dev/null +++ b/samples/vulkan/ocean/CMakeLists.txt @@ -0,0 +1,23 @@ +# Copyright (c) 2024 Mobica Limited, Marcin Hajder +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_sample( + TARGET ocean_vk_ocl_interop + VERSION 300 # clCreateImageWithProperties + CATEGORY vulkan + SOURCES main.cpp ocean.cpp ocean.hpp ocean_util.hpp + SHADERS ocean.vert.spv ocean.frag.spv + KERNELS twiddle.cl time_spectrum.cl inversion.cl normals.cl fft_kernel.cl init_spectrum.cl + INCLUDES ${Vulkan_INCLUDE_DIR} + LIBS ${Vulkan_LIBRARY}) diff --git a/samples/vulkan/ocean/README.md b/samples/vulkan/ocean/README.md new file mode 100755 index 00000000..35d84804 --- /dev/null +++ b/samples/vulkan/ocean/README.md @@ -0,0 +1,73 @@ +# Ocean surface simulation with Opencl and Vulkan interoperability + +[Ocean Simulation With OpenCL and Vulkan](ocean.png) + +## Sample Purpose + +This sample demonstrates how to share compute/render resources between OpenCL and Vulkan to simulate an ocean surface. If the cl_khr_external_memory extension is available and requested (through CLI options), some OpenCL images will be created through a file descriptor handle received with vkGetMemoryFdKHR. These images will then be used for ocean rendering. If cl_khr_external_memory is not available, additional copying from OpenCL buffers to Vulkan images will be performed. + +## Key APIs and Concepts + +The primary focus of this sample is to understand how to set up shared resources between OpenCL and Vulkan interoperability. Additionally, this sample demonstrates how to approach physical, real-time simulations in OpenCL and the API objects involved in executing an OpenCL application such as ocean surface simulation. + + +### Application flow + +The application performs an initial setup during which: + + -An OpenCL platform and Vulkan physical device are selected based on CLI options. + -OpenCL and Vulkan devices are prepared. + -A GLFW window, camera, and related keyboard event callbacks are created. + -Both shared and private resources for OpenCL and Vulkan are set up. + +Available CLI options are as follows: + + --window_width, specifies initial window width + --window_height, specifies initial window window_height + --vulkan_device, requests number of vulkan physical device + --immediate, requests preference of VK_PRESENT_MODE_IMMEDIATE_KHR (no vsync) + --linear, requests use of linearly tiled images + --deviceLocalImages, requests use of device local images + --useExternalMemory, requests use of cl_khr_external_memory + +After the setup, the simulation starts with initial ocean parameters that can be modified with keyboard events in real-time: + + - a/z - Increase/decrease wind magnitude. + - s/x - Change wind heading. + - d/c - Increase/decrease waving amplitude. + - f/v - Increase/decrease wave choppiness. + - g/b - Increase/decrease additional altitude scale. + +Additionally, the simulation and rendering can be paused with the Space key. Rendering can toggle between wireframe and filled modes using the 'w' key. Application tracks its performance in the title bar of the window, it could be toggled by pressing 'e' key. + +While the simulation is in progress, each frame of the application performs the following general steps: + + -Necessary Vulkan/OpenCL semaphores are signaled/waited. + -Uniform buffers are updated to handle camera and ocean parameters. + -OpenCL kernels are enqueued. + -The ocean grid is rendered using the previous OpenCL computation outcome. + + +### Kernel logic + +Multiple kernels follow the general steps (with multiple optimizations) described in the publication: [Realtime GPGPU FFT ocean water simulation](https://tore.tuhh.de/bitstream/11420/1439/1/GPGPU_FFT_Ocean_Simulation.pdf) + +### Used API surface + +```c++ +cl::util::supports_extension(cl::Device, cl::string) +cl::util::read_exe_relative_text_file(const char*, cl_int* const) +cl::util::read_exe_relative_binary_file(const char*, cl_int* const) +cl::Context(cl::Device) +cl::CommandQueue(cl::Context, cl::Device) +cl::Platform::get(vector) +cl::Platform::getDevices(Type, vector) +cl::Program::build() +cl::Image2D(cl::Context, cl_mem_flags, ImageFormat, size_type, size_type) +cl::Error::what() +cl::Error::err() +cl::NDRange(size_type, size_type) +cl::Buffer::Buffer(cl::Context, cl_mem_flags, size_type) +``` + + diff --git a/samples/vulkan/ocean/fft_kernel.cl b/samples/vulkan/ocean/fft_kernel.cl new file mode 100644 index 00000000..e7c120fe --- /dev/null +++ b/samples/vulkan/ocean/fft_kernel.cl @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE; + +typedef float2 complex; + +complex mul(complex c0, complex c1) +{ + return (complex)(c0.x * c1.x - c0.y * c1.y, c0.x * c1.y + c0.y * c1.x); +} + +complex add(complex c0, complex c1) +{ + return (complex)(c0.x + c1.x, c0.y + c1.y); +} + +// mode.x - 0-horizontal, 1-vertical +// mode.y - subsequent count + +__kernel void fft_1D( int2 mode, int2 patch_info, + read_only image2d_t twiddle, read_only image2d_t src, write_only image2d_t dst ) +{ + int2 uv = (int2)((int)get_global_id(0), (int)get_global_id(1)); + + int2 data_coords = (int2)(mode.y, uv.x * (1-mode.x) + uv.y * mode.x); + float4 data = read_imagef(twiddle, sampler, data_coords); + + int2 pp_coords0 = (int2)(data.z, uv.y) * (1-mode.x) + (int2)(uv.x, data.z) * mode.x; + float2 p = read_imagef(src, sampler, pp_coords0).xy; + + int2 pp_coords1 = (int2)(data.w, uv.y) * (1-mode.x) + (int2)(uv.x, data.w) * mode.x; + float2 q = read_imagef(src, sampler, pp_coords1).xy; + + float2 w = (float2)(data.x, data.y); + + //Butterfly operation + complex H = add(p,mul(w,q)); + + write_imagef(dst, uv, (float4)(H.x, H.y, 0, 1)); +} diff --git a/samples/vulkan/ocean/init_spectrum.cl b/samples/vulkan/ocean/init_spectrum.cl new file mode 100644 index 00000000..1d1b99cb --- /dev/null +++ b/samples/vulkan/ocean/init_spectrum.cl @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +constant float PI = 3.14159265359f; +constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE; +constant float GRAVITY = 9.81f; + +float4 gaussRND(float4 rnd) +{ + float u0 = 2.0*PI*rnd.x; + float v0 = sqrt(-2.0 * log(rnd.y)); + float u1 = 2.0*PI*rnd.z; + float v1 = sqrt(-2.0 * log(rnd.w)); + + float4 ret = (float4)(v0 * cos(u0), v0 * sin(u0), v1 * cos(u1), v1 * sin(u1)); + return ret; +} + +// patch_info.x - ocean patch size +// patch_info.y - ocean texture unified resolution +// params.x - wind x +// params.y - wind.y +// params.z - amplitude +// params.w - capillar supress factor + +kernel void init_spectrum( int2 patch_info, float4 params, read_only image2d_t noise, write_only image2d_t dst ) +{ + int2 uv = (int2)((int)get_global_id(0), (int)get_global_id(1)); + int res = patch_info.y; + + float2 fuv = convert_float2(uv) - (float2)((float)(res-1)/2.f); + float2 k = (2.f * PI * fuv) / patch_info.x; + float k_mag = length(k); + + if (k_mag < 0.00001) k_mag = 0.00001; + + float wind_speed = length((float2)(params.x, params.y)); + float4 params_n = params; + params_n.xy = (float2)(params.x/wind_speed, params.y/wind_speed); + float l_phillips = (wind_speed * wind_speed) / GRAVITY; + float4 rnd = clamp(read_imagef(noise, sampler, uv), 0.001f, 1.f); + + float magSq = k_mag * k_mag; + float h0k = sqrt((params.z/(magSq*magSq)) * pow(dot(normalize(k), params_n.xy), 2.f) * + exp(-(1.0/(magSq * l_phillips * l_phillips))) * exp(-magSq*pow(params.w, 2.f)))/ sqrt(2.0); + float h0minusk = sqrt((params.z/(magSq*magSq)) * pow(dot(normalize(-k), params_n.xy), 2.f) * + exp(-(1.0/(magSq * l_phillips * l_phillips))) * exp(-magSq*pow(params.w, 2.f)))/ sqrt(2.0); + float4 gauss_random = gaussRND(rnd); + write_imagef(dst, uv, (float4)(gauss_random.xy*h0k, gauss_random.zw*h0minusk)); +} diff --git a/samples/vulkan/ocean/inversion.cl b/samples/vulkan/ocean/inversion.cl new file mode 100644 index 00000000..ceec43b7 --- /dev/null +++ b/samples/vulkan/ocean/inversion.cl @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE; + +kernel void inversion( int2 patch_info, read_only image2d_t src0, + read_only image2d_t src1, read_only image2d_t src2, write_only image2d_t dst ) +{ + int2 uv = (int2)((int)get_global_id(0), (int)get_global_id(1)); + int res2 = patch_info.y * patch_info.y; + + float x = read_imagef(src0, sampler, uv).x; + float y = read_imagef(src1, sampler, uv).x; + float z = read_imagef(src2, sampler, uv).x; + + write_imagef(dst, uv, (float4)(x/res2, y/res2, z/res2, 1)); +} diff --git a/samples/vulkan/ocean/main.cpp b/samples/vulkan/ocean/main.cpp new file mode 100755 index 00000000..2b2b8a1f --- /dev/null +++ b/samples/vulkan/ocean/main.cpp @@ -0,0 +1,206 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "ocean.hpp" +#include + +void OceanApplication::event(const sf::Event& event) +{ + switch (event.type) + { + case sf::Event::Closed: window.close(); break; + case sf::Event::Resized: + // not supported + break; + case sf::Event::KeyPressed: keyboard(event.key.code); break; + case sf::Event::MouseButtonPressed: + if (event.mouseButton.button == sf::Mouse::Button::Left) + { + camera.drag = true; + camera.begin = + glm::vec2(event.mouseButton.x, event.mouseButton.y); + } + break; + case sf::Event::MouseButtonReleased: + if (event.mouseButton.button == sf::Mouse::Button::Left) + camera.drag = false; + break; + case sf::Event::MouseMoved: + if (camera.drag) mouseDrag(event.mouseMove.x, event.mouseMove.y); + break; + case sf::Event::MouseWheelMoved: + camera.eye += + camera.dir * (float)event.mouseWheel.delta * ROLL_SPEED_FAC; + break; + default: break; + } +} + +void OceanApplication::mouseDrag(const int x, const int y) +{ + if (!camera.drag) return; + + glm::vec2 off = camera.begin - glm::vec2(x, y); + camera.begin = glm::vec2(x, y); + + camera.yaw -= off.x * DRAG_SPEED_FAC; + camera.pitch += off.y * DRAG_SPEED_FAC; + + glm::quat yaw(glm::cos(glm::radians(camera.yaw / 2)), + glm::vec3(0, 0, 1) * glm::sin(glm::radians(camera.yaw / 2))); + glm::quat pitch(glm::cos(glm::radians(camera.pitch / 2)), + glm::vec3(1, 0, 0) + * glm::sin(glm::radians(camera.pitch / 2))); + glm::mat3 rot_mat(yaw * pitch); + glm::vec3 dir = rot_mat * glm::vec3(0, 0, -1); + + camera.dir = glm::normalize(dir); + camera.rvec = glm::normalize(glm::cross(camera.dir, glm::vec3(0, 0, 1))); + camera.up = glm::normalize(glm::cross(camera.rvec, camera.dir)); +} + +void OceanApplication::keyboard(int key) +{ + switch (key) + { + case sf::Keyboard::Key::Escape: window.close(); break; + case sf::Keyboard::Key::Space: + animate = !animate; + printf("animation is %s\n", animate ? "ON" : "OFF"); + break; + + case sf::Keyboard::Key::A: + wind_magnitude += 1.f; + changed = true; + break; + case sf::Keyboard::Key::Z: + wind_magnitude -= 1.f; + changed = true; + break; + + case sf::Keyboard::Key::S: + wind_angle += 1.f; + changed = true; + break; + case sf::Keyboard::Key::X: + wind_angle -= 1.f; + changed = true; + break; + + case sf::Keyboard::Key::D: + amplitude += 0.5f; + changed = true; + break; + case sf::Keyboard::Key::C: + amplitude -= 0.5f; + changed = true; + break; + + case sf::Keyboard::Key::F: choppiness += 0.5f; break; + case sf::Keyboard::Key::V: choppiness -= 0.5f; break; + + case sf::Keyboard::Key::G: alt_scale += 0.5f; break; + case sf::Keyboard::Key::B: alt_scale -= 0.5f; break; + + case sf::Keyboard::Key::W: wireframe_mode = !wireframe_mode; break; + + case sf::Keyboard::Key::E: show_fps = !show_fps; break; + } +} + +void OceanApplication::main_loop() +{ + while (window.isOpen()) + { + // Render the frame + draw_frame(); + + // Process events + sf::Event e; + while (window.pollEvent(e)) + { + event(e); + } + } + + if (vkQueueWaitIdle(graphics_queue) != VK_SUCCESS) + { + throw std::runtime_error("vkQueueWaitIdle failed!"); + } + + vkDeviceWaitIdle(device); +} + +template <> auto cl::sdk::parse() +{ + return std::make_tuple( + std::make_shared>( + "", "vulkan_device", "Vulkan physical device", false, -1, + "integral number"), + std::make_shared>( + "", "immediate", "Prefer VK_PRESENT_MODE_IMMEDIATE_KHR (no vsync)", + false, false, "boolean"), + std::make_shared>( + "", "linear", "Use linearly tiled images", false, false, "boolean"), + std::make_shared>("", "deviceLocalImages", + "Use device local images", + false, true, "boolean"), + std::make_shared>("", "useExternalMemory", + "Use cl_khr_external_memory", + false, true, "boolean"), + std::make_shared>("", "validationLayersOn", + "Use vulkan validation layers", + false, false, "boolean")); +} + +template <> +CliOptions cl::sdk::comprehend( + std::shared_ptr> vulkan_device, + std::shared_ptr> immediate, + std::shared_ptr> linearImages, + std::shared_ptr> deviceLocalImages, + std::shared_ptr> useExternalMemory, + std::shared_ptr> validationLayersOn) +{ + return CliOptions{ + vulkan_device->getValue(), immediate->getValue(), + linearImages->getValue(), deviceLocalImages->getValue(), + useExternalMemory->getValue(), validationLayersOn->getValue() + }; +} + +int main(int argc, char** argv) +{ + auto opts = cl::sdk::parse_cli( + argc, argv); + + OceanApplication app(std::get<0>(opts)); + + app.dev_opts = std::get<1>(opts); + app.app_opts = std::get<2>(opts); + + try + { + app.run(); + } catch (const std::exception& e) + { + fprintf(stderr, "%s\n", e.what()); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/samples/vulkan/ocean/normals.cl b/samples/vulkan/ocean/normals.cl new file mode 100644 index 00000000..be1d7a4a --- /dev/null +++ b/samples/vulkan/ocean/normals.cl @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +constant sampler_t sampler = CLK_ADDRESS_REPEAT | CLK_FILTER_LINEAR | CLK_NORMALIZED_COORDS_TRUE; +constant float normal_scale_fac = 3.f; +// patch_info.x - ocean patch size +// patch_info.y - ocean texture unified resolution +// scale_fac.x - choppines +// scale_fac.y - altitude scale +kernel void normals( int2 patch_info, float2 scale_fac, read_only image2d_t noise, + read_only image2d_t src, write_only image2d_t dst ) +{ + int2 uv = (int2)((int)get_global_id(0), (int)get_global_id(1)); + float2 fuv = convert_float2(uv) / patch_info.y; + + float texel = 1.f / patch_info.y; + + float dz_c = read_imagef(src, sampler, fuv).y; + float dz_cr = read_imagef(src, sampler, (float2)(fuv.x + texel, fuv.y)).y; + float dz_ct = read_imagef(src, sampler, (float2)(fuv.x, fuv.y + texel)).y; + float dz_cl = read_imagef(src, sampler, (float2)(fuv.x - texel, fuv.y)).y; + float dz_cb = read_imagef(src, sampler, (float2)(fuv.x, fuv.y - texel)).y; + float dz_tr = read_imagef(src, sampler, (float2)(fuv.x + texel, fuv.y + texel)).y; + float dz_br = read_imagef(src, sampler, (float2)(fuv.x + texel, fuv.y - texel)).y; + float dz_tl = read_imagef(src, sampler, (float2)(fuv.x - texel, fuv.y + texel)).y; + float dz_bl = read_imagef(src, sampler, (float2)(fuv.x - texel, fuv.y - texel)).y; + + float3 normal = (float3)(0.f, 0.f, 1.f / normal_scale_fac); + normal.y = dz_c + 2.f * dz_cb + dz_br - dz_tl - 2.f * dz_ct - dz_tr; + normal.x = dz_c + 2.f * dz_cl + dz_tl - dz_br - 2.f * dz_cr - dz_tr; + + float4 n = read_imagef(noise, sampler, fuv*(float2)(4.0)); + float* pn = &n; + + write_imagef(dst, uv, (float4)(normalize(normal), pn[(uv.x+uv.y)%4])); +} diff --git a/samples/vulkan/ocean/ocean.cpp b/samples/vulkan/ocean/ocean.cpp new file mode 100755 index 00000000..81015959 --- /dev/null +++ b/samples/vulkan/ocean/ocean.cpp @@ -0,0 +1,2548 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// The code in this sample was derived from several samples in the Vulkan +// Tutorial: https://vulkan-tutorial.com +// +// The code samples in the Vulkan Tutorial are licensed as CC0 1.0 Universal. + +#include +#if !defined(cl_khr_external_memory) +#error cl_khr_external_memory not found, please update your OpenCL headers! +#endif + +#include +#include +#include +#include + +#include + +// GLM includes +#include +#include + +#include "ocean.hpp" + +OceanApplication::OceanApplication(cl::sdk::options::Window& opts) + : win_opts(opts), app_name("Ocean Surface Simulation"), + window(sf::VideoMode({ (std::uint32_t)win_opts.width, + (std::uint32_t)win_opts.height }), + app_name.c_str(), sf::Style::Titlebar | sf::Style::Close) +{} + +void OceanApplication::run() +{ + init_openCL(); + init_vulkan(); + init_openCL_mems(); + main_loop(); + cleanup(); +} + +void OceanApplication::init_openCL() +{ + std::vector platforms; + cl::Platform::get(&platforms); + + printf("Running on platform: %s\n", + platforms[dev_opts.triplet.plat_index] + .getInfo() + .c_str()); + std::vector devices; + platforms[dev_opts.triplet.plat_index].getDevices(CL_DEVICE_TYPE_ALL, + &devices); + + printf( + "Running on device: %s\n", + devices[dev_opts.triplet.dev_index].getInfo().c_str()); + + check_openCL_ext_mem_support(devices[dev_opts.triplet.dev_index]); + + int error = CL_SUCCESS; + error |= clGetDeviceInfo( + devices[dev_opts.triplet.dev_index](), CL_DEVICE_IMAGE2D_MAX_WIDTH, + sizeof(ocl_max_img2d_width), &ocl_max_img2d_width, NULL); + error |= clGetDeviceInfo( + devices[dev_opts.triplet.dev_index](), CL_DEVICE_MAX_MEM_ALLOC_SIZE, + sizeof(ocl_max_alloc_size), &ocl_max_alloc_size, NULL); + error |= clGetDeviceInfo(devices[dev_opts.triplet.dev_index](), + CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(ocl_mem_size), + &ocl_mem_size, NULL); + + if (error != CL_SUCCESS) printf("clGetDeviceInfo error: %d\n", error); + + context = cl::Context{ devices[dev_opts.triplet.dev_index] }; + command_queue = + cl::CommandQueue{ context, devices[dev_opts.triplet.dev_index] }; + + auto build_opencl_kernel = [&](const char* src_file, cl::Kernel& kernel, + const char* name) { + try + { + std::string kernel_code = + cl::util::read_exe_relative_text_file(src_file); + cl::Program program{ context, kernel_code }; + program.build(); + kernel = cl::Kernel{ program, name }; + } catch (const cl::BuildError& e) + { + auto bl = e.getBuildLog(); + std::cout << "Build OpenCL " << name + << " kernel error: " << std::endl; + for (auto& elem : bl) std::cout << elem.second << std::endl; + exit(1); + } + }; + + build_opencl_kernel("twiddle.cl", twiddle_kernel, "generate"); + build_opencl_kernel("init_spectrum.cl", init_spectrum_kernel, + "init_spectrum"); + build_opencl_kernel("time_spectrum.cl", time_spectrum_kernel, "spectrum"); + build_opencl_kernel("fft_kernel.cl", fft_kernel, "fft_1D"); + build_opencl_kernel("inversion.cl", inversion_kernel, "inversion"); + build_opencl_kernel("normals.cl", normals_kernel, "normals"); +} + +void OceanApplication::init_openCL_mems() +{ + // init opencl resources + try + { + { + std::vector phase_array(ocean_tex_size * ocean_tex_size); + std::random_device dev; + std::mt19937 rng(dev()); + std::uniform_real_distribution dist(0.f, 1.f); + + for (size_t i = 0; i < phase_array.size(); ++i) + phase_array[i] = { dist(rng), dist(rng), dist(rng), dist(rng) }; + + noise_mem = std::make_unique( + context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, + cl::ImageFormat(CL_RGBA, CL_FLOAT), ocean_tex_size, + ocean_tex_size, 0, phase_array.data()); + } + + + hkt_pong_mem = std::make_unique( + context, CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), + ocean_tex_size, ocean_tex_size); + + dxyz_coef_mem[0] = std::make_unique( + context, CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), + ocean_tex_size, ocean_tex_size); + + dxyz_coef_mem[1] = std::make_unique( + context, CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), + ocean_tex_size, ocean_tex_size); + + dxyz_coef_mem[2] = std::make_unique( + context, CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), + ocean_tex_size, ocean_tex_size); + + h0k_mem = std::make_unique( + context, CL_MEM_READ_WRITE, cl::ImageFormat(CL_RGBA, CL_FLOAT), + ocean_tex_size, ocean_tex_size); + + size_t log_2_N = (size_t)((log((float)ocean_tex_size) / log(2.f)) - 1); + + twiddle_factors_mem = std::make_unique( + context, CL_MEM_READ_WRITE, cl::ImageFormat(CL_RGBA, CL_FLOAT), + log_2_N, ocean_tex_size); + + for (size_t target = 0; target < IOPT_COUNT; target++) + { + ocl_image_mems[target].resize(swap_chain_images.size()); + + for (size_t i = 0; i < swap_chain_images.size(); i++) + { + if (app_opts.use_external_memory) + { +#ifdef _WIN32 + HANDLE handle = NULL; + VkMemoryGetWin32HandleInfoKHR getWin32HandleInfo{}; + getWin32HandleInfo.sType = + VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR; + getWin32HandleInfo.memory = + texture_images[target].image_memories[i]; + getWin32HandleInfo.handleType = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; + vkGetMemoryWin32HandleKHR(device, &getWin32HandleInfo, + &handle); + + const cl_mem_properties props[] = { + external_mem_type, + (cl_mem_properties)handle, + 0, + }; +#elif defined(__linux__) + int fd = 0; + VkMemoryGetFdInfoKHR getFdInfo{}; + getFdInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + getFdInfo.memory = + texture_images[target] + .image_memories[i]; // textureImageMemories[i]; + getFdInfo.handleType = external_mem_type + == CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR + ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT + : VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; + vkGetMemoryFdKHR(device, &getFdInfo, &fd); + + const cl_mem_properties props[] = { + external_mem_type, + (cl_mem_properties)fd, + 0, + }; +#else + const cl_mem_properties* props = NULL; +#endif + + cl_image_format format{}; + format.image_channel_order = CL_RGBA; + format.image_channel_data_type = CL_FLOAT; + + cl_image_desc desc{}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = ocean_tex_size; + desc.image_height = ocean_tex_size; + + ocl_image_mems[target][i].reset(new cl::Image2D{ + clCreateImageWithProperties(context(), props, + CL_MEM_READ_WRITE, &format, + &desc, NULL, NULL) }); + } + else + { + ocl_image_mems[target][i].reset( + new cl::Image2D{ context, CL_MEM_READ_WRITE, + cl::ImageFormat{ CL_RGBA, CL_FLOAT }, + ocean_tex_size, ocean_tex_size }); + } + } + } + } catch (const cl::Error& e) + { + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; + exit(e.err()); + } +} + +void OceanApplication::init_vulkan() +{ + create_instance(); + setup_dbg_msger(); + create_surface(); + pick_physical_device(); + create_logical_device(); + create_swap_chain(); + create_swap_chain_image_views(); + create_render_pass(); + create_uniform_buffer(); + create_descriptor_set_layout(); + create_graphics_pipeline(); + create_command_pool(); + + create_depth_resources(); + create_vertex_buffers(); + create_index_buffers(); + + create_framebuffers(); + create_texture_images(); + create_texture_image_views(); + create_texture_sampler(); + create_descriptor_pool(); + create_descriptor_sets(); + create_command_buffers(); + create_sync_objects(); +} + +void OceanApplication::cleanup() +{ + for (auto framebuffer : swap_chain_framebuffers) + { + vkDestroyFramebuffer(device, framebuffer, nullptr); + } + + for (auto imageView : swap_chain_image_views) + { + vkDestroyImageView(device, imageView, nullptr); + } + + vkDestroySwapchainKHR(device, swap_chain, nullptr); + + vkDestroyPipeline(device, graphics_pipeline, nullptr); + vkDestroyPipeline(device, wireframe_pipeline, nullptr); + vkDestroyPipelineLayout(device, pipeline_layout, nullptr); + vkDestroyRenderPass(device, render_pass, nullptr); + + vkDestroyImageView(device, depth_image_view, nullptr); + vkDestroyImage(device, depth_image, nullptr); + vkFreeMemory(device, depth_image_memory, nullptr); + + vkDestroyDescriptorPool(device, descriptor_pool, nullptr); + + vkDestroyBuffer(device, staging_tex_buffer, nullptr); + vkFreeMemory(device, staging_tex_buffer_memory, nullptr); + + for (size_t img_num = 0; img_num < texture_images.size(); img_num++) + { + for (auto textureImageView : texture_images[img_num].image_views) + { + vkDestroyImageView(device, textureImageView, nullptr); + } + for (auto textureImage : texture_images[img_num].images) + { + vkDestroyImage(device, textureImage, nullptr); + } + for (auto textureImageMemory : texture_images[img_num].image_memories) + { + vkFreeMemory(device, textureImageMemory, nullptr); + } + } + + for (size_t sampler_num = 0; sampler_num < texture_sampler.size(); + sampler_num++) + { + vkDestroySampler(device, texture_sampler[sampler_num], nullptr); + } + + // cleanup vertices buffers + for (auto& buffer : vertex_buffers) + { + vkDestroyBuffer(device, buffer, nullptr); + } + + for (auto& bufferMemory : vertex_buffer_memories) + { + vkFreeMemory(device, bufferMemory, nullptr); + } + + // cleanup indices buffers + for (auto buffer : index_buffer.buffers) + { + vkDestroyBuffer(device, buffer, nullptr); + } + + for (auto& bufferMemory : index_buffer.buffer_memories) + { + vkFreeMemory(device, bufferMemory, nullptr); + } + + for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) + { + vkDestroySemaphore(device, render_finished_semaphores[i], nullptr); + vkDestroySemaphore(device, image_available_semaphores[i], nullptr); + vkDestroyFence(device, in_flight_fences[i], nullptr); + } + + for (auto& unif_buffer : uniform_buffers) + { + vkDestroyBuffer(device, unif_buffer, nullptr); + } + + for (auto& unif_buf_mem : uniform_buffers_memory) + { + vkFreeMemory(device, unif_buf_mem, nullptr); + } + + vkDestroyDescriptorSetLayout(device, descriptor_set_layout, nullptr); + + vkDestroyCommandPool(device, command_pool, nullptr); + + vkDestroyDevice(device, nullptr); + + if (app_opts.validationLayersOn) + { + DestroyDebugUtilsMessengerEXT(instance, debug_messenger, nullptr); + } + + vkDestroySurfaceKHR(instance, surface, nullptr); + vkDestroyInstance(instance, nullptr); +} + +void OceanApplication::create_instance() +{ + if (app_opts.validationLayersOn && !check_validation_layer_support()) + { + throw std::runtime_error( + "validation layers requested, but not available!"); + } + + VkApplicationInfo appInfo{}; + appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + appInfo.pApplicationName = + "Ocean Surface Simulation with OpenCL+Vulkan Sample"; + appInfo.applicationVersion = VK_MAKE_VERSION(1, 0, 0); + appInfo.pEngineName = "No Engine"; + appInfo.engineVersion = VK_MAKE_VERSION(1, 0, 0); + if (app_opts.use_external_memory) + { + appInfo.apiVersion = VK_API_VERSION_1_1; + } + else + { + appInfo.apiVersion = VK_API_VERSION_1_0; + } + + VkInstanceCreateInfo createInfo{}; + createInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + createInfo.pApplicationInfo = &appInfo; + + auto extensions = get_required_exts(); + createInfo.enabledExtensionCount = static_cast(extensions.size()); + createInfo.ppEnabledExtensionNames = extensions.data(); + + VkDebugUtilsMessengerCreateInfoEXT debugCreateInfo{}; + if (app_opts.validationLayersOn) + { + createInfo.enabledLayerCount = + static_cast(validationLayers.size()); + createInfo.ppEnabledLayerNames = validationLayers.data(); + + populate_dbg_msger_create_info(debugCreateInfo); + createInfo.pNext = + (VkDebugUtilsMessengerCreateInfoEXT*)&debugCreateInfo; + } + else + { + createInfo.enabledLayerCount = 0; + + createInfo.pNext = nullptr; + } + + if (vkCreateInstance(&createInfo, nullptr, &instance) != VK_SUCCESS) + { + throw std::runtime_error("failed to create instance!"); + } + +#ifdef _WIN32 + if (app_opts.use_external_memory) + { + vkGetMemoryWin32HandleKHR = + (PFN_vkGetMemoryWin32HandleKHR)vkGetInstanceProcAddr( + instance, "vkGetMemoryWin32HandleKHR"); + if (vkGetMemoryWin32HandleKHR == NULL) + { + throw std::runtime_error("couldn't get function pointer for " + "vkGetMemoryWin32HandleKHR"); + } + } +#elif defined(__linux__) + if (app_opts.use_external_memory) + { + vkGetMemoryFdKHR = (PFN_vkGetMemoryFdKHR)vkGetInstanceProcAddr( + instance, "vkGetMemoryFdKHR"); + if (vkGetMemoryFdKHR == NULL) + { + throw std::runtime_error( + "couldn't get function pointer for vkGetMemoryFdKHR"); + } + } +#endif +} + +void OceanApplication::populate_dbg_msger_create_info( + VkDebugUtilsMessengerCreateInfoEXT& createInfo) +{ + createInfo = {}; + createInfo.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT; + createInfo.messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT + | VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + createInfo.messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT + | VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT + | VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT; + createInfo.pfnUserCallback = debug_callback; +} + +void OceanApplication::setup_dbg_msger() +{ + if (!app_opts.validationLayersOn) return; + + VkDebugUtilsMessengerCreateInfoEXT createInfo; + populate_dbg_msger_create_info(createInfo); + + if (CreateDebugUtilsMessengerEXT(instance, &createInfo, nullptr, + &debug_messenger) + != VK_SUCCESS) + { + throw std::runtime_error("failed to set up debug messenger!"); + } +} + +void OceanApplication::create_surface() +{ + if (!window.createVulkanSurface(instance, surface)) + throw std::runtime_error("failed to create window surface!"); +} + +void OceanApplication::pick_physical_device() +{ + uint32_t deviceCount = 0; + vkEnumeratePhysicalDevices(instance, &deviceCount, nullptr); + + if (deviceCount == 0) + { + throw std::runtime_error("failed to find GPUs with Vulkan support!"); + } + + std::vector devices(deviceCount); + vkEnumeratePhysicalDevices(instance, &deviceCount, devices.data()); + + for (auto i = 0; i < devices.size(); i++) + { + if (app_opts.vulkan_device >= 0 && app_opts.vulkan_device != i) + continue; + + if (is_device_suitable(devices[i])) + { + physical_device = devices[i]; + break; + } + } + + if (physical_device == VK_NULL_HANDLE) + { + throw std::runtime_error("failed to find a suitable GPU!"); + } + + VkPhysicalDeviceProperties properties{}; + vkGetPhysicalDeviceProperties(physical_device, &properties); + + printf("Running on Vulkan physical device: %s\n", properties.deviceName); +} + +void OceanApplication::create_logical_device() +{ + QueueFamilyIndices indices = find_queue_families(physical_device); + + std::vector queueCreateInfos; + std::set uniqueQueueFamilies = { indices.graphicsFamily, + indices.presentFamily }; + + float queuePriority = 1.0f; + for (uint32_t queueFamily : uniqueQueueFamilies) + { + VkDeviceQueueCreateInfo queueCreateInfo{}; + queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queueCreateInfo.queueFamilyIndex = queueFamily; + queueCreateInfo.queueCount = 1; + queueCreateInfo.pQueuePriorities = &queuePriority; + queueCreateInfos.push_back(queueCreateInfo); + } + + VkPhysicalDeviceFeatures deviceFeatures{}; + deviceFeatures.fillModeNonSolid = true; + + VkDeviceCreateInfo createInfo{}; + createInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + + createInfo.queueCreateInfoCount = + static_cast(queueCreateInfos.size()); + createInfo.pQueueCreateInfos = queueCreateInfos.data(); + + createInfo.pEnabledFeatures = &deviceFeatures; + + auto extensions = get_required_dev_exts(); + createInfo.enabledExtensionCount = static_cast(extensions.size()); + createInfo.ppEnabledExtensionNames = extensions.data(); + createInfo.enabledLayerCount = 0; + + if (vkCreateDevice(physical_device, &createInfo, nullptr, &device) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create logical device!"); + } + + vkGetDeviceQueue(device, indices.graphicsFamily, 0, &graphics_queue); + vkGetDeviceQueue(device, indices.presentFamily, 0, &present_queue); +} + +void OceanApplication::create_swap_chain() +{ + SwapChainSupportDetails swapChainSupport = + query_swap_chain_support(physical_device); + + VkSurfaceFormatKHR surfaceFormat = + choose_swap_surf_format(swapChainSupport.formats); + VkPresentModeKHR presentMode = + choose_swap_present_mode(swapChainSupport.presentModes); + VkExtent2D extent = choose_swap_extent(swapChainSupport.capabilities); + + uint32_t imageCount = swapChainSupport.capabilities.minImageCount + 1; + if (swapChainSupport.capabilities.maxImageCount > 0 + && imageCount > swapChainSupport.capabilities.maxImageCount) + { + imageCount = swapChainSupport.capabilities.maxImageCount; + } + + VkSwapchainCreateInfoKHR createInfo{}; + createInfo.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR; + createInfo.surface = surface; + + createInfo.minImageCount = imageCount; + createInfo.imageFormat = surfaceFormat.format; + createInfo.imageColorSpace = surfaceFormat.colorSpace; + createInfo.imageExtent = extent; + createInfo.imageArrayLayers = 1; + createInfo.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + + QueueFamilyIndices indices = find_queue_families(physical_device); + uint32_t queueFamilyIndices[] = { indices.graphicsFamily, + indices.presentFamily }; + + if (indices.graphicsFamily != indices.presentFamily) + { + createInfo.imageSharingMode = VK_SHARING_MODE_CONCURRENT; + createInfo.queueFamilyIndexCount = 2; + createInfo.pQueueFamilyIndices = queueFamilyIndices; + } + else + { + createInfo.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE; + } + + createInfo.preTransform = swapChainSupport.capabilities.currentTransform; + createInfo.compositeAlpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + createInfo.presentMode = presentMode; + createInfo.clipped = VK_TRUE; + + createInfo.oldSwapchain = VK_NULL_HANDLE; + + if (vkCreateSwapchainKHR(device, &createInfo, nullptr, &swap_chain) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create swap chain!"); + } + + vkGetSwapchainImagesKHR(device, swap_chain, &imageCount, nullptr); + swap_chain_images.resize(imageCount); + vkGetSwapchainImagesKHR(device, swap_chain, &imageCount, + swap_chain_images.data()); + + swap_chain_image_format = surfaceFormat.format; + swap_chain_extent = extent; +} + +void OceanApplication::create_swap_chain_image_views() +{ + swap_chain_image_views.resize(swap_chain_images.size()); + + for (uint32_t i = 0; i < swap_chain_images.size(); i++) + { + swap_chain_image_views[i] = + create_image_view(swap_chain_images[i], swap_chain_image_format, + VK_IMAGE_ASPECT_COLOR_BIT); + } +} + +void OceanApplication::create_render_pass() +{ + VkAttachmentDescription colorAttachment{}; + colorAttachment.format = swap_chain_image_format; + colorAttachment.samples = VK_SAMPLE_COUNT_1_BIT; + colorAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + colorAttachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE; + colorAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + colorAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + colorAttachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + colorAttachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + + VkAttachmentDescription depthAttachment{}; + depthAttachment.format = find_depth_format(); + depthAttachment.samples = VK_SAMPLE_COUNT_1_BIT; + depthAttachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; + depthAttachment.storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + depthAttachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; + depthAttachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; + depthAttachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + depthAttachment.finalLayout = + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + VkAttachmentReference colorAttachmentRef{}; + colorAttachmentRef.attachment = 0; + colorAttachmentRef.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + + VkAttachmentReference depthAttachmentRef{}; + depthAttachmentRef.attachment = 1; + depthAttachmentRef.layout = + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + + VkSubpassDescription subpass{}; + subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; + subpass.colorAttachmentCount = 1; + subpass.pColorAttachments = &colorAttachmentRef; + subpass.pDepthStencilAttachment = &depthAttachmentRef; + + VkSubpassDependency dependency{}; + dependency.srcSubpass = VK_SUBPASS_EXTERNAL; + dependency.dstSubpass = 0; + dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT + | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + dependency.srcAccessMask = 0; + dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT + | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT; + dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT + | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + + std::array attachments = { colorAttachment, + depthAttachment }; + VkRenderPassCreateInfo renderPassInfo{}; + renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; + renderPassInfo.attachmentCount = static_cast(attachments.size()); + renderPassInfo.pAttachments = attachments.data(); + renderPassInfo.subpassCount = 1; + renderPassInfo.pSubpasses = &subpass; + renderPassInfo.dependencyCount = 1; + renderPassInfo.pDependencies = &dependency; + + if (vkCreateRenderPass(device, &renderPassInfo, nullptr, &render_pass) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create render pass!"); + } +} + +void OceanApplication::create_uniform_buffer() +{ + VkDeviceSize buffer_size = sizeof(UniformBufferObject); + + uniform_buffers.resize(swap_chain_images.size()); + uniform_buffers_memory.resize(swap_chain_images.size()); + + _mapped_unif_data.resize(swap_chain_images.size()); + + for (size_t i = 0; i < uniform_buffers.size(); i++) + { + create_buffer(buffer_size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + uniform_buffers[i], uniform_buffers_memory[i]); + + vkMapMemory(device, uniform_buffers_memory[i], 0, buffer_size, 0, + &_mapped_unif_data[i].buffer_memory); + } +} + +void OceanApplication::create_descriptor_set_layout() +{ + VkDescriptorSetLayoutBinding sampler0LayoutBinding{}; + sampler0LayoutBinding.binding = 0; + sampler0LayoutBinding.descriptorCount = 1; + sampler0LayoutBinding.descriptorType = + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + sampler0LayoutBinding.pImmutableSamplers = nullptr; + sampler0LayoutBinding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT; + + VkDescriptorSetLayoutBinding sampler1LayoutBinding{}; + sampler1LayoutBinding.binding = 1; + sampler1LayoutBinding.descriptorCount = 1; + sampler1LayoutBinding.descriptorType = + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + sampler1LayoutBinding.pImmutableSamplers = nullptr; + sampler1LayoutBinding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT; + + VkDescriptorSetLayoutBinding uniformLayoutBinding{}; + uniformLayoutBinding.binding = 2; + uniformLayoutBinding.descriptorCount = 1; + uniformLayoutBinding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + uniformLayoutBinding.pImmutableSamplers = nullptr; + uniformLayoutBinding.stageFlags = + VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; + + std::array bindings = { + sampler0LayoutBinding, sampler1LayoutBinding, uniformLayoutBinding + }; + + VkDescriptorSetLayoutCreateInfo layoutInfo{}; + layoutInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + layoutInfo.bindingCount = static_cast(bindings.size()); + layoutInfo.pBindings = bindings.data(); + + if (vkCreateDescriptorSetLayout(device, &layoutInfo, nullptr, + &descriptor_set_layout) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create descriptor set layout!"); + } +} + +void OceanApplication::create_graphics_pipeline() +{ + auto vertShaderCode = + cl::util::read_exe_relative_binary_file("ocean.vert.spv"); + auto fragShaderCode = + cl::util::read_exe_relative_binary_file("ocean.frag.spv"); + + VkShaderModule vertShaderModule = create_shader_module(vertShaderCode); + VkShaderModule fragShaderModule = create_shader_module(fragShaderCode); + + VkPipelineShaderStageCreateInfo vertShaderStageInfo{}; + vertShaderStageInfo.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + vertShaderStageInfo.stage = VK_SHADER_STAGE_VERTEX_BIT; + vertShaderStageInfo.module = vertShaderModule; + vertShaderStageInfo.pName = "main"; + + VkPipelineShaderStageCreateInfo fragShaderStageInfo{}; + fragShaderStageInfo.sType = + VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + fragShaderStageInfo.stage = VK_SHADER_STAGE_FRAGMENT_BIT; + fragShaderStageInfo.module = fragShaderModule; + fragShaderStageInfo.pName = "main"; + + VkPipelineShaderStageCreateInfo shaderStages[] = { vertShaderStageInfo, + fragShaderStageInfo }; + + // vertex info + auto bindingDescription = Vertex::getBindingDescription(); + auto attributeDescriptions = Vertex::getAttributeDescriptions(); + + VkPipelineVertexInputStateCreateInfo vertexInputInfo{}; + vertexInputInfo.sType = + VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + vertexInputInfo.vertexBindingDescriptionCount = 1; + vertexInputInfo.vertexAttributeDescriptionCount = + static_cast(attributeDescriptions.size()); + vertexInputInfo.pVertexBindingDescriptions = &bindingDescription; + vertexInputInfo.pVertexAttributeDescriptions = attributeDescriptions.data(); + + VkPipelineInputAssemblyStateCreateInfo inputAssembly{}; + inputAssembly.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + inputAssembly.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + inputAssembly.primitiveRestartEnable = VK_TRUE; + + VkViewport viewport{}; + viewport.x = 0.0f; + viewport.y = 0.0f; + viewport.width = (float)swap_chain_extent.width; + viewport.height = (float)swap_chain_extent.height; + viewport.minDepth = 0.0f; + viewport.maxDepth = 1.0f; + + VkRect2D scissor{}; + scissor.offset = { 0, 0 }; + scissor.extent = swap_chain_extent; + + VkPipelineViewportStateCreateInfo viewportState{}; + viewportState.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + viewportState.viewportCount = 1; + viewportState.pViewports = &viewport; + viewportState.scissorCount = 1; + viewportState.pScissors = &scissor; + + VkPipelineRasterizationStateCreateInfo rasterizer{}; + rasterizer.sType = + VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + rasterizer.depthClampEnable = VK_FALSE; + rasterizer.rasterizerDiscardEnable = VK_FALSE; + rasterizer.polygonMode = VK_POLYGON_MODE_FILL; + rasterizer.lineWidth = 1.0f; + rasterizer.cullMode = VK_CULL_MODE_BACK_BIT; + rasterizer.frontFace = VK_FRONT_FACE_CLOCKWISE; + rasterizer.depthBiasEnable = VK_FALSE; + + VkPipelineMultisampleStateCreateInfo multisampling{}; + multisampling.sType = + VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + multisampling.sampleShadingEnable = VK_FALSE; + multisampling.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + + VkPipelineDepthStencilStateCreateInfo depthStencil{}; + depthStencil.sType = + VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + depthStencil.depthTestEnable = VK_TRUE; + depthStencil.depthWriteEnable = VK_TRUE; + depthStencil.depthCompareOp = VK_COMPARE_OP_LESS; + depthStencil.depthBoundsTestEnable = VK_FALSE; + depthStencil.stencilTestEnable = VK_FALSE; + + VkPipelineColorBlendAttachmentState colorBlendAttachment{}; + colorBlendAttachment.colorWriteMask = VK_COLOR_COMPONENT_R_BIT + | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT + | VK_COLOR_COMPONENT_A_BIT; + colorBlendAttachment.blendEnable = VK_FALSE; + + VkPipelineColorBlendStateCreateInfo colorBlending{}; + colorBlending.sType = + VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + colorBlending.logicOpEnable = VK_FALSE; + colorBlending.logicOp = VK_LOGIC_OP_COPY; + colorBlending.attachmentCount = 1; + colorBlending.pAttachments = &colorBlendAttachment; + colorBlending.blendConstants[0] = 0.0f; + colorBlending.blendConstants[1] = 0.0f; + colorBlending.blendConstants[2] = 0.0f; + colorBlending.blendConstants[3] = 0.0f; + + VkPipelineLayoutCreateInfo pipelineLayoutInfo{}; + pipelineLayoutInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipelineLayoutInfo.setLayoutCount = 1; + pipelineLayoutInfo.pSetLayouts = &descriptor_set_layout; + + if (vkCreatePipelineLayout(device, &pipelineLayoutInfo, nullptr, + &pipeline_layout) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create pipeline layout!"); + } + + VkGraphicsPipelineCreateInfo pipelineInfo{}; + pipelineInfo.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipelineInfo.stageCount = 2; + pipelineInfo.pStages = shaderStages; + pipelineInfo.pVertexInputState = &vertexInputInfo; + pipelineInfo.pInputAssemblyState = &inputAssembly; + pipelineInfo.pViewportState = &viewportState; + pipelineInfo.pRasterizationState = &rasterizer; + pipelineInfo.pMultisampleState = &multisampling; + pipelineInfo.pDepthStencilState = &depthStencil; + pipelineInfo.pColorBlendState = &colorBlending; + pipelineInfo.layout = pipeline_layout; + pipelineInfo.renderPass = render_pass; + pipelineInfo.subpass = 0; + pipelineInfo.basePipelineHandle = VK_NULL_HANDLE; + + if (vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, + nullptr, &graphics_pipeline) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create graphics pipeline!"); + } + + rasterizer.polygonMode = VK_POLYGON_MODE_LINE; + if (vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, + nullptr, &wireframe_pipeline) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create graphics pipeline!"); + } + + vkDestroyShaderModule(device, fragShaderModule, nullptr); + vkDestroyShaderModule(device, vertShaderModule, nullptr); +} + +void OceanApplication::create_framebuffers() +{ + swap_chain_framebuffers.resize(swap_chain_image_views.size()); + + for (size_t i = 0; i < swap_chain_image_views.size(); i++) + { + std::array attachments = { swap_chain_image_views[i], + depth_image_view }; + + VkFramebufferCreateInfo framebufferInfo{}; + framebufferInfo.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO; + framebufferInfo.renderPass = render_pass; + framebufferInfo.attachmentCount = + static_cast(attachments.size()); + framebufferInfo.pAttachments = attachments.data(); + framebufferInfo.width = swap_chain_extent.width; + framebufferInfo.height = swap_chain_extent.height; + framebufferInfo.layers = 1; + + if (vkCreateFramebuffer(device, &framebufferInfo, nullptr, + &swap_chain_framebuffers[i]) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create framebuffer!"); + } + } +} + +void OceanApplication::create_command_pool() +{ + QueueFamilyIndices queueFamilyIndices = + find_queue_families(physical_device); + + VkCommandPoolCreateInfo poolInfo{}; + poolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + poolInfo.queueFamilyIndex = queueFamilyIndices.graphicsFamily; + + if (vkCreateCommandPool(device, &poolInfo, nullptr, &command_pool) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create command pool!"); + } +} + +void OceanApplication::create_vertex_buffers() +{ + size_t iCXY = (ocean_grid_size + 1) * (ocean_grid_size + 1); + ocean_grid_vertices.resize(iCXY); + + cl_float dfY = -0.5f * (ocean_grid_size * mesh_spacing), + dfBaseX = -0.5f * (ocean_grid_size * mesh_spacing); + cl_float tx = 0.f, ty = 0.f, dtx = 1.f / ocean_grid_size, + dty = 1.f / ocean_grid_size; + for (size_t iBase = 0, iY = 0; iY <= ocean_grid_size; + iY++, iBase += ocean_grid_size + 1) + { + tx = 0.f; + double dfX = dfBaseX; + for (int iX = 0; iX <= ocean_grid_size; iX++) + { + ocean_grid_vertices[iBase + iX].pos = glm::vec3(dfX, dfY, 0.0); + ocean_grid_vertices[iBase + iX].tc = glm::vec2(tx, ty); + tx += dtx; + dfX += mesh_spacing; + } + dfY += mesh_spacing; + ty += dty; + } + + vertex_buffers.resize(swap_chain_images.size()); + vertex_buffer_memories.resize(swap_chain_images.size()); + + VkDeviceSize bufferSize = + sizeof(ocean_grid_vertices[0]) * ocean_grid_vertices.size(); + + VkBuffer stagingBuffer; + VkDeviceMemory stagingBufferMemory; + create_buffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + stagingBuffer, stagingBufferMemory); + + void* data; + vkMapMemory(device, stagingBufferMemory, 0, bufferSize, 0, &data); + memcpy(data, ocean_grid_vertices.data(), (size_t)bufferSize); + vkUnmapMemory(device, stagingBufferMemory); + + for (size_t i = 0; i < swap_chain_images.size(); i++) + { + + // create local memory buffer + create_buffer(bufferSize, + VK_BUFFER_USAGE_TRANSFER_DST_BIT + | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, vertex_buffers[i], + vertex_buffer_memories[i]); + + copy_buffer(stagingBuffer, vertex_buffers[i], bufferSize); + } + + vkDestroyBuffer(device, stagingBuffer, nullptr); + vkFreeMemory(device, stagingBufferMemory, nullptr); +} + +void OceanApplication::create_index_buffers() +{ + size_t totalIndices = ((ocean_grid_size + 1) * 2 + 1) * ocean_grid_size; + ocean_grid_indices.resize(totalIndices); + + VkDeviceSize bufferSize = + sizeof(ocean_grid_indices[0]) * ocean_grid_indices.size(); + + VkBuffer stagingBuffer; + VkDeviceMemory stagingBufferMemory; + create_buffer(bufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + stagingBuffer, stagingBufferMemory); + + size_t indexCount = 0; + for (size_t iY = 0; iY < ocean_grid_size; iY++) + { + size_t iBaseFrom = iY * (ocean_grid_size + 1); + size_t iBaseTo = iBaseFrom + ocean_grid_size + 1; + + for (size_t iX = 0; iX <= ocean_grid_size; iX++) + { + ocean_grid_indices[indexCount++] = static_cast(iBaseFrom + iX); + ocean_grid_indices[indexCount++] = static_cast(iBaseTo + iX); + } + ocean_grid_indices[indexCount++] = -1; + } + + void* data; + vkMapMemory(device, stagingBufferMemory, 0, bufferSize, 0, &data); + memcpy(data, ocean_grid_indices.data(), (size_t)bufferSize); + vkUnmapMemory(device, stagingBufferMemory); + + index_buffer.buffers.resize(swap_chain_images.size()); + index_buffer.buffer_memories.resize(swap_chain_images.size()); + + for (size_t i = 0; i < swap_chain_images.size(); i++) + { + create_buffer(bufferSize, + VK_BUFFER_USAGE_TRANSFER_DST_BIT + | VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + index_buffer.buffers[i], index_buffer.buffer_memories[i]); + + copy_buffer(stagingBuffer, index_buffer.buffers[i], bufferSize); + } + + vkDestroyBuffer(device, stagingBuffer, nullptr); + vkFreeMemory(device, stagingBufferMemory, nullptr); +} + +void OceanApplication::create_texture_images() +{ + VkImageTiling tiling = app_opts.linearImages ? VK_IMAGE_TILING_LINEAR + : VK_IMAGE_TILING_OPTIMAL; + VkMemoryPropertyFlags properties = + app_opts.device_local_images ? VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT : 0; + + uint32_t texWidth = static_cast(ocean_tex_size); + uint32_t texHeight = static_cast(ocean_tex_size); + + VkDeviceSize imageSize = texWidth * texHeight * 4 * sizeof(float); + + create_buffer(imageSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT + | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + staging_tex_buffer, staging_tex_buffer_memory); + + for (size_t target = 0; target < texture_images.size(); target++) + { + texture_images[target].images.resize(swap_chain_images.size()); + texture_images[target].image_memories.resize(swap_chain_images.size()); + + for (size_t i = 0; i < swap_chain_images.size(); i++) + { + create_shareable_image( + texWidth, texHeight, VK_FORMAT_R32G32B32A32_SFLOAT, tiling, + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + properties, texture_images[target].images[i], + texture_images[target].image_memories[i]); + if (app_opts.use_external_memory) + { + transition_image_layout( + texture_images[target].images[i], + VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + } + } +} + +void OceanApplication::create_texture_image_views() +{ + for (size_t img_num = 0; img_num < texture_images.size(); img_num++) + { + texture_images[img_num].image_views.resize(swap_chain_images.size()); + + for (size_t i = 0; i < swap_chain_images.size(); i++) + { + texture_images[img_num].image_views[i] = create_image_view( + texture_images[img_num].images[i], + VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT); + } + } +} + +void OceanApplication::create_texture_sampler() +{ + VkSamplerCreateInfo samplerInfo{}; + samplerInfo.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO; + samplerInfo.magFilter = VK_FILTER_LINEAR; + samplerInfo.minFilter = VK_FILTER_LINEAR; + samplerInfo.addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT; + samplerInfo.addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT; + samplerInfo.addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT; + samplerInfo.borderColor = VK_BORDER_COLOR_INT_OPAQUE_BLACK; + samplerInfo.unnormalizedCoordinates = VK_FALSE; + samplerInfo.compareEnable = VK_FALSE; + samplerInfo.compareOp = VK_COMPARE_OP_ALWAYS; + samplerInfo.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + + for (size_t sampler_num = 0; sampler_num < texture_sampler.size(); + sampler_num++) + { + if (vkCreateSampler(device, &samplerInfo, nullptr, + &texture_sampler[sampler_num]) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create texture sampler!"); + } + } +} + +VkImageView OceanApplication::create_image_view(VkImage image, VkFormat format, + VkImageAspectFlags aspectFlags) +{ + VkImageViewCreateInfo viewInfo{ VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO }; + viewInfo.pNext = nullptr; + viewInfo.image = image; + viewInfo.viewType = VK_IMAGE_VIEW_TYPE_2D; + viewInfo.format = format; + viewInfo.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + viewInfo.subresourceRange.baseMipLevel = 0; + viewInfo.subresourceRange.levelCount = 1; // VK_REMAINING_MIP_LEVELS; + viewInfo.subresourceRange.baseArrayLayer = 0; + viewInfo.subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS; + viewInfo.subresourceRange.aspectMask = aspectFlags; + + VkImageView imageView; + if (vkCreateImageView(device, &viewInfo, nullptr, &imageView) != VK_SUCCESS) + { + throw std::runtime_error("failed to create texture image view!"); + } + + return imageView; +} + +void OceanApplication::create_shareable_image( + uint32_t width, uint32_t height, VkFormat format, VkImageTiling tiling, + VkImageUsageFlags usage, VkMemoryPropertyFlags properties, VkImage& image, + VkDeviceMemory& imageMemory, VkImageType type) +{ + VkExternalMemoryImageCreateInfo externalMemCreateInfo{}; + externalMemCreateInfo.sType = + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; + +#ifdef _WIN32 + externalMemCreateInfo.handleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT; +#elif defined(__linux__) + externalMemCreateInfo.handleTypes = + external_mem_type == CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR + ? VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT + : VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT; +#endif + + VkImageCreateInfo imageInfo{}; + imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + if (app_opts.use_external_memory) + { + imageInfo.pNext = &externalMemCreateInfo; + } + + imageInfo.imageType = type; + imageInfo.extent.width = width; + imageInfo.extent.height = height; + imageInfo.extent.depth = 1; + imageInfo.mipLevels = 1; + imageInfo.arrayLayers = 1; + imageInfo.format = format; + imageInfo.tiling = tiling; + imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageInfo.usage = usage; + imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; + imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + if (vkCreateImage(device, &imageInfo, nullptr, &image) != VK_SUCCESS) + { + throw std::runtime_error("failed to create image!"); + } + + VkMemoryRequirements memRequirements; + vkGetImageMemoryRequirements(device, image, &memRequirements); + + VkExportMemoryAllocateInfo exportMemoryAllocInfo{}; + exportMemoryAllocInfo.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + exportMemoryAllocInfo.handleTypes = externalMemCreateInfo.handleTypes; + + VkMemoryAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + if (app_opts.use_external_memory) + { + allocInfo.pNext = &exportMemoryAllocInfo; + } + allocInfo.allocationSize = memRequirements.size; + allocInfo.memoryTypeIndex = + find_memory_type(memRequirements.memoryTypeBits, properties); + + if (vkAllocateMemory(device, &allocInfo, nullptr, &imageMemory) + != VK_SUCCESS) + { + throw std::runtime_error("failed to allocate image memory!"); + } + + vkBindImageMemory(device, image, imageMemory, 0); +} + +void OceanApplication::create_image(uint32_t width, uint32_t height, + VkFormat format, VkImageTiling tiling, + VkImageUsageFlags usage, + VkMemoryPropertyFlags properties, + VkImage& image, VkDeviceMemory& imageMemory) +{ + VkImageCreateInfo imageInfo{}; + imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + imageInfo.imageType = VK_IMAGE_TYPE_2D; + imageInfo.extent.width = width; + imageInfo.extent.height = height; + imageInfo.extent.depth = 1; + imageInfo.mipLevels = 1; + imageInfo.arrayLayers = 1; + imageInfo.format = format; + imageInfo.tiling = tiling; + imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageInfo.usage = usage; + imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; + imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + if (vkCreateImage(device, &imageInfo, nullptr, &image) != VK_SUCCESS) + { + throw std::runtime_error("failed to create image!"); + } + + VkMemoryRequirements memRequirements; + vkGetImageMemoryRequirements(device, image, &memRequirements); + + VkMemoryAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocInfo.allocationSize = memRequirements.size; + allocInfo.memoryTypeIndex = + find_memory_type(memRequirements.memoryTypeBits, properties); + + if (vkAllocateMemory(device, &allocInfo, nullptr, &imageMemory) + != VK_SUCCESS) + { + throw std::runtime_error("failed to allocate image memory!"); + } + + vkBindImageMemory(device, image, imageMemory, 0); +} + +VkFormat +OceanApplication::find_supported_format(const std::vector& candidates, + VkImageTiling tiling, + VkFormatFeatureFlags features) +{ + for (VkFormat format : candidates) + { + VkFormatProperties props; + vkGetPhysicalDeviceFormatProperties(physical_device, format, &props); + + if (tiling == VK_IMAGE_TILING_LINEAR + && (props.linearTilingFeatures & features) == features) + { + return format; + } + else if (tiling == VK_IMAGE_TILING_OPTIMAL + && (props.optimalTilingFeatures & features) == features) + { + return format; + } + } + + throw std::runtime_error("failed to find supported format!"); +} + +VkFormat OceanApplication::find_depth_format() +{ + return find_supported_format( + { VK_FORMAT_D32_SFLOAT, VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_FORMAT_D24_UNORM_S8_UINT }, + VK_IMAGE_TILING_OPTIMAL, + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT); +} + +bool OceanApplication::has_stencil_component(VkFormat format) +{ + return format == VK_FORMAT_D32_SFLOAT_S8_UINT + || format == VK_FORMAT_D24_UNORM_S8_UINT; +} + +void OceanApplication::create_depth_resources() +{ + VkFormat depthFormat = find_depth_format(); + + create_image( + swap_chain_extent.width, swap_chain_extent.height, depthFormat, + VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, depth_image, depth_image_memory); + depth_image_view = + create_image_view(depth_image, depthFormat, VK_IMAGE_ASPECT_DEPTH_BIT); +} + +void OceanApplication::transition_image_layout(VkImage image, VkFormat format, + VkImageLayout oldLayout, + VkImageLayout newLayout, + uint32_t layers) +{ + + VkCommandBuffer commandBuffer = begin_single_time_commands(); + + VkImageMemoryBarrier barrier{}; + barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + barrier.oldLayout = oldLayout; + barrier.newLayout = newLayout; + barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + barrier.image = image; + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + barrier.subresourceRange.baseMipLevel = 0; + barrier.subresourceRange.levelCount = 1; + // vulkan spec: If the calling command’s VkImage parameter is of + // VkImageType VK_IMAGE_TYPE_3D, the baseArrayLayer and + // layerCount members of imageSubresource must be 0 and 1, + // respectively + barrier.subresourceRange.baseArrayLayer = 0; + barrier.subresourceRange.layerCount = layers; + + VkPipelineStageFlags sourceStage; + VkPipelineStageFlags destinationStage; + + if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED + && newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) + { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + } + else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL + && newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + } + else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED + && newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) + { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT /*| + VK_PIPELINE_STAGE_VERTEX_SHADER_BIT*/ + ; + } + else + { + throw std::invalid_argument("unsupported layout transition!"); + } + + vkCmdPipelineBarrier(commandBuffer, sourceStage, destinationStage, 0, 0, + nullptr, 0, nullptr, 1, &barrier); + + end_single_time_commands(commandBuffer); +} + +void OceanApplication::copy_buffer_to_image(VkBuffer buffer, VkImage image, + uint32_t width, uint32_t height) +{ + VkCommandBuffer commandBuffer = begin_single_time_commands(); + + VkBufferImageCopy region{}; + region.bufferOffset = 0; + region.bufferRowLength = 0; + region.bufferImageHeight = 0; + region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + region.imageSubresource.mipLevel = 0; + region.imageSubresource.baseArrayLayer = 0; + region.imageSubresource.layerCount = 1; + region.imageOffset = { 0, 0, 0 }; + region.imageExtent = { width, height, 1 }; + + vkCmdCopyBufferToImage(commandBuffer, buffer, image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + end_single_time_commands(commandBuffer); +} + +void OceanApplication::create_descriptor_pool() +{ + std::array poolSizes{}; + poolSizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + poolSizes[0].descriptorCount = + static_cast(swap_chain_images.size()); + + poolSizes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + poolSizes[1].descriptorCount = + static_cast(swap_chain_images.size()); + + poolSizes[2].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + poolSizes[2].descriptorCount = + static_cast(swap_chain_images.size()); + + VkDescriptorPoolCreateInfo poolInfo{}; + poolInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + poolInfo.poolSizeCount = static_cast(poolSizes.size()); + poolInfo.pPoolSizes = poolSizes.data(); + poolInfo.maxSets = static_cast(swap_chain_images.size()); + + if (vkCreateDescriptorPool(device, &poolInfo, nullptr, &descriptor_pool) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create descriptor pool!"); + } +} + +void OceanApplication::create_descriptor_sets() +{ + std::vector layouts(swap_chain_images.size(), + descriptor_set_layout); + VkDescriptorSetAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + allocInfo.descriptorPool = descriptor_pool; + allocInfo.descriptorSetCount = + static_cast(swap_chain_images.size()); + allocInfo.pSetLayouts = layouts.data(); + + descriptor_sets.resize(swap_chain_images.size()); + if (vkAllocateDescriptorSets(device, &allocInfo, descriptor_sets.data()) + != VK_SUCCESS) + { + throw std::runtime_error("failed to allocate descriptor sets!"); + } + + for (size_t i = 0; i < swap_chain_images.size(); i++) + { + VkDescriptorImageInfo imageInfo[(size_t)InteropTexType::IOPT_COUNT] = { + 0 + }; + + VkDescriptorBufferInfo bufferInfo{}; + bufferInfo.buffer = uniform_buffers[i]; + bufferInfo.offset = 0; + bufferInfo.range = sizeof(UniformBufferObject); + + std::array descriptorWrites{}; + + for (cl_int target = 0; target < IOPT_COUNT; target++) + { + imageInfo[target].imageLayout = + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + imageInfo[target].imageView = texture_images[target].image_views[i]; + imageInfo[target].sampler = texture_sampler[target]; + + descriptorWrites[target].sType = + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptorWrites[target].dstSet = descriptor_sets[i]; + descriptorWrites[target].dstBinding = target; + descriptorWrites[target].dstArrayElement = 0; + descriptorWrites[target].descriptorType = + VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; + descriptorWrites[target].descriptorCount = 1; + descriptorWrites[target].pImageInfo = &imageInfo[target]; + } + + descriptorWrites[IOPT_COUNT].sType = + VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptorWrites[IOPT_COUNT].dstSet = descriptor_sets[i]; + descriptorWrites[IOPT_COUNT].dstBinding = IOPT_COUNT; + descriptorWrites[IOPT_COUNT].dstArrayElement = 0; + descriptorWrites[IOPT_COUNT].descriptorType = + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + descriptorWrites[IOPT_COUNT].descriptorCount = 1; + descriptorWrites[IOPT_COUNT].pBufferInfo = &bufferInfo; + + vkUpdateDescriptorSets(device, + static_cast(descriptorWrites.size()), + descriptorWrites.data(), 0, nullptr); + } +} + +void OceanApplication::create_buffer(VkDeviceSize size, + VkBufferUsageFlags usage, + VkMemoryPropertyFlags properties, + VkBuffer& buffer, + VkDeviceMemory& bufferMemory) +{ + VkBufferCreateInfo bufferInfo{}; + bufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO; + bufferInfo.size = size; + bufferInfo.usage = usage; + bufferInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + + if (vkCreateBuffer(device, &bufferInfo, nullptr, &buffer) != VK_SUCCESS) + { + throw std::runtime_error("failed to create buffer!"); + } + + VkMemoryRequirements memRequirements; + vkGetBufferMemoryRequirements(device, buffer, &memRequirements); + + VkMemoryAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + allocInfo.allocationSize = memRequirements.size; + allocInfo.memoryTypeIndex = + find_memory_type(memRequirements.memoryTypeBits, properties); + + if (vkAllocateMemory(device, &allocInfo, nullptr, &bufferMemory) + != VK_SUCCESS) + { + throw std::runtime_error("failed to allocate buffer memory!"); + } + + vkBindBufferMemory(device, buffer, bufferMemory, 0); +} + +void OceanApplication::copy_buffer(VkBuffer srcBuffer, VkBuffer dstBuffer, + VkDeviceSize size) +{ + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandPool = command_pool; + allocInfo.commandBufferCount = 1; + + VkCommandBuffer commandBuffer; + vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + vkBeginCommandBuffer(commandBuffer, &beginInfo); + + VkBufferCopy copyRegion{}; + copyRegion.srcOffset = 0; // Optional + copyRegion.dstOffset = 0; // Optional + copyRegion.size = size; + vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, ©Region); + + vkEndCommandBuffer(commandBuffer); + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + + vkQueueSubmit(graphics_queue, 1, &submitInfo, VK_NULL_HANDLE); + vkQueueWaitIdle(graphics_queue); + + vkFreeCommandBuffers(device, command_pool, 1, &commandBuffer); +} + +uint32_t OceanApplication::find_memory_type(uint32_t typeFilter, + VkMemoryPropertyFlags properties) +{ + VkPhysicalDeviceMemoryProperties memProperties; + vkGetPhysicalDeviceMemoryProperties(physical_device, &memProperties); + + for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) + { + if ((typeFilter & (1 << i)) + && (memProperties.memoryTypes[i].propertyFlags & properties) + == properties) + { + return i; + } + } + + throw std::runtime_error("failed to find suitable memory type!"); +} + +VkCommandBuffer OceanApplication::begin_single_time_commands() +{ + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandPool = command_pool; + allocInfo.commandBufferCount = 1; + + VkCommandBuffer commandBuffer; + vkAllocateCommandBuffers(device, &allocInfo, &commandBuffer); + + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT; + + vkBeginCommandBuffer(commandBuffer, &beginInfo); + + return commandBuffer; +} + +void OceanApplication::end_single_time_commands(VkCommandBuffer commandBuffer) +{ + vkEndCommandBuffer(commandBuffer); + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &commandBuffer; + + vkQueueSubmit(graphics_queue, 1, &submitInfo, VK_NULL_HANDLE); + vkQueueWaitIdle(graphics_queue); + + vkFreeCommandBuffers(device, command_pool, 1, &commandBuffer); +} + +void OceanApplication::create_command_buffers() +{ + command_buffers.resize(swap_chain_framebuffers.size()); + + VkCommandBufferAllocateInfo allocInfo{}; + allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + allocInfo.commandPool = command_pool; + allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + allocInfo.commandBufferCount = (uint32_t)command_buffers.size(); + + if (vkAllocateCommandBuffers(device, &allocInfo, command_buffers.data()) + != VK_SUCCESS) + { + throw std::runtime_error("failed to allocate command buffers!"); + } + + for (size_t i = 0; i < command_buffers.size(); i++) + { + VkCommandBufferBeginInfo beginInfo{}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + + if (vkBeginCommandBuffer(command_buffers[i], &beginInfo) != VK_SUCCESS) + { + throw std::runtime_error( + "failed to begin recording command buffer!"); + } + + VkRenderPassBeginInfo renderPassInfo{}; + renderPassInfo.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; + renderPassInfo.renderPass = render_pass; + renderPassInfo.framebuffer = swap_chain_framebuffers[i]; + renderPassInfo.renderArea.offset = { 0, 0 }; + renderPassInfo.renderArea.extent = swap_chain_extent; + + std::array clearValues{}; + clearValues[0].color = { { 0.0f, 0.0f, 0.0f, 1.0f } }; + clearValues[1].depthStencil = { 1.0f, 0 }; + + renderPassInfo.clearValueCount = + static_cast(clearValues.size()); + renderPassInfo.pClearValues = clearValues.data(); + + vkCmdBeginRenderPass(command_buffers[i], &renderPassInfo, + VK_SUBPASS_CONTENTS_INLINE); + + vkCmdBindPipeline(command_buffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, + wireframe_mode ? wireframe_pipeline + : graphics_pipeline); + + VkDeviceSize offsets[] = { 0 }; + vkCmdBindVertexBuffers(command_buffers[i], 0, 1, &vertex_buffers[i], + offsets); + + vkCmdBindDescriptorSets( + command_buffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_layout, 0, 1, &descriptor_sets[i], 0, nullptr); + + vkCmdBindIndexBuffer(command_buffers[i], index_buffer.buffers[i], 0, + VK_INDEX_TYPE_UINT32); + vkCmdDrawIndexed(command_buffers[i], + static_cast(ocean_grid_indices.size()), 1, 0, + 0, 0); + + vkCmdEndRenderPass(command_buffers[i]); + + if (vkEndCommandBuffer(command_buffers[i]) != VK_SUCCESS) + { + throw std::runtime_error("failed to record command buffer!"); + } + } +} + +void OceanApplication::create_sync_objects() +{ + image_available_semaphores.resize(MAX_FRAMES_IN_FLIGHT); + render_finished_semaphores.resize(MAX_FRAMES_IN_FLIGHT); + in_flight_fences.resize(MAX_FRAMES_IN_FLIGHT); + images_in_flight.resize(swap_chain_images.size(), VK_NULL_HANDLE); + + VkExportSemaphoreCreateInfo exportSemaphoreCreateInfo{}; + exportSemaphoreCreateInfo.sType = + VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO; + + VkSemaphoreCreateInfo semaphoreInfo{}; + semaphoreInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + + VkFenceCreateInfo fenceInfo{}; + fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + fenceInfo.flags = VK_FENCE_CREATE_SIGNALED_BIT; + + for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) + { + if (vkCreateSemaphore(device, &semaphoreInfo, nullptr, + &image_available_semaphores[i]) + != VK_SUCCESS + || vkCreateSemaphore(device, &semaphoreInfo, nullptr, + &render_finished_semaphores[i]) + != VK_SUCCESS + || vkCreateFence(device, &fenceInfo, nullptr, &in_flight_fences[i]) + != VK_SUCCESS) + { + throw std::runtime_error( + "failed to create synchronization objects for a frame!"); + } + } +} + +void OceanApplication::update_uniforms(uint32_t currentImage) +{ + UniformBufferObject ubo = _mapped_unif_data[currentImage].data; + ubo.choppiness = choppiness; + ubo.alt_scale = alt_scale; + + // update camera related uniform + glm::mat4 view_matrix = + glm::lookAt(camera.eye, camera.eye + camera.dir, camera.up); + + float fov = (float)glm::radians(60.0); + float aspect = (float)win_opts.width / win_opts.height; + glm::mat4 proj_matrix = glm::perspective( + fov, aspect, 1.f, 2.f * ocean_grid_size * mesh_spacing); + proj_matrix[1][1] *= -1; + + ubo.view_mat = view_matrix; + ubo.proj_mat = proj_matrix; + + memcpy(_mapped_unif_data[currentImage].buffer_memory, &ubo, + sizeof(UniformBufferObject)); +} + +void OceanApplication::update_spectrum(uint32_t currentImage, float elapsed) +{ + cl_int2 patch = + cl_int2{ (int)(ocean_grid_size * mesh_spacing), (int)ocean_tex_size }; + + cl::NDRange lws; // NullRange by default. + if (group_size > 0) + { + lws = cl::NDRange{ group_size, group_size }; + } + + if (twiddle_factors_init) + { + try + { + size_t log_2_N = + (size_t)((log((float)ocean_tex_size) / log(2.f)) - 1); + + /// Prepare vector of values to extract results + std::vector v(ocean_tex_size); + for (int i = 0; i < ocean_tex_size; i++) + { + int x = reverse_bits(i, log_2_N); + v[i] = x; + } + + /// Initialize device-side storage + cl::Buffer bit_reversed_inds_mem{ + context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + sizeof(cl_int) * v.size(), v.data() + }; + + twiddle_kernel.setArg(0, cl_int(ocean_tex_size)); + twiddle_kernel.setArg(1, bit_reversed_inds_mem); + twiddle_kernel.setArg(2, *twiddle_factors_mem); + + command_queue.enqueueNDRangeKernel( + twiddle_kernel, cl::NullRange, + cl::NDRange{ (cl::size_type)log_2_N, ocean_tex_size }, + cl::NDRange{ 1, 16 }); + twiddle_factors_init = false; + } catch (const cl::Error& e) + { + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; + exit(e.err()); + } + } + + // change of some ocean's parameters requires to rebuild initial spectrum + // image + if (changed) + { + try + { + float wind_angle_rad = glm::radians(wind_angle); + cl_float4 params = + cl_float4{ wind_magnitude * glm::cos(wind_angle_rad), + wind_magnitude * glm::sin(wind_angle_rad), amplitude, + supress_factor }; + init_spectrum_kernel.setArg(0, patch); + init_spectrum_kernel.setArg(1, params); + init_spectrum_kernel.setArg(2, *noise_mem); + init_spectrum_kernel.setArg(3, *h0k_mem); + + command_queue.enqueueNDRangeKernel( + init_spectrum_kernel, cl::NullRange, + cl::NDRange{ ocean_tex_size, ocean_tex_size }, lws); + changed = false; + } catch (const cl::Error& e) + { + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; + exit(e.err()); + } + } + + // ping-pong phase spectrum kernel launch + try + { + time_spectrum_kernel.setArg(0, elapsed); + time_spectrum_kernel.setArg(1, patch); + time_spectrum_kernel.setArg(2, *h0k_mem); + time_spectrum_kernel.setArg(3, *dxyz_coef_mem[0]); + time_spectrum_kernel.setArg(4, *dxyz_coef_mem[1]); + time_spectrum_kernel.setArg(5, *dxyz_coef_mem[2]); + + command_queue.enqueueNDRangeKernel( + time_spectrum_kernel, cl::NullRange, + cl::NDRange{ ocean_tex_size, ocean_tex_size }, lws); + } catch (const cl::Error& e) + { + std::cerr << "OpenCL runtime error: " << e.what() << std::endl; + exit(e.err()); + } + + + // perform 1D FFT horizontal and vertical iterations + size_t log_2_N = (size_t)((log((float)ocean_tex_size) / log(2.f)) - 1); + fft_kernel.setArg(1, patch); + fft_kernel.setArg(2, *twiddle_factors_mem); + for (cl_int i = 0; i < 3; i++) + { + const cl::Image* displ_swap[] = { dxyz_coef_mem[i].get(), + hkt_pong_mem.get() }; + cl_int2 mode = cl_int2{ { 0, 0 } }; + + bool ifft_pingpong = false; + for (int p = 0; p < log_2_N; p++) + { + if (ifft_pingpong) + { + fft_kernel.setArg(3, *displ_swap[1]); + fft_kernel.setArg(4, *displ_swap[0]); + } + else + { + fft_kernel.setArg(3, *displ_swap[0]); + fft_kernel.setArg(4, *displ_swap[1]); + } + + mode.s[1] = p; + fft_kernel.setArg(0, mode); + + command_queue.enqueueNDRangeKernel( + fft_kernel, cl::NullRange, + cl::NDRange{ ocean_tex_size, ocean_tex_size }, lws); + + + ifft_pingpong = !ifft_pingpong; + } + + // Cols + mode.s[0] = 1; + for (int p = 0; p < log_2_N; p++) + { + if (ifft_pingpong) + { + fft_kernel.setArg(3, *displ_swap[1]); + fft_kernel.setArg(4, *displ_swap[0]); + } + else + { + fft_kernel.setArg(3, *displ_swap[0]); + fft_kernel.setArg(4, *displ_swap[1]); + } + + mode.s[1] = p; + fft_kernel.setArg(0, mode); + + command_queue.enqueueNDRangeKernel( + fft_kernel, cl::NullRange, + cl::NDRange{ ocean_tex_size, ocean_tex_size }, lws); + + ifft_pingpong = !ifft_pingpong; + } + + if (log_2_N % 2) + { + // swap images if pingpong hold on temporary buffer + std::array orig = { 0, 0, 0 }, + region = { ocean_tex_size, ocean_tex_size, + 1 }; + command_queue.enqueueCopyImage(*displ_swap[0], *displ_swap[1], orig, + orig, region); + } + } + + if (app_opts.use_external_memory) + { + for (size_t target = 0; target < IOPT_COUNT; target++) + { + command_queue.enqueueAcquireExternalMemObjects( + { *ocl_image_mems[target][currentImage] }); + } + } + + // inversion + { + inversion_kernel.setArg(0, patch); + inversion_kernel.setArg(1, *dxyz_coef_mem[0]); + inversion_kernel.setArg(2, *dxyz_coef_mem[1]); + inversion_kernel.setArg(3, *dxyz_coef_mem[2]); + inversion_kernel.setArg( + 4, *ocl_image_mems[IOPT_DISPLACEMENT][currentImage]); + + command_queue.enqueueNDRangeKernel( + inversion_kernel, cl::NullRange, + cl::NDRange{ ocean_tex_size, ocean_tex_size }, lws); + } + + // normals computation + { + cl_float2 factors = cl_float2{ choppiness, alt_scale }; + + normals_kernel.setArg(0, patch); + normals_kernel.setArg(1, factors); + normals_kernel.setArg(2, *noise_mem); + normals_kernel.setArg(3, + *ocl_image_mems[IOPT_DISPLACEMENT][currentImage]); + normals_kernel.setArg(4, + *ocl_image_mems[IOPT_NORMAL_MAP][currentImage]); + + command_queue.enqueueNDRangeKernel( + normals_kernel, cl::NullRange, + cl::NDRange{ ocean_tex_size, ocean_tex_size }, lws); + } +} + +void OceanApplication::show_fps_window_title() +{ + if (show_fps) + { + auto fps_now = std::chrono::system_clock::now(); + + std::chrono::duration elapsed = fps_now - fps_last_time; + float delta = elapsed.count(); + + const float elapsed_tres = 1.f; + + delta_frames++; + if (delta >= 1.f) + { + double fps = double(delta_frames) / delta; + + std::stringstream ss; + ss << app_name << ", [FPS:" << std::fixed << std::setprecision(2) + << fps << "]"; + + window.setTitle(ss.str().c_str()); + + delta_frames = 0; + fps_last_time = fps_now; + } + } + else + { + fps_last_time = std::chrono::system_clock::now(); + delta_frames = 0; + } +} + +void OceanApplication::update_ocean(uint32_t currentImage) +{ + show_fps_window_title(); + + update_uniforms(currentImage); + + auto end = std::chrono::system_clock::now(); + + // time factor of ocean animation + static float elapsed = 0.f; + + if (animate) + { + std::chrono::duration delta = end - start; + elapsed = delta.count(); + + update_spectrum(currentImage, elapsed); + + if (app_opts.use_external_memory) + { + for (size_t target = 0; target < IOPT_COUNT; target++) + { + command_queue.enqueueReleaseExternalMemObjects( + { *ocl_image_mems[target][currentImage] }); + } + + command_queue.finish(); + } + else + { + for (size_t target = 0; target < IOPT_COUNT; target++) + { + size_t rowPitch = 0; + void* pixels = command_queue.enqueueMapImage( + *ocl_image_mems[target][currentImage], CL_TRUE, CL_MAP_READ, + { 0, 0, 0 }, { ocean_tex_size, ocean_tex_size, 1 }, + &rowPitch, nullptr); + + VkDeviceSize imageSize = + ocean_tex_size * ocean_tex_size * 4 * sizeof(float); + + void* data; + vkMapMemory(device, staging_tex_buffer_memory, 0, imageSize, 0, + &data); + memcpy(data, pixels, static_cast(imageSize)); + vkUnmapMemory(device, staging_tex_buffer_memory); + + command_queue.enqueueUnmapMemObject( + *ocl_image_mems[target][currentImage], pixels); + command_queue.flush(); + + transition_image_layout( + texture_images[target].images[currentImage], + VK_FORMAT_R32G32B32A32_SFLOAT, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + copy_buffer_to_image( + staging_tex_buffer, + texture_images[target].images[currentImage], + static_cast(ocean_tex_size), + static_cast(ocean_tex_size)); + transition_image_layout( + texture_images[target].images[currentImage], + VK_FORMAT_R32G32B32A32_SFLOAT, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + } + } + } + else + { + // hold the animation at the same time point + std::chrono::duration duration(elapsed); + start = + end - std::chrono::duration_cast(duration); + + if (app_opts.use_external_memory) + { + command_queue.finish(); + } + } +} + +void OceanApplication::draw_frame() +{ + vkWaitForFences(device, 1, &in_flight_fences[current_frame], VK_TRUE, + UINT64_MAX); + + uint32_t imageIndex; + vkAcquireNextImageKHR(device, swap_chain, UINT64_MAX, + image_available_semaphores[current_frame], + VK_NULL_HANDLE, &imageIndex); + + update_ocean(imageIndex); + + if (images_in_flight[imageIndex] != VK_NULL_HANDLE) + { + vkWaitForFences(device, 1, &images_in_flight[imageIndex], VK_TRUE, + UINT64_MAX); + } + images_in_flight[imageIndex] = in_flight_fences[current_frame]; + + VkSubmitInfo submitInfo{}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + + std::vector waitSemaphores; + std::vector waitStages; + waitSemaphores.push_back(image_available_semaphores[current_frame]); + waitStages.push_back(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT); + submitInfo.waitSemaphoreCount = + static_cast(waitSemaphores.size()); + submitInfo.pWaitSemaphores = waitSemaphores.data(); + submitInfo.pWaitDstStageMask = waitStages.data(); + + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &command_buffers[imageIndex]; + + submitInfo.signalSemaphoreCount = 1; + submitInfo.pSignalSemaphores = &render_finished_semaphores[current_frame]; + + vkResetFences(device, 1, &in_flight_fences[current_frame]); + + if (vkQueueSubmit(graphics_queue, 1, &submitInfo, + in_flight_fences[current_frame]) + != VK_SUCCESS) + { + throw std::runtime_error("failed to submit draw command buffer!"); + } + + VkPresentInfoKHR presentInfo{}; + presentInfo.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR; + + presentInfo.waitSemaphoreCount = 1; + presentInfo.pWaitSemaphores = &render_finished_semaphores[current_frame]; + + VkSwapchainKHR swapChains[] = { swap_chain }; + presentInfo.swapchainCount = 1; + presentInfo.pSwapchains = swapChains; + + presentInfo.pImageIndices = &imageIndex; + + vkQueuePresentKHR(present_queue, &presentInfo); + + current_frame = (current_frame + 1) % MAX_FRAMES_IN_FLIGHT; +} + +void OceanApplication::check_openCL_ext_mem_support(cl::Device& device) +{ + if (cl::util::supports_extension(device, "cl_khr_external_memory")) + { + printf("Device supports cl_khr_external_memory.\n"); + printf("Supported external memory handle types:\n"); + + std::vector types = + device.getInfo(); + for (auto type : types) + { +#define CASE_TO_STRING(_e) \ + case _e: printf("\t%s\n", #_e); break; + switch ( + static_cast::type>( + type)) + { + CASE_TO_STRING(CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR); + CASE_TO_STRING(CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR); + CASE_TO_STRING(CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR); + CASE_TO_STRING(CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR); + default: + printf("Unknown cl_external_memory_handle_type_khr %04X\n", + (unsigned int)type); + } +#undef CASE_TO_STRING + } + + +#ifdef _WIN32 + if (std::find_if( + types.begin(), types.end(), + [](cl::ExternalMemoryType& emt) { + return static_cast< + std::underlying_type_t>( + emt) + == CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR; + }) + != types.end()) + { + external_mem_type = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR; + } + else + { + printf("Couldn't find a compatible external memory type " + "(sample supports OPAQUE_WIN32).\n"); + app_opts.use_external_memory = false; + } +#elif defined(__linux__) + if (std::find( + types.begin(), types.end(), + cl::ExternalMemoryType(CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR)) + != types.end()) + { + external_mem_type = CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR; + } + else if (std::find(types.begin(), types.end(), + cl::ExternalMemoryType( + CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR)) + != types.end()) + { + external_mem_type = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR; + } + else + { + printf("Couldn't find a compatible external memory type " + "(sample supports DMA_BUF or OPAQUE_FD).\n"); + app_opts.use_external_memory = false; + } +#endif + } + else + { + printf("Device does not support cl_khr_external_memory.\n"); + app_opts.use_external_memory = false; + } +} + +VkShaderModule +OceanApplication::create_shader_module(const std::vector& code) +{ + VkShaderModuleCreateInfo createInfo{}; + createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + createInfo.codeSize = code.size(); + createInfo.pCode = reinterpret_cast(code.data()); + + VkShaderModule shaderModule; + if (vkCreateShaderModule(device, &createInfo, nullptr, &shaderModule) + != VK_SUCCESS) + { + throw std::runtime_error("failed to create shader module!"); + } + + return shaderModule; +} + +VkSurfaceFormatKHR OceanApplication::choose_swap_surf_format( + const std::vector& availableFormats) +{ + for (const auto& availableFormat : availableFormats) + { + if (availableFormat.format == VK_FORMAT_B8G8R8A8_UNORM) + { + return availableFormat; + } + } + + return availableFormats[0]; +} + +VkPresentModeKHR OceanApplication::choose_swap_present_mode( + const std::vector& availablePresentModes) +{ + for (const auto& availablePresentMode : availablePresentModes) + { + if (!app_opts.immediate) + { + if (availablePresentMode == VK_PRESENT_MODE_MAILBOX_KHR) + { + return availablePresentMode; + } + } + else + { + if (availablePresentMode == VK_PRESENT_MODE_IMMEDIATE_KHR) + { + return availablePresentMode; + } + } + } + + return VK_PRESENT_MODE_FIFO_KHR; +} + +VkExtent2D OceanApplication::choose_swap_extent( + const VkSurfaceCapabilitiesKHR& capabilities) +{ + if (capabilities.currentExtent.width != UINT32_MAX) + { + return capabilities.currentExtent; + } + else + { + int width = win_opts.width, height = win_opts.width; + + auto wsize = window.getSize(); + width = wsize.x; + height = wsize.y; + + VkExtent2D actualExtent = { static_cast(width), + static_cast(height) }; + + actualExtent.width = std::max( + capabilities.minImageExtent.width, + std::min(actualExtent.width, capabilities.maxImageExtent.width)); + actualExtent.height = std::max( + capabilities.minImageExtent.height, + std::min(actualExtent.height, capabilities.maxImageExtent.height)); + + return actualExtent; + } +} + +SwapChainSupportDetails +OceanApplication::query_swap_chain_support(VkPhysicalDevice device) +{ + SwapChainSupportDetails details; + + vkGetPhysicalDeviceSurfaceCapabilitiesKHR(device, surface, + &details.capabilities); + + uint32_t formatCount; + vkGetPhysicalDeviceSurfaceFormatsKHR(device, surface, &formatCount, + nullptr); + + if (formatCount != 0) + { + details.formats.resize(formatCount); + vkGetPhysicalDeviceSurfaceFormatsKHR(device, surface, &formatCount, + details.formats.data()); + } + + uint32_t presentModeCount; + vkGetPhysicalDeviceSurfacePresentModesKHR(device, surface, + &presentModeCount, nullptr); + + if (presentModeCount != 0) + { + details.presentModes.resize(presentModeCount); + vkGetPhysicalDeviceSurfacePresentModesKHR( + device, surface, &presentModeCount, details.presentModes.data()); + } + + return details; +} + +bool OceanApplication::is_device_suitable(VkPhysicalDevice device) +{ + QueueFamilyIndices indices = find_queue_families(device); + + bool extensionsSupported = check_device_extension_support(device); + + bool swapChainAdequate = false; + if (extensionsSupported) + { + SwapChainSupportDetails swapChainSupport = + query_swap_chain_support(device); + swapChainAdequate = !swapChainSupport.formats.empty() + && !swapChainSupport.presentModes.empty(); + } + + return indices.isComplete() && extensionsSupported && swapChainAdequate; +} + +bool OceanApplication::check_device_extension_support(VkPhysicalDevice device) +{ + VkPhysicalDeviceProperties pProperties; + vkGetPhysicalDeviceProperties(device, &pProperties); + + uint32_t extensionCount; + vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount, + nullptr); + + std::vector availableExtensions(extensionCount); + vkEnumerateDeviceExtensionProperties(device, nullptr, &extensionCount, + availableExtensions.data()); + + auto extensions = get_required_dev_exts(); + std::set requiredExtensions(extensions.begin(), + extensions.end()); + + for (const auto& extension : availableExtensions) + { + requiredExtensions.erase(extension.extensionName); + } + + return requiredExtensions.empty(); +} + +QueueFamilyIndices +OceanApplication::find_queue_families(VkPhysicalDevice device) +{ + QueueFamilyIndices indices; + + uint32_t queueFamilyCount = 0; + vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount, + nullptr); + + std::vector queueFamilies(queueFamilyCount); + vkGetPhysicalDeviceQueueFamilyProperties(device, &queueFamilyCount, + queueFamilies.data()); + + int i = 0; + for (const auto& queueFamily : queueFamilies) + { + if (queueFamily.queueFlags & VK_QUEUE_GRAPHICS_BIT) + { + indices.graphicsFamily = i; + } + + VkBool32 presentSupport = false; + vkGetPhysicalDeviceSurfaceSupportKHR(device, i, surface, + &presentSupport); + + if (presentSupport) + { + indices.presentFamily = i; + } + + if (indices.isComplete()) + { + break; + } + + i++; + } + + return indices; +} + +std::vector OceanApplication::get_required_exts() +{ + std::vector extensions = + sf::Vulkan::getGraphicsRequiredInstanceExtensions(); + + if (app_opts.use_external_memory) + { + extensions.push_back( + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME); + } + if (app_opts.use_external_memory) + { + extensions.push_back( + VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME); + } + if (app_opts.validationLayersOn) + { + extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); + } + + return extensions; +} + +std::vector OceanApplication::get_required_dev_exts() +{ + std::vector extensions(deviceExtensions); + + if (app_opts.use_external_memory) + { + extensions.push_back(VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME); +#ifdef _WIN32 + extensions.push_back(VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME); +#elif defined(__linux__) + extensions.push_back(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME); +#endif + } + + return extensions; +} + +bool OceanApplication::check_validation_layer_support() +{ + uint32_t layerCount; + vkEnumerateInstanceLayerProperties(&layerCount, nullptr); + + std::vector availableLayers(layerCount); + vkEnumerateInstanceLayerProperties(&layerCount, availableLayers.data()); + + for (const char* layerName : validationLayers) + { + bool layerFound = false; + + for (const auto& layerProperties : availableLayers) + { + if (strcmp(layerName, layerProperties.layerName) == 0) + { + layerFound = true; + break; + } + } + + if (!layerFound) + { + return false; + } + } + + return true; +} + +VKAPI_ATTR VkBool32 VKAPI_CALL OceanApplication::debug_callback( + VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, void* pUserData) +{ + fprintf(stderr, "validation layer: %s\n", pCallbackData->pMessage); + + return VK_FALSE; +} diff --git a/samples/vulkan/ocean/ocean.frag b/samples/vulkan/ocean/ocean.frag new file mode 100755 index 00000000..42a67c2f --- /dev/null +++ b/samples/vulkan/ocean/ocean.frag @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#version 450 + +layout(location = 0) in vec2 frag_tex_coord; +layout(location = 1) in vec4 ec_pos; + +layout(location = 0) out vec4 out_color; + +layout(binding = 1) uniform sampler2D u_normal_map; +layout(binding = 2) uniform ViewData +{ + uniform mat4 view_mat; + uniform mat4 proj_mat; + uniform vec3 sun_dir; + uniform float choppiness; + uniform float alt_scale; +} +view; + +const vec3 env_specular = vec3(0.8); +const float specular_power = 32.0; +const float specular_scale = 0.75; + +const float fresnel_approx_pow_factor = 2.0; +const float dyna_range = 0.8f; + +const vec3 ocean_bright = vec3(0.5, 1.6, 2.15); +const vec3 ocean_dark = vec3(0.03, 0.06, 0.135); +const float exposure = 0.4; + +vec3 hdr(vec3 color, float exposure) { return 1.0 - exp(-color * exposure); } + +mat3 get_linear_part(mat4 m) +{ + mat3 result; + + result[0][0] = m[0][0]; + result[0][1] = m[0][1]; + result[0][2] = m[0][2]; + + result[1][0] = m[1][0]; + result[1][1] = m[1][1]; + result[1][2] = m[1][2]; + + result[2][0] = m[2][0]; + result[2][1] = m[2][1]; + result[2][2] = m[2][2]; + + return result; +} + +void main() +{ + // normal map computed in opencl kernel + vec4 ndata = texture(u_normal_map, frag_tex_coord); + + // foam, some calculation parameters have been adapted to the initial view + ivec2 ts = textureSize(u_normal_map, 0); + float off_scl_x = 4.0 / ts.x; + float off_scl_y = 4.0 / ts.y; + vec3 n0 = + texture(u_normal_map, frag_tex_coord + vec2(off_scl_x, off_scl_y)).xyz; + vec3 n1 = + texture(u_normal_map, frag_tex_coord + vec2(-off_scl_x, off_scl_y)).xyz; + vec3 n2 = + texture(u_normal_map, frag_tex_coord - vec2(off_scl_x, off_scl_y)).xyz; + vec3 n3 = + texture(u_normal_map, frag_tex_coord - vec2(-off_scl_x, off_scl_y)).xyz; + + float f0 = clamp(abs(dot(n0, n2) * (-0.5) + 0.5), 0.0, 1.0); + float f1 = clamp(abs(dot(n1, n3) * (-0.5) + 0.5), 0.0, 1.0); + + f0 = pow(f0 * 5.0, 2.0); + f1 = pow(f1 * 5.0, 2.0); + + float foam_fac = ndata.w * clamp(max(f0, f1), 0.0, 1.0); + + // preparation of view space lighting computation + mat3 norm_mat = get_linear_part(view.view_mat); + vec3 normal = norm_mat * ndata.xyz; + vec3 light_dir = normalize(norm_mat * view.sun_dir); + vec3 view_dir = normalize(ec_pos.xyz); + + // diffuse + specular + vec3 specular = vec3(0.0); + float n_dot_vp = max(0.0, dot(normal, light_dir)); + float n_dot_e = dot(normal, -view_dir); + float diffuse = clamp(dot(normal, light_dir), 0.0, 1.0); + + if (n_dot_vp > 0.0) + { + vec3 N = normal; + vec3 E = -view_dir; + vec3 R = normalize(reflect(-light_dir, N)); + + // modulate specular scale value based on fragment direction + float dirScale = mix(pow(abs(n_dot_e), 8.0), + 1.0 - pow(abs(1.0 - n_dot_e), 4.0), n_dot_e); + specular = env_specular + * vec3(pow(max(dot(R, E), 0.0), specular_power) * specular_scale + * dirScale); + } + + // refraction factors and final color assembly + float fresnel = clamp( + pow(1.0 + n_dot_e, -fresnel_approx_pow_factor) * dyna_range, 0.0, 1.0); + vec3 bright = fresnel * ocean_bright; + vec3 water = (1.0 - fresnel) * ocean_dark * ocean_bright * diffuse; + vec3 color = bright + water + specular; + out_color = vec4(hdr(color, exposure) + vec3(foam_fac), 1.0); +} diff --git a/samples/vulkan/ocean/ocean.frag.spv b/samples/vulkan/ocean/ocean.frag.spv new file mode 100755 index 00000000..eb331eca Binary files /dev/null and b/samples/vulkan/ocean/ocean.frag.spv differ diff --git a/samples/vulkan/ocean/ocean.hpp b/samples/vulkan/ocean/ocean.hpp new file mode 100755 index 00000000..094f4ef5 --- /dev/null +++ b/samples/vulkan/ocean/ocean.hpp @@ -0,0 +1,338 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef OCEAN_HPP +#define OCEAN_HPP + +#include "ocean_util.hpp" +#include +#include + +class OceanApplication { + +public: + OceanApplication(cl::sdk::options::Window& opts); + void run(); + + void event(const sf::Event& e); // Function that handles render area resize + void keyboard(int key); + void mouseDrag(const int x, const int y); + +public: + cl::sdk::options::SingleDevice dev_opts; + cl::sdk::options::Window win_opts; + CliOptions app_opts; + +private: + std::string app_name; + + sf::WindowBase window; + + Camera camera; + + // ocean texture size - assume uniform x/y + size_t ocean_tex_size = 512; + + // used to specify local work size + size_t group_size = 16; + + // mesh patch size - assume uniform x/y + size_t ocean_grid_size = 256; + + // mesh patch spacing + float mesh_spacing = 2.f; + + bool animate = true; + bool show_fps = true; + + // ocean parameters changed - rebuild initial spectrum resources + bool changed = true; + bool twiddle_factors_init = true; + + // ocean in-factors + float wind_magnitude = 30.f; + float wind_angle = 45.f; + float choppiness = 10.f; + float alt_scale = 20.f; + + float amplitude = 80.f; + float supress_factor = 0.1f; + + // env factors + int sun_elevation = 0; + int sun_azimuth = 90; + bool wireframe_mode = false; + + std::chrono::system_clock::time_point start = + std::chrono::system_clock::now(); + + std::chrono::system_clock::time_point fps_last_time = + std::chrono::system_clock::now(); + int delta_frames = 0; + + VkInstance instance; + VkDebugUtilsMessengerEXT debug_messenger; + VkSurfaceKHR surface; + + VkPhysicalDevice physical_device = VK_NULL_HANDLE; + VkDevice device; + + VkQueue graphics_queue; + VkQueue present_queue; + + VkSwapchainKHR swap_chain; + std::vector swap_chain_images; + VkFormat swap_chain_image_format; + VkExtent2D swap_chain_extent; + std::vector swap_chain_image_views; + std::vector swap_chain_framebuffers; + + VkImage depth_image; + VkDeviceMemory depth_image_memory; + VkImageView depth_image_view; + + VkRenderPass render_pass; + VkDescriptorSetLayout descriptor_set_layout; + VkPipelineLayout pipeline_layout; + VkPipeline graphics_pipeline; + VkPipeline wireframe_pipeline; + + VkCommandPool command_pool; + + VkBuffer staging_tex_buffer; + VkDeviceMemory staging_tex_buffer_memory; + + // Only displacement and normal map images must be shared between OCL and + // Vulkan + enum InteropTexType + { + IOPT_DISPLACEMENT = 0, + IOPT_NORMAL_MAP, + IOPT_COUNT + }; + + struct TextureOCL + { + std::vector images; + std::vector image_memories; + std::vector image_views; + }; + + // vulkan-opencl interop resources + std::array texture_images; + + // Ocean grid vertices and related buffers + std::vector ocean_grid_vertices; + std::vector vertex_buffers; + std::vector vertex_buffer_memories; + + std::vector ocean_grid_indices; + // separate index buffer for [ocean_grid_size] triangle strips + struct IndexBuffer + { + std::vector buffers; + std::vector buffer_memories; + }; + IndexBuffer index_buffer; + + std::array texture_sampler; + + VkDescriptorPool descriptor_pool; + std::vector descriptor_sets; + + std::vector command_buffers; + + std::vector image_available_semaphores; + std::vector render_finished_semaphores; + std::vector in_flight_fences; + std::vector images_in_flight; + size_t current_frame = 0; + +#ifdef _WIN32 + using PFN_vkGetMemoryWin32HandleKHR = VkResult(VKAPI_PTR*)( + VkDevice, const VkMemoryGetWin32HandleInfoKHR*, HANDLE*); + PFN_vkGetMemoryWin32HandleKHR vkGetMemoryWin32HandleKHR = NULL; +#elif defined(__linux__) + PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR = NULL; +#endif + + std::vector uniform_buffers; + std::vector uniform_buffers_memory; + + struct MappedUniformBufferData + { + UniformBufferObject data; + void* buffer_memory; + }; + + std::vector _mapped_unif_data; + + // more OpenCL resources + cl_external_memory_handle_type_khr external_mem_type = 0; + + cl::Context context; + cl::CommandQueue command_queue; + + // generates twiddle factors kernel + cl::Kernel twiddle_kernel; + + // initial spectrum kernel + cl::Kernel init_spectrum_kernel; + + // Fourier components image kernel + cl::Kernel time_spectrum_kernel; + + // FFT kernel + cl::Kernel fft_kernel; + + // inversion kernel + cl::Kernel inversion_kernel; + + // building normals kernel + cl::Kernel normals_kernel; + + // FFT intermediate computation storages without vulkan iteroperability + std::unique_ptr dxyz_coef_mem[3]; + std::unique_ptr hkt_pong_mem; + std::unique_ptr twiddle_factors_mem; + std::unique_ptr h0k_mem; + std::unique_ptr noise_mem; + + size_t ocl_max_img2d_width; + cl_ulong ocl_max_alloc_size, ocl_mem_size; + + // main opencl-vulkan iteroperability resources + // final computation result with displacements and normal map, + // needs to follow swap-chain scheme + std::array>, IOPT_COUNT> + ocl_image_mems; + + void init_openCL(); + void init_openCL_mems(); + void init_vulkan(); + void main_loop(); + void cleanup(); + void create_instance(); + + void populate_dbg_msger_create_info( + VkDebugUtilsMessengerCreateInfoEXT& createInfo); + void setup_dbg_msger(); + + void create_surface(); + void pick_physical_device(); + void create_logical_device(); + void create_swap_chain(); + void create_swap_chain_image_views(); + void create_render_pass(); + void create_uniform_buffer(); + void create_descriptor_set_layout(); + void create_graphics_pipeline(); + void create_framebuffers(); + void create_command_pool(); + void create_vertex_buffers(); + void create_index_buffers(); + void create_texture_images(); + void create_texture_image_views(); + void create_texture_sampler(); + VkImageView create_image_view(VkImage image, VkFormat format, + VkImageAspectFlags aspectFlags); + + void create_shareable_image(uint32_t width, uint32_t height, + VkFormat format, VkImageTiling tiling, + VkImageUsageFlags usage, + VkMemoryPropertyFlags properties, + VkImage& image, VkDeviceMemory& imageMemory, + VkImageType type = VK_IMAGE_TYPE_2D); + + void create_image(uint32_t width, uint32_t height, VkFormat format, + VkImageTiling tiling, VkImageUsageFlags usage, + VkMemoryPropertyFlags properties, VkImage& image, + VkDeviceMemory& imageMemory); + VkFormat find_supported_format(const std::vector& candidates, + VkImageTiling tiling, + VkFormatFeatureFlags features); + + VkFormat find_depth_format(); + + bool has_stencil_component(VkFormat format); + void create_depth_resources(); + + void transition_image_layout(VkImage image, VkFormat format, + VkImageLayout oldLayout, + VkImageLayout newLayout, uint32_t layers = 1); + + void copy_buffer_to_image(VkBuffer buffer, VkImage image, uint32_t width, + uint32_t height); + + void create_descriptor_pool(); + + void create_descriptor_sets(); + void create_buffer(VkDeviceSize size, VkBufferUsageFlags usage, + VkMemoryPropertyFlags properties, VkBuffer& buffer, + VkDeviceMemory& bufferMemory); + + void copy_buffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size); + uint32_t find_memory_type(uint32_t typeFilter, + VkMemoryPropertyFlags properties); + + VkCommandBuffer begin_single_time_commands(); + + void end_single_time_commands(VkCommandBuffer commandBuffer); + void create_command_buffers(); + + void create_sync_objects(); + void update_uniforms(uint32_t currentImage); + + void show_fps_window_title(); + void update_spectrum(uint32_t currentImage, float elapsed); + void update_ocean(uint32_t currentImage); + + void draw_frame(); + + void check_openCL_ext_mem_support(cl::Device& device); + + VkShaderModule create_shader_module(const std::vector& code); + + VkSurfaceFormatKHR choose_swap_surf_format( + const std::vector& availableFormats); + + VkPresentModeKHR choose_swap_present_mode( + const std::vector& availablePresentModes); + + VkExtent2D choose_swap_extent(const VkSurfaceCapabilitiesKHR& capabilities); + + SwapChainSupportDetails query_swap_chain_support(VkPhysicalDevice device); + + bool is_device_suitable(VkPhysicalDevice device); + + bool check_device_extension_support(VkPhysicalDevice device); + + QueueFamilyIndices find_queue_families(VkPhysicalDevice device); + + std::vector get_required_exts(); + + std::vector get_required_dev_exts(); + + bool check_validation_layer_support(); + + static VKAPI_ATTR VkBool32 VKAPI_CALL + debug_callback(VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT* pCallbackData, + void* pUserData); +}; + +#endif // OCEAN_HPP diff --git a/samples/vulkan/ocean/ocean.png b/samples/vulkan/ocean/ocean.png new file mode 100644 index 00000000..f112470b Binary files /dev/null and b/samples/vulkan/ocean/ocean.png differ diff --git a/samples/vulkan/ocean/ocean.vert b/samples/vulkan/ocean/ocean.vert new file mode 100755 index 00000000..a4a04adf --- /dev/null +++ b/samples/vulkan/ocean/ocean.vert @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#version 450 + +layout(location = 0) out vec2 frag_tex_coord; +layout(location = 1) out vec4 ec_pos; + +layout(location = 0) in vec3 in_position; +layout(location = 1) in vec2 in_tex_coords; + +layout(set = 0, binding = 0) uniform sampler2D u_displacement_map; +layout(std140, set = 0, binding = 2) uniform ViewData +{ + uniform mat4 view_mat; + uniform mat4 proj_mat; + uniform vec3 sun_dir; + uniform float choppiness; + uniform float alt_scale; +} +view; + +void main() +{ + vec3 displ = texture(u_displacement_map, in_tex_coords).rbg; + displ.xy *= view.choppiness; + displ.z *= view.alt_scale; + vec3 ocean_vert = in_position + displ; + ec_pos = view.view_mat * vec4(ocean_vert, 1.0); + gl_Position = view.proj_mat * ec_pos; + frag_tex_coord = in_tex_coords; +} diff --git a/samples/vulkan/ocean/ocean.vert.spv b/samples/vulkan/ocean/ocean.vert.spv new file mode 100755 index 00000000..a5ac0598 Binary files /dev/null and b/samples/vulkan/ocean/ocean.vert.spv differ diff --git a/samples/vulkan/ocean/ocean_util.hpp b/samples/vulkan/ocean/ocean_util.hpp new file mode 100755 index 00000000..55775451 --- /dev/null +++ b/samples/vulkan/ocean/ocean_util.hpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef OCEAN_UTIL_HPP +#define OCEAN_UTIL_HPP + +#ifdef _WIN32 +#include +#include +#include +#define VK_USE_PLATFORM_WIN32_KHR +#endif + +#include +#include + +#include + +#define CL_HPP_TARGET_OPENCL_VERSION 300 + +const float DRAG_SPEED_FAC = 0.2f; +const float ROLL_SPEED_FAC = 8.f; +const int MAX_FRAMES_IN_FLIGHT = 2; + +#define test_error(errCode, msg) \ + { \ + auto errCodeResult = errCode; \ + if (errCodeResult != CL_SUCCESS) \ + { \ + print_error(errCodeResult, msg); \ + return errCode; \ + } \ + } + +static uint32_t reverse_bits(uint32_t n, uint32_t log_2_N) +{ + uint32_t r = 0; + for (uint32_t j = 0; j < log_2_N; j++) + { + r = (r << 1) + (n & 1); + n >>= 1; + } + return r; +} + +const std::vector validationLayers = { + "VK_LAYER_KHRONOS_validation", + //"VK_LAYER_LUNARG_api_dump", // useful for debugging but adds a LOT of + // output! +}; + +const std::vector deviceExtensions = { + VK_KHR_SWAPCHAIN_EXTENSION_NAME, +}; + +static VkResult CreateDebugUtilsMessengerEXT( + VkInstance instance, const VkDebugUtilsMessengerCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkDebugUtilsMessengerEXT* pDebugMessenger) +{ + auto func = (PFN_vkCreateDebugUtilsMessengerEXT)vkGetInstanceProcAddr( + instance, "vkCreateDebugUtilsMessengerEXT"); + if (func != nullptr) + { + return func(instance, pCreateInfo, pAllocator, pDebugMessenger); + } + else + { + return VK_ERROR_EXTENSION_NOT_PRESENT; + } +} + +static void +DestroyDebugUtilsMessengerEXT(VkInstance instance, + VkDebugUtilsMessengerEXT debugMessenger, + const VkAllocationCallbacks* pAllocator) +{ + auto func = (PFN_vkDestroyDebugUtilsMessengerEXT)vkGetInstanceProcAddr( + instance, "vkDestroyDebugUtilsMessengerEXT"); + if (func != nullptr) + { + func(instance, debugMessenger, pAllocator); + } +} + +struct QueueFamilyIndices +{ + uint32_t graphicsFamily; + uint32_t presentFamily; + + QueueFamilyIndices(): graphicsFamily(~0), presentFamily(~0) {} + + bool isComplete() { return graphicsFamily != ~0 && presentFamily != ~0; } +}; + +struct SwapChainSupportDetails +{ + VkSurfaceCapabilitiesKHR capabilities; + std::vector formats; + std::vector presentModes; +}; + +struct UniformBufferObject +{ + alignas(4) glm::mat4 view_mat; + alignas(4) glm::mat4 proj_mat; + alignas(4) glm::vec3 sun_dir = glm::normalize(glm::vec3(0.f, 1.f, 1.f)); + alignas(4) std::float_t choppiness = 1.f; + alignas(4) std::float_t alt_scale = 1.f; +}; + +struct Vertex +{ + + glm::vec3 pos; + glm::vec2 tc; + + static VkVertexInputBindingDescription getBindingDescription() + { + VkVertexInputBindingDescription bindingDescription{}; + + bindingDescription.binding = 0; + bindingDescription.stride = sizeof(Vertex); + bindingDescription.inputRate = VK_VERTEX_INPUT_RATE_VERTEX; + + return bindingDescription; + } + + static std::array + getAttributeDescriptions() + { + std::array + attributeDescriptions{}; + + attributeDescriptions[0].binding = 0; + attributeDescriptions[0].location = 0; + attributeDescriptions[0].format = VK_FORMAT_R32G32B32_SFLOAT; + attributeDescriptions[0].offset = offsetof(Vertex, pos); + + attributeDescriptions[1].binding = 0; + attributeDescriptions[1].location = 1; + attributeDescriptions[1].format = VK_FORMAT_R32G32_SFLOAT; + attributeDescriptions[1].offset = offsetof(Vertex, tc); + + return attributeDescriptions; + } +}; + +struct Camera +{ + glm::vec3 eye = glm::vec3(0.0f, 0.0f, 20.0f); + glm::vec3 dir = glm::vec3(-0.57359f, 0.73945f, -0.35241f); + glm::vec3 up = glm::vec3(-0.2159f, 0.27846f, 0.93584f); + glm::vec3 rvec = glm::vec3(1.0f, 0.0f, 0.0f); + glm::vec2 begin = glm::vec2(-1.0f, -1.0f); + float yaw = 37.8f; + float pitch = 69.3649f; + bool drag = false; +}; + +struct CliOptions +{ + std::int32_t vulkan_device = -1; + + bool immediate = false; + + bool linearImages = false; + bool device_local_images = true; + bool use_external_memory = true; + bool validationLayersOn = false; +}; + +#endif // OCEAN_UTIL_HPP diff --git a/samples/vulkan/ocean/time_spectrum.cl b/samples/vulkan/ocean/time_spectrum.cl new file mode 100644 index 00000000..ad02bf22 --- /dev/null +++ b/samples/vulkan/ocean/time_spectrum.cl @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +constant float PI = 3.14159265359; +constant float G = 9.81; +constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST | CLK_NORMALIZED_COORDS_FALSE; + +typedef float2 complex; + +complex mul(complex c0, complex c1) +{ + return (complex)(c0.x * c1.x - c0.y * c1.y, c0.x * c1.y + c0.y * c1.x); +} + +complex add(complex c0, complex c1) +{ + return (complex)(c0.x + c1.x, c0.y + c1.y); +} + +complex conj(complex c) +{ + return (complex)(c.x, -c.y); +} + +kernel void spectrum( float dt, int2 patch_info, + read_only image2d_t src, write_only image2d_t dst_x, + write_only image2d_t dst_y, write_only image2d_t dst_z ) +{ + int2 uv = (int2)((int)get_global_id(0), (int)get_global_id(1)); + int res = patch_info.y; + float2 wave_vec = convert_float2(uv) - (float2)((float)(res-1)/2.f); + float2 k = (2.f * PI * wave_vec) / patch_info.x; + float k_mag = length(k); + + float w = sqrt(G * k_mag); + + float4 h0k = read_imagef(src, sampler, uv); + complex fourier_amp = (complex)(h0k.x, h0k.y); + complex fourier_amp_conj = conj((complex)(h0k.z, h0k.w)); + + float cos_wt = cos(w*dt); + float sin_wt = sin(w*dt); + + // euler formula + complex exp_iwt = (complex)(cos_wt, sin_wt); + complex exp_iwt_inv = (complex)(cos_wt, -sin_wt); + + // dy + complex h_k_t_dy = add(mul(fourier_amp, exp_iwt), (mul(fourier_amp_conj, exp_iwt_inv))); + + // dx + complex dx = (complex)(0.0,-k.x/k_mag); + complex h_k_t_dx = mul(dx, h_k_t_dy); + + // dz + complex dz = (complex)(0.0,-k.y/k_mag); + complex h_k_t_dz = mul(dz, h_k_t_dy); + + // amplitude + write_imagef(dst_y, uv, (float4)(h_k_t_dy.x, h_k_t_dy.y, 0, 1)); + + // choppiness + write_imagef(dst_x, uv, (float4)(h_k_t_dx.x, h_k_t_dx.y, 0, 1)); + write_imagef(dst_z, uv, (float4)(h_k_t_dz.x, h_k_t_dz.y, 0, 1)); +} diff --git a/samples/vulkan/ocean/twiddle.cl b/samples/vulkan/ocean/twiddle.cl new file mode 100644 index 00000000..4052eed4 --- /dev/null +++ b/samples/vulkan/ocean/twiddle.cl @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024 Mobica Limited, Marcin Hajder + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +constant float PI = 3.14159265359; + +typedef float2 complex; + +kernel void generate( int resolution, global int * bit_reversed, write_only image2d_t dst ) +{ + int2 uv = (int2)((int)get_global_id(0), (int)get_global_id(1)); + float k = fmod(uv.y * ((float)(resolution) / pow(2.f, (float)(uv.x+1))), resolution); + complex twiddle = (complex)( cos(2.0*PI*k/(float)(resolution)), sin(2.0*PI*k/(float)(resolution))); + + int butterflyspan = (int)(pow(2.f, (float)(uv.x))); + int butterflywing; + + if (fmod(uv.y, pow(2.f, (float)(uv.x + 1))) < pow(2.f, (float)(uv.x))) + butterflywing = 1; + else + butterflywing = 0; + + // first stage, bit reversed indices + if (uv.x == 0) { + // top butterfly wing + if (butterflywing == 1) + write_imagef(dst, uv, (float4)(twiddle.x, twiddle.y, bit_reversed[(int)(uv.y)], bit_reversed[(int)(uv.y + 1)])); + // bot butterfly wing + else + write_imagef(dst, uv, (float4)(twiddle.x, twiddle.y, bit_reversed[(int)(uv.y - 1)], bit_reversed[(int)(uv.y)])); + } + // second to log2(resolution) stage + else { + // top butterfly wing + if (butterflywing == 1) + write_imagef(dst, uv, (float4)(twiddle.x, twiddle.y, uv.y, uv.y + butterflyspan)); + // bot butterfly wing + else + write_imagef(dst, uv, (float4)(twiddle.x, twiddle.y, uv.y - butterflyspan, uv.y)); + } +}