diff --git a/assets/shader.vert b/assets/shader.vert index 1d47401..ca41ab9 100644 Binary files a/assets/shader.vert and b/assets/shader.vert differ diff --git a/ext/CMakeLists.txt b/ext/CMakeLists.txt index 94c6e10..bd7eb32 100644 --- a/ext/CMakeLists.txt +++ b/ext/CMakeLists.txt @@ -27,6 +27,10 @@ target_compile_definitions(glm PUBLIC message(STATUS "[Vulkan-Headers]") add_subdirectory(src/Vulkan-Headers) +# add VulkanMemoryAllocator to build tree +message(STATUS "[VulkanMemoryAllocator]") +add_subdirectory(src/VulkanMemoryAllocator) + # setup Dear ImGui library message(STATUS "[Dear ImGui]") add_library(imgui) @@ -55,6 +59,28 @@ target_sources(imgui PRIVATE src/imgui/backends/imgui_impl_vulkan.h ) +# setup vma library (source file with VMA interface) +message(STATUS "[vma]") +add_library(vma) +add_library(vma::vma ALIAS vma) +target_link_libraries(vma PUBLIC + Vulkan::Headers + GPUOpen::VulkanMemoryAllocator +) +target_include_directories(vma SYSTEM PUBLIC + src/VulkanMemoryAllocator/include +) +target_compile_definitions(vma PUBLIC + VMA_STATIC_VULKAN_FUNCTIONS=0 + VMA_DYNAMIC_VULKAN_FUNCTIONS=1 +) +target_sources(vma PRIVATE + vk_mem_alloc.cpp +) + +# ignore compiler warnings +target_compile_options(vma PRIVATE -w) + # declare ext library target add_library(${PROJECT_NAME} INTERFACE) add_library(learn-vk::ext ALIAS ${PROJECT_NAME}) @@ -63,6 +89,7 @@ add_library(learn-vk::ext ALIAS ${PROJECT_NAME}) target_link_libraries(${PROJECT_NAME} INTERFACE glm::glm imgui::imgui + vma::vma ) # setup preprocessor defines diff --git a/ext/src.zip b/ext/src.zip index 032ad24..a308392 100644 Binary files a/ext/src.zip and b/ext/src.zip differ diff --git a/ext/vk_mem_alloc.cpp b/ext/vk_mem_alloc.cpp new file mode 100644 index 0000000..3fd28ba --- /dev/null +++ b/ext/vk_mem_alloc.cpp @@ -0,0 +1,3 @@ +#define VMA_IMPLEMENTATION + +#include <vk_mem_alloc.h> diff --git a/guide/src/SUMMARY.md b/guide/src/SUMMARY.md index 6c9363b..a465269 100644 --- a/guide/src/SUMMARY.md +++ b/guide/src/SUMMARY.md @@ -33,3 +33,13 @@ - [GLSL to SPIR-V](shader_objects/glsl_to_spir_v.md) - [Drawing a Triangle](shader_objects/drawing_triangle.md) - [Graphics Pipelines](shader_objects/pipelines.md) + +# Shader Resources + +- [Memory Allocation](memory/README.md) + - [Vulkan Memory Allocator](memory/vma.md) + - [Buffers](memory/buffers.md) + - [Vertex Buffer](memory/vertex_buffer.md) + - [Command Block](memory/command_block.md) + - [Device Buffers](memory/device_buffers.md) + - [Images](memory/images.md) diff --git a/guide/src/memory/README.md b/guide/src/memory/README.md new file mode 100644 index 0000000..b58383a --- /dev/null +++ b/guide/src/memory/README.md @@ -0,0 +1,5 @@ +# Memory Allocation + +Vulkan is an explicit API: [allocating memory](https://docs.vulkan.org/guide/latest/memory_allocation.html) that the device can use is the application's responsibility. The specifics can get quite complicated, but as recommended by the spec, we shall simply defer all that to a library: [Vulkan Memory Allocator (VMA)](https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator). + +Vulkan exposes two kinds of objects that use such allocated memory: Buffers and Images. VMA offers transparent support for both: we just have to allocate/free buffers and images through VMA instead of going through the device directly. Unlike memory allocation / object construction on the CPU, there are many more parameters (than, say, alignment and size) to provide when creating buffers and images.
As you might have guessed, we shall constrain ourselves to a subset that's relevant for shader resources: vertex buffers, uniform/storage buffers, and texture images. diff --git a/guide/src/memory/buffers.md b/guide/src/memory/buffers.md new file mode 100644 index 0000000..4c74806 --- /dev/null +++ b/guide/src/memory/buffers.md @@ -0,0 +1,94 @@ +# Buffers + +First add the RAII wrapper components for VMA buffers: + +```cpp +struct RawBuffer { + [[nodiscard]] auto mapped_span() const -> std::span<std::byte> { + return std::span{static_cast<std::byte*>(mapped), size}; + } + + auto operator==(RawBuffer const& rhs) const -> bool = default; + + VmaAllocator allocator{}; + VmaAllocation allocation{}; + vk::Buffer buffer{}; + vk::DeviceSize size{}; + void* mapped{}; +}; + +struct BufferDeleter { + void operator()(RawBuffer const& raw_buffer) const noexcept; +}; + +// ... +void BufferDeleter::operator()(RawBuffer const& raw_buffer) const noexcept { + vmaDestroyBuffer(raw_buffer.allocator, raw_buffer.buffer, + raw_buffer.allocation); +} +``` + +Buffers can be backed by host (RAM) or device (VRAM) memory: the former is mappable and thus useful for data that changes every frame, while the latter is faster for the GPU to access but requires extra work to copy data into. Add the related types and a create function: + +```cpp +struct BufferCreateInfo { + VmaAllocator allocator; + vk::BufferUsageFlags usage; + std::uint32_t queue_family; +}; + +enum class BufferMemoryType : std::int8_t { Host, Device }; + +[[nodiscard]] auto create_buffer(BufferCreateInfo const& create_info, + BufferMemoryType memory_type, + vk::DeviceSize size) -> Buffer; + +// ... +auto vma::create_buffer(BufferCreateInfo const& create_info, + BufferMemoryType const memory_type, + vk::DeviceSize const size) -> Buffer { + if (size == 0) { + std::println(stderr, "Buffer cannot be 0-sized"); + return {}; + } + + auto allocation_ci = VmaAllocationCreateInfo{}; + allocation_ci.flags = + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + auto usage = create_info.usage; + if (memory_type == BufferMemoryType::Device) { + allocation_ci.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + // device buffers need to support TransferDst. + usage |= vk::BufferUsageFlagBits::eTransferDst; + } else { + allocation_ci.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + // host buffers can provide mapped memory. + allocation_ci.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; + } + + auto buffer_ci = vk::BufferCreateInfo{}; + buffer_ci.setQueueFamilyIndices(create_info.queue_family) + .setSize(size) + .setUsage(usage); + auto vma_buffer_ci = static_cast<VkBufferCreateInfo>(buffer_ci); + + VmaAllocation allocation{}; + VkBuffer buffer{}; + auto allocation_info = VmaAllocationInfo{}; + auto const result = + vmaCreateBuffer(create_info.allocator, &vma_buffer_ci, &allocation_ci, + &buffer, &allocation, &allocation_info); + if (result != VK_SUCCESS) { + std::println(stderr, "Failed to create VMA Buffer"); + return {}; + } + + return RawBuffer{ + .allocator = create_info.allocator, + .allocation = allocation, + .buffer = buffer, + .size = size, + .mapped = allocation_info.pMappedData, + }; +} +``` diff --git a/guide/src/memory/command_block.md b/guide/src/memory/command_block.md new file mode 100644 index 0000000..6f5812b --- /dev/null +++ b/guide/src/memory/command_block.md @@ -0,0 +1,84 @@ +# Command Block + +Long-lived vertex buffers perform better when backed by Device memory, especially for 3D meshes. Data is transferred to device buffers in two steps: + +1. 
Allocate a host buffer and copy the data to its mapped memory +1. Allocate a device buffer, record a Buffer Copy operation and submit it + +The second step requires a command buffer and queue submission (_and_ waiting for the submitted work to complete). Encapsulate this behavior in a class; it will also be used when creating images: + +```cpp +class CommandBlock { + public: + explicit CommandBlock(vk::Device device, vk::Queue queue, + vk::CommandPool command_pool); + + [[nodiscard]] auto command_buffer() const -> vk::CommandBuffer { + return *m_command_buffer; + } + + void submit_and_wait(); + + private: + vk::Device m_device{}; + vk::Queue m_queue{}; + vk::UniqueCommandBuffer m_command_buffer{}; +}; +``` + +The constructor takes an existing command pool created for such ad-hoc allocations, and the queue to submit to later. This way it can be passed around after creation and used by other code. + +```cpp +CommandBlock::CommandBlock(vk::Device const device, vk::Queue const queue, + vk::CommandPool const command_pool) + : m_device(device), m_queue(queue) { + // allocate a UniqueCommandBuffer which will free the underlying command + // buffer from its owning pool on destruction. + auto allocate_info = vk::CommandBufferAllocateInfo{}; + allocate_info.setCommandPool(command_pool) + .setCommandBufferCount(1) + .setLevel(vk::CommandBufferLevel::ePrimary); + // all the current VulkanHPP functions for UniqueCommandBuffer allocation + // return vectors. + auto command_buffers = m_device.allocateCommandBuffersUnique(allocate_info); + m_command_buffer = std::move(command_buffers.front()); + + // start recording commands before returning. + auto begin_info = vk::CommandBufferBeginInfo{}; + begin_info.setFlags(vk::CommandBufferUsageFlagBits::eOneTimeSubmit); + m_command_buffer->begin(begin_info); +} +``` + +`submit_and_wait()` resets the unique command buffer at the end, to free it from its command pool: + +```cpp +void CommandBlock::submit_and_wait() { + if (!m_command_buffer) { return; } + + // end recording and submit. + m_command_buffer->end(); + auto submit_info = vk::SubmitInfo2KHR{}; + auto const command_buffer_info = + vk::CommandBufferSubmitInfo{*m_command_buffer}; + submit_info.setCommandBufferInfos(command_buffer_info); + auto fence = m_device.createFenceUnique({}); + m_queue.submit2(submit_info, *fence); + + // wait for submit fence to be signaled. + static constexpr auto timeout_v = + static_cast<std::uint64_t>(std::chrono::nanoseconds(30s).count()); + auto const result = m_device.waitForFences(*fence, vk::True, timeout_v); + if (result != vk::Result::eSuccess) { + std::println(stderr, "Failed to submit Command Buffer"); + } + // free the command buffer. + m_command_buffer.reset(); +} +``` + +## Multithreading considerations + +Instead of blocking the main thread on every Command Block's `submit_and_wait()`, you might be wondering if command block usage could be multithreaded. The answer is yes! But with some extra work: each thread will require its own command pool - just using one owned (unique) pool per Command Block (with no need to free the buffer) is a good starting point. All queue operations need to be synchronized, i.e. turned into critical sections protected by a mutex. This includes Swapchain acquire/present calls, and Queue submissions. A `class Queue` value type that stores a copy of the `vk::Queue` and a pointer/reference to its `std::mutex` - and wraps the submit call - can be passed to command blocks.
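A minimal sketch of such a wrapper, using a hypothetical `Queue` class of our own (it is not part of the guide's current code), could look like this:

```cpp
// sketch: requires <mutex>. Serializes all submissions that go through this
// queue; whoever owns the vk::Queue also owns the mutex.
class Queue {
 public:
  explicit Queue(vk::Queue queue, std::mutex& mutex)
      : m_queue(queue), m_mutex(&mutex) {}

  void submit2(vk::SubmitInfo2 const& submit_info, vk::Fence const fence) const {
    auto lock = std::scoped_lock{*m_mutex};
    m_queue.submit2(submit_info, fence);
  }

 private:
  vk::Queue m_queue{};
  std::mutex* m_mutex{};
};
```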
Just this much will enable asynchronous asset loading etc, as each loading thread will use its own command pool, and queue submissions all around will be critical sections. `VmaAllocator` is internally synchronized (can be disabled at build time), so performing allocations through the same allocator on multiple threads is safe. + +For multi-threaded rendering, use a Secondary command buffer per thread to record rendering commands, accumulate and execute them in the main (Primary) command buffer currently in `RenderSync`. This is not particularly helpful unless you have thousands of expensive draw calls and dozens of render passes, as recording even a hundred draws will likely be faster on a single thread. diff --git a/guide/src/memory/device_buffers.md b/guide/src/memory/device_buffers.md new file mode 100644 index 0000000..804c704 --- /dev/null +++ b/guide/src/memory/device_buffers.md @@ -0,0 +1,133 @@ +# Device Buffers + +This guide will only use device buffers for vertex buffers, where both vertex and index data will be strung together in a single VBO. The create function can thus take the data and perform the buffer copy operation before returning. In essence this return value is a "GPU const" buffer. To enable utilizing separate spans for vertices and indices (instead of forcing allocation of a contiguous bytestream and copying the data), the create function takes a slightly awkward span of spans: + +```cpp +// disparate byte spans. +using ByteSpans = std::span const>; + +// returns a Device Buffer with each byte span sequentially written. +[[nodiscard]] auto create_device_buffer(BufferCreateInfo const& create_info, + CommandBlock command_block, + ByteSpans const& byte_spans) -> Buffer; +``` + +Implement `create_device_buffer()`: + +```cpp +auto vma::create_device_buffer(BufferCreateInfo const& create_info, + CommandBlock command_block, + ByteSpans const& byte_spans) -> Buffer { + auto const total_size = std::accumulate( + byte_spans.begin(), byte_spans.end(), 0uz, + [](std::size_t const n, std::span bytes) { + return n + bytes.size(); + }); + + auto staging_ci = create_info; + staging_ci.usage = vk::BufferUsageFlagBits::eTransferSrc; + + // create staging Host Buffer with TransferSrc usage. + auto staging_buffer = + create_buffer(staging_ci, BufferMemoryType::Host, total_size); + // create the Device Buffer. + auto ret = create_buffer(create_info, BufferMemoryType::Device, total_size); + // can't do anything if either buffer creation failed. + if (!staging_buffer.get().buffer || !ret.get().buffer) { return {}; } + + // copy byte spans into staging buffer. + auto dst = staging_buffer.get().mapped_span(); + for (auto const bytes : byte_spans) { + std::memcpy(dst.data(), bytes.data(), bytes.size()); + dst = dst.subspan(bytes.size()); + } + + // record buffer copy operation. + auto buffer_copy = vk::BufferCopy2{}; + buffer_copy.setSize(total_size); + auto copy_buffer_info = vk::CopyBufferInfo2{}; + copy_buffer_info.setSrcBuffer(staging_buffer.get().buffer) + .setDstBuffer(ret.get().buffer) + .setRegions(buffer_copy); + command_block.command_buffer().copyBuffer2(copy_buffer_info); + + // submit and wait. + // waiting here is necessary to keep the staging buffer alive while the GPU + // accesses it through the recorded commands. + // this is also why the function takes ownership of the passed CommandBlock + // instead of just referencing it / taking a vk::CommandBuffer. 
+ command_block.submit_and_wait(); + + return ret; +} +``` + +Add a command block pool to `App`, and a helper function to create command blocks: + +```cpp +void App::create_cmd_block_pool() { + auto command_pool_ci = vk::CommandPoolCreateInfo{}; + command_pool_ci + .setQueueFamilyIndex(m_gpu.queue_family) + // this flag indicates that the allocated Command Buffers will be + // short-lived. + .setFlags(vk::CommandPoolCreateFlagBits::eTransient); + m_cmd_block_pool = m_device->createCommandPoolUnique(command_pool_ci); +} + +auto App::create_command_block() const -> CommandBlock { + return CommandBlock{*m_device, m_queue, *m_cmd_block_pool}; +} +``` + +Update `create_vertex_buffer()` to create a quad with indices: + +```cpp +template <typename T> +[[nodiscard]] constexpr auto to_byte_array(T const& t) { + return std::bit_cast<std::array<std::byte, sizeof(T)>>(t); +} + +// ... +void App::create_vertex_buffer() { + // vertices of a quad. + static constexpr auto vertices_v = std::array{ + Vertex{.position = {-0.5f, -0.5f}, .color = {1.0f, 0.0f, 0.0f}}, + Vertex{.position = {0.5f, -0.5f}, .color = {0.0f, 1.0f, 0.0f}}, + Vertex{.position = {0.5f, 0.5f}, .color = {0.0f, 0.0f, 1.0f}}, + Vertex{.position = {-0.5f, 0.5f}, .color = {1.0f, 1.0f, 0.0f}}, + }; + static constexpr auto indices_v = std::array{ + 0u, 1u, 2u, 2u, 3u, 0u, + }; + static constexpr auto vertices_bytes_v = to_byte_array(vertices_v); + static constexpr auto indices_bytes_v = to_byte_array(indices_v); + static constexpr auto total_bytes_v = + std::array<std::span<std::byte const>, 2>{ + vertices_bytes_v, + indices_bytes_v, + }; + // we want to write total_bytes_v to a Device VertexBuffer | IndexBuffer. + auto const buffer_ci = vma::BufferCreateInfo{ + .allocator = m_allocator.get(), + .usage = vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eIndexBuffer, + .queue_family = m_gpu.queue_family, + }; + m_vbo = vma::create_device_buffer(buffer_ci, create_command_block(), + total_bytes_v); +} +``` + +Update `draw()`: + +```cpp +void App::draw(vk::CommandBuffer const command_buffer) const { + m_shader->bind(command_buffer, m_framebuffer_size); + // single VBO at binding 0 at no offset. + command_buffer.bindVertexBuffers(0, m_vbo.get().buffer, vk::DeviceSize{}); + // u32 indices after offset of 4 vertices. + command_buffer.bindIndexBuffer(m_vbo.get().buffer, 4 * sizeof(Vertex), + vk::IndexType::eUint32); + // m_vbo has 6 indices. + command_buffer.drawIndexed(6, 1, 0, 0, 0); +} +``` + +![VBO Quad](./vbo_quad.png) diff --git a/guide/src/memory/images.md b/guide/src/memory/images.md new file mode 100644 index 0000000..583957c --- /dev/null +++ b/guide/src/memory/images.md @@ -0,0 +1,184 @@ +# Images + +Images have a lot more properties and creation parameters than buffers. We shall constrain ourselves to just two kinds: sampled images (textures) for shaders, and depth images for rendering. 
For now add the foundation types and functions: + +```cpp +struct RawImage { + auto operator==(RawImage const& rhs) const -> bool = default; + + VmaAllocator allocator{}; + VmaAllocation allocation{}; + vk::Image image{}; + vk::Extent2D extent{}; + vk::Format format{}; + std::uint32_t levels{}; +}; + +struct ImageDeleter { + void operator()(RawImage const& raw_image) const noexcept; +}; + +using Image = Scoped<RawImage, ImageDeleter>; + +struct ImageCreateInfo { + VmaAllocator allocator; + std::uint32_t queue_family; +}; + +[[nodiscard]] auto create_image(ImageCreateInfo const& create_info, + vk::ImageUsageFlags usage, std::uint32_t levels, + vk::Format format, vk::Extent2D extent) + -> Image; +``` + +Implementation: + +```cpp +void ImageDeleter::operator()(RawImage const& raw_image) const noexcept { + vmaDestroyImage(raw_image.allocator, raw_image.image, raw_image.allocation); +} + +// ... +auto vma::create_image(ImageCreateInfo const& create_info, + vk::ImageUsageFlags const usage, + std::uint32_t const levels, vk::Format const format, + vk::Extent2D const extent) -> Image { + if (extent.width == 0 || extent.height == 0) { + std::println(stderr, "Images cannot have 0 width or height"); + return {}; + } + auto image_ci = vk::ImageCreateInfo{}; + image_ci.setImageType(vk::ImageType::e2D) + .setExtent({extent.width, extent.height, 1}) + .setFormat(format) + .setUsage(usage) + .setArrayLayers(1) + .setMipLevels(levels) + .setSamples(vk::SampleCountFlagBits::e1) + .setTiling(vk::ImageTiling::eOptimal) + .setInitialLayout(vk::ImageLayout::eUndefined) + .setQueueFamilyIndices(create_info.queue_family); + auto const vk_image_ci = static_cast<VkImageCreateInfo>(image_ci); + + auto allocation_ci = VmaAllocationCreateInfo{}; + allocation_ci.usage = VMA_MEMORY_USAGE_AUTO; + VkImage image{}; + VmaAllocation allocation{}; + auto const result = vmaCreateImage(create_info.allocator, &vk_image_ci, + &allocation_ci, &image, &allocation, {}); + if (result != VK_SUCCESS) { + std::println(stderr, "Failed to create VMA Image"); + return {}; + } + + return RawImage{ + .allocator = create_info.allocator, + .allocation = allocation, + .image = image, + .extent = extent, + .format = format, + .levels = levels, + }; +} +``` + +For creating sampled images, we need both the image bytes and the image size (extent). Wrap these into a struct: + +```cpp +struct Bitmap { + std::span<std::byte const> bytes{}; + glm::ivec2 size{}; +}; +``` + +The creation process is similar to that of device buffers in requiring a staging copy, but it also needs layout transitions. In short: + +1. Create the image and staging buffer +1. Transition the layout from Undefined to TransferDst +1. Record a buffer image copy operation +1. Transition the layout from TransferDst to ShaderReadOnlyOptimal + +```cpp +auto vma::create_sampled_image(ImageCreateInfo const& create_info, + CommandBlock command_block, Bitmap const& bitmap) + -> Image { + // create image. + // no mip-mapping right now: 1 level. + auto const mip_levels = 1u; + auto const usize = glm::uvec2{bitmap.size}; + auto const extent = vk::Extent2D{usize.x, usize.y}; + auto const usage = + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled; + auto ret = create_image(create_info, usage, mip_levels, + vk::Format::eR8G8B8A8Srgb, extent); + + // create staging buffer. 
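+  // note: bitmap.bytes is assumed to be tightly packed RGBA8 data, i.e. bitmap.bytes.size_bytes() == extent.width * extent.height * 4.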
+ auto const buffer_ci = BufferCreateInfo{ + .allocator = create_info.allocator, + .usage = vk::BufferUsageFlagBits::eTransferSrc, + .queue_family = create_info.queue_family, + }; + auto const staging_buffer = create_buffer(buffer_ci, BufferMemoryType::Host, + bitmap.bytes.size_bytes()); + + // can't do anything if either creation failed. + if (!ret.get().image || !staging_buffer.get().buffer) { return {}; } + + // copy bytes into staging buffer. + std::memcpy(staging_buffer.get().mapped, bitmap.bytes.data(), + bitmap.bytes.size_bytes()); + + // transition image for transfer. + auto dependency_info = vk::DependencyInfo{}; + auto subresource_range = vk::ImageSubresourceRange{}; + subresource_range.setAspectMask(vk::ImageAspectFlagBits::eColor) + .setLayerCount(1) + .setLevelCount(mip_levels); + auto barrier = vk::ImageMemoryBarrier2{}; + barrier.setImage(ret.get().image) + .setSrcQueueFamilyIndex(create_info.queue_family) + .setDstQueueFamilyIndex(create_info.queue_family) + .setOldLayout(vk::ImageLayout::eUndefined) + .setNewLayout(vk::ImageLayout::eTransferDstOptimal) + .setSubresourceRange(subresource_range) + .setSrcStageMask(vk::PipelineStageFlagBits2::eTopOfPipe) + .setSrcAccessMask(vk::AccessFlagBits2::eNone) + .setDstStageMask(vk::PipelineStageFlagBits2::eTransfer) + .setDstAccessMask(vk::AccessFlagBits2::eMemoryRead | + vk::AccessFlagBits2::eMemoryWrite); + dependency_info.setImageMemoryBarriers(barrier); + command_block.command_buffer().pipelineBarrier2(dependency_info); + + // record buffer image copy. + auto buffer_image_copy = vk::BufferImageCopy2{}; + auto subresource_layers = vk::ImageSubresourceLayers{}; + subresource_layers.setAspectMask(vk::ImageAspectFlagBits::eColor) + .setLayerCount(1) + .setLayerCount(mip_levels); + buffer_image_copy.setImageSubresource(subresource_layers) + .setImageExtent(vk::Extent3D{extent.width, extent.height, 1}); + auto copy_info = vk::CopyBufferToImageInfo2{}; + copy_info.setDstImage(ret.get().image) + .setDstImageLayout(vk::ImageLayout::eTransferDstOptimal) + .setSrcBuffer(staging_buffer.get().buffer) + .setRegions(buffer_image_copy); + command_block.command_buffer().copyBufferToImage2(copy_info); + + // transition image for sampling. + barrier.setOldLayout(barrier.newLayout) + .setNewLayout(vk::ImageLayout::eShaderReadOnlyOptimal) + .setSrcStageMask(barrier.dstStageMask) + .setSrcAccessMask(barrier.dstAccessMask) + .setDstStageMask(vk::PipelineStageFlagBits2::eAllGraphics) + .setDstAccessMask(vk::AccessFlagBits2::eMemoryRead | + vk::AccessFlagBits2::eMemoryWrite); + dependency_info.setImageMemoryBarriers(barrier); + command_block.command_buffer().pipelineBarrier2(dependency_info); + + command_block.submit_and_wait(); + + return ret; +} +``` + +Before such images can be used as textures, we need to set up Descriptor Set infrastructure. diff --git a/guide/src/memory/vbo_quad.png b/guide/src/memory/vbo_quad.png new file mode 100644 index 0000000..554785b Binary files /dev/null and b/guide/src/memory/vbo_quad.png differ diff --git a/guide/src/memory/vertex_buffer.md b/guide/src/memory/vertex_buffer.md new file mode 100644 index 0000000..75e04d3 --- /dev/null +++ b/guide/src/memory/vertex_buffer.md @@ -0,0 +1,104 @@ +# Vertex Buffer + +The goal here is to move the hard-coded vertices in the shader to application code. For the time being we will use an ad-hoc Host `vma::Buffer` and focus more on the rest of the infrastructure like vertex attributes. 
First add a new header, `vertex.hpp`: + +```cpp +struct Vertex { + glm::vec2 position{}; + glm::vec3 color{1.0f}; +}; + +// two vertex attributes: position at 0, color at 1. +constexpr auto vertex_attributes_v = std::array{ + // the format matches the type and layout of data: vec2 => 2x 32-bit floats. + vk::VertexInputAttributeDescription2EXT{0, 0, vk::Format::eR32G32Sfloat, + offsetof(Vertex, position)}, + // vec3 => 3x 32-bit floats + vk::VertexInputAttributeDescription2EXT{1, 0, vk::Format::eR32G32B32Sfloat, + offsetof(Vertex, color)}, +}; + +// one vertex binding at location 0. +constexpr auto vertex_bindings_v = std::array{ + // we are using interleaved data with a stride of sizeof(Vertex). + vk::VertexInputBindingDescription2EXT{0, sizeof(Vertex), + vk::VertexInputRate::eVertex, 1}, +}; +``` + +Add the vertex attributes and bindings to the Shader Create Info: + +```cpp +// ... +static constexpr auto vertex_input_v = ShaderVertexInput{ + .attributes = vertex_attributes_v, + .bindings = vertex_bindings_v, +}; +auto const shader_ci = ShaderProgram::CreateInfo{ + .device = *m_device, + .vertex_spirv = vertex_spirv, + .fragment_spirv = fragment_spirv, + .vertex_input = vertex_input_v, + .set_layouts = {}, +}; +// ... +``` + +With the vertex input defined, we can update the vertex shader and recompile it: + +```glsl +#version 450 core + +layout (location = 0) in vec2 a_pos; +layout (location = 1) in vec3 a_color; + +layout (location = 0) out vec3 out_color; + +void main() { + const vec2 position = a_pos; + + out_color = a_color; + gl_Position = vec4(position, 0.0, 1.0); +} +``` + +Add a VBO (Vertex Buffer Object) member and create it: + +```cpp +void App::create_vertex_buffer() { + // vertices moved from the shader. + static constexpr auto vertices_v = std::array{ + Vertex{.position = {-0.5f, -0.5f}, .color = {1.0f, 0.0f, 0.0f}}, + Vertex{.position = {0.5f, -0.5f}, .color = {0.0f, 1.0f, 0.0f}}, + Vertex{.position = {0.0f, 0.5f}, .color = {0.0f, 0.0f, 1.0f}}, + }; + + // we want to write vertices_v to a Host VertexBuffer. + auto const buffer_ci = vma::BufferCreateInfo{ + .allocator = m_allocator.get(), + .usage = vk::BufferUsageFlagBits::eVertexBuffer, + .queue_family = m_gpu.queue_family, + }; + m_vbo = vma::create_buffer(buffer_ci, vma::BufferMemoryType::Host, + sizeof(vertices_v)); + + // host buffers have a memory-mapped pointer available to memcpy data to. + std::memcpy(m_vbo.get().mapped, vertices_v.data(), sizeof(vertices_v)); +} +``` + +Bind the VBO before recording the draw call: + +```cpp +// single VBO at binding 0 at no offset. +command_buffer.bindVertexBuffers(0, m_vbo.get().buffer, + vk::DeviceSize{}); +// m_vbo has 3 vertices. +command_buffer.draw(3, 1, 0, 0); +``` + +You should see the same triangle as before. But now we can use whatever set of vertices we like! The Primitive Topology is Triangle List by default, so every three vertices in the array are drawn as a triangle, e.g. for 9 vertices: `[[0, 1, 2], [3, 4, 5], [6, 7, 8]]`, where each inner `[]` represents a triangle made up of the vertices at those indices. Try playing around with custom vertices and topologies, and use RenderDoc to debug unexpected output / bugs. + +Host Vertex Buffers are useful for primitives that are temporary and/or frequently changing, such as UI objects. A 2D framework can use such VBOs exclusively: a simple approach would be a pool of buffers per virtual frame, where for each draw a buffer is obtained from the current virtual frame's pool and the vertices are copied in, as sketched below. 
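As an illustration only, a hypothetical `HostBufferPool` (the name and interface are ours, not part of the guide's code) could look roughly like this, with one pool kept per virtual frame (e.g. `Buffered<HostBufferPool>`):

```cpp
// sketch: reuses Host buffers within a virtual frame instead of creating a
// fresh one for every draw.
class HostBufferPool {
 public:
  explicit HostBufferPool(vma::BufferCreateInfo create_info)
      : m_create_info(create_info) {}

  // call when the owning virtual frame starts (after its fence has been waited on).
  void next_frame() { m_next = 0; }

  // returns a mapped Host buffer with space for at least size bytes.
  [[nodiscard]] auto allocate(vk::DeviceSize const size) -> vma::RawBuffer {
    if (m_next == m_buffers.size() || m_buffers[m_next].get().size < size) {
      auto buffer =
          vma::create_buffer(m_create_info, vma::BufferMemoryType::Host, size);
      if (m_next == m_buffers.size()) {
        m_buffers.push_back(std::move(buffer));
      } else {
        m_buffers[m_next] = std::move(buffer);
      }
    }
    return m_buffers[m_next++].get();
  }

 private:
  vma::BufferCreateInfo m_create_info{};
  std::vector<vma::Buffer> m_buffers{};
  std::size_t m_next{};
};
```

Calling `next_frame()` only after the frame's fence has been waited on ensures a buffer is never overwritten while the GPU may still be reading from it.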
diff --git a/guide/src/memory/vma.md b/guide/src/memory/vma.md new file mode 100644 index 0000000..25e90c7 --- /dev/null +++ b/guide/src/memory/vma.md @@ -0,0 +1,66 @@ +# Vulkan Memory Allocator + +VMA has full CMake support, but it is also a single-header library that requires users to "instantiate" it in a single translation unit. To isolate that into a wrapper library and minimize warning pollution etc., we create our own `vma::vma` target that compiles this source file: + +```cpp +// vk_mem_alloc.cpp +#define VMA_IMPLEMENTATION + +#include <vk_mem_alloc.h> +``` + +Unlike VulkanHPP, VMA's interface is C only, so we shall use our `Scoped` class template to wrap objects in RAII types. The first thing we need is a `VmaAllocator`, which is similar to a `vk::Device` or `GLFWwindow*`: + +```cpp +// vma.hpp +namespace lvk::vma { +struct Deleter { + void operator()(VmaAllocator allocator) const noexcept; +}; + +using Allocator = Scoped<VmaAllocator, Deleter>; + +[[nodiscard]] auto create_allocator(vk::Instance instance, + vk::PhysicalDevice physical_device, + vk::Device device) -> Allocator; +} // namespace lvk::vma + +// vma.cpp +void Deleter::operator()(VmaAllocator allocator) const noexcept { + vmaDestroyAllocator(allocator); +} + +// ... +auto vma::create_allocator(vk::Instance const instance, + vk::PhysicalDevice const physical_device, + vk::Device const device) -> Allocator { + auto const& dispatcher = VULKAN_HPP_DEFAULT_DISPATCHER; + // need to zero initialize C structs, unlike VulkanHPP. + auto vma_vk_funcs = VmaVulkanFunctions{}; + vma_vk_funcs.vkGetInstanceProcAddr = dispatcher.vkGetInstanceProcAddr; + vma_vk_funcs.vkGetDeviceProcAddr = dispatcher.vkGetDeviceProcAddr; + + auto allocator_ci = VmaAllocatorCreateInfo{}; + allocator_ci.physicalDevice = physical_device; + allocator_ci.device = device; + allocator_ci.pVulkanFunctions = &vma_vk_funcs; + allocator_ci.instance = instance; + VmaAllocator ret{}; + auto const result = vmaCreateAllocator(&allocator_ci, &ret); + if (result == VK_SUCCESS) { return ret; } + + throw std::runtime_error{"Failed to create Vulkan Memory Allocator"}; +} +``` + +`App` stores and creates a `vma::Allocator` object: + +```cpp +// ... +vma::Allocator m_allocator{}; // anywhere between m_device and m_shader. + +// ... +void App::create_allocator() { + m_allocator = vma::create_allocator(*m_instance, m_gpu.device, *m_device); +} +``` diff --git a/src/app.cpp b/src/app.cpp index a9e63e5..3f5839e 100644 --- a/src/app.cpp +++ b/src/app.cpp @@ -1,4 +1,6 @@ #include +#include +#include #include #include #include @@ -11,6 +13,11 @@ namespace lvk { using namespace std::chrono_literals; namespace { +template <typename T> +[[nodiscard]] constexpr auto to_byte_array(T const& t) { + return std::bit_cast<std::array<std::byte, sizeof(T)>>(t); +} + [[nodiscard]] auto locate_assets_dir() -> fs::path { // look for '/assets/', starting from the working // directory and walking up the parent directory tree. 
@@ -77,10 +84,14 @@ void App::run() { create_surface(); select_gpu(); create_device(); + create_allocator(); create_swapchain(); create_render_sync(); create_imgui(); create_shader(); + create_cmd_block_pool(); + + create_vertex_buffer(); main_loop(); } @@ -228,23 +239,75 @@ void App::create_imgui() { m_imgui.emplace(imgui_ci); } +void App::create_allocator() { + m_allocator = vma::create_allocator(*m_instance, m_gpu.device, *m_device); +} + void App::create_shader() { auto const vertex_spirv = to_spir_v(asset_path("shader.vert")); auto const fragment_spirv = to_spir_v(asset_path("shader.frag")); + + static constexpr auto vertex_input_v = ShaderVertexInput{ + .attributes = vertex_attributes_v, + .bindings = vertex_bindings_v, + }; auto const shader_ci = ShaderProgram::CreateInfo{ .device = *m_device, .vertex_spirv = vertex_spirv, .fragment_spirv = fragment_spirv, - .vertex_input = {}, + .vertex_input = vertex_input_v, .set_layouts = {}, }; m_shader.emplace(shader_ci); } +void App::create_cmd_block_pool() { + auto command_pool_ci = vk::CommandPoolCreateInfo{}; + command_pool_ci + .setQueueFamilyIndex(m_gpu.queue_family) + // this flag indicates that the allocated Command Buffers will be + // short-lived. + .setFlags(vk::CommandPoolCreateFlagBits::eTransient); + m_cmd_block_pool = m_device->createCommandPoolUnique(command_pool_ci); +} + +void App::create_vertex_buffer() { + // vertices of a quad. + static constexpr auto vertices_v = std::array{ + Vertex{.position = {-0.5f, -0.5f}, .color = {1.0f, 0.0f, 0.0f}}, + Vertex{.position = {0.5f, -0.5f}, .color = {0.0f, 1.0f, 0.0f}}, + Vertex{.position = {0.5f, 0.5f}, .color = {0.0f, 0.0f, 1.0f}}, + Vertex{.position = {-0.5f, 0.5f}, .color = {1.0f, 1.0f, 0.0f}}, + }; + static constexpr auto indices_v = std::array{ + 0u, 1u, 2u, 2u, 3u, 0u, + }; + static constexpr auto vertices_bytes_v = to_byte_array(vertices_v); + static constexpr auto indices_bytes_v = to_byte_array(indices_v); + static constexpr auto total_bytes_v = + std::array, 2>{ + vertices_bytes_v, + indices_bytes_v, + }; + // we want to write total_bytes_v to a Device VertexBuffer | IndexBuffer. + auto const buffer_ci = vma::BufferCreateInfo{ + .allocator = m_allocator.get(), + .usage = vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eIndexBuffer, + .queue_family = m_gpu.queue_family, + }; + m_vbo = vma::create_device_buffer(buffer_ci, create_command_block(), + total_bytes_v); +} + auto App::asset_path(std::string_view const uri) const -> fs::path { return m_assets_dir / uri; } +auto App::create_command_block() const -> CommandBlock { + return CommandBlock{*m_device, m_queue, *m_cmd_block_pool}; +} + void App::main_loop() { while (glfwWindowShouldClose(m_window.get()) == GLFW_FALSE) { glfwPollEvents(); @@ -419,7 +482,12 @@ void App::inspect() { void App::draw(vk::CommandBuffer const command_buffer) const { m_shader->bind(command_buffer, m_framebuffer_size); - // current shader has hard-coded logic for 3 vertices. - command_buffer.draw(3, 1, 0, 0); + // single VBO at binding 0 at no offset. + command_buffer.bindVertexBuffers(0, m_vbo.get().buffer, vk::DeviceSize{}); + // u32 indices after offset of 4 vertices. + command_buffer.bindIndexBuffer(m_vbo.get().buffer, 4 * sizeof(Vertex), + vk::IndexType::eUint32); + // m_vbo has 6 indices. 
+ command_buffer.drawIndexed(6, 1, 0, 0, 0); } } // namespace lvk diff --git a/src/app.hpp b/src/app.hpp index 7f5d47a..5bd094d 100644 --- a/src/app.hpp +++ b/src/app.hpp @@ -1,10 +1,12 @@ #pragma once +#include #include #include #include #include #include #include +#include #include #include @@ -35,9 +37,13 @@ class App { void create_swapchain(); void create_render_sync(); void create_imgui(); + void create_allocator(); void create_shader(); + void create_cmd_block_pool(); + void create_vertex_buffer(); [[nodiscard]] auto asset_path(std::string_view uri) const -> fs::path; + [[nodiscard]] auto create_command_block() const -> CommandBlock; void main_loop(); @@ -61,11 +67,14 @@ class App { vk::UniqueSurfaceKHR m_surface{}; Gpu m_gpu{}; // not an RAII member. vk::UniqueDevice m_device{}; - vk::Queue m_queue{}; // not an RAII member. + vk::Queue m_queue{}; // not an RAII member. + vma::Allocator m_allocator{}; // anywhere between m_device and m_shader. std::optional m_swapchain{}; // command pool for all render Command Buffers. vk::UniqueCommandPool m_render_cmd_pool{}; + // command pool for all Command Blocks. + vk::UniqueCommandPool m_cmd_block_pool{}; // Sync and Command Buffer for virtual frames. Buffered m_render_sync{}; // Current virtual frame index. @@ -75,6 +84,8 @@ class App { std::optional m_shader{}; + vma::Buffer m_vbo{}; + glm::ivec2 m_framebuffer_size{}; std::optional m_render_target{}; bool m_wireframe{}; diff --git a/src/bitmap.hpp b/src/bitmap.hpp new file mode 100644 index 0000000..41004a2 --- /dev/null +++ b/src/bitmap.hpp @@ -0,0 +1,11 @@ +#pragma once +#include +#include +#include + +namespace lvk { +struct Bitmap { + std::span bytes{}; + glm::ivec2 size{}; +}; +} // namespace lvk diff --git a/src/command_block.cpp b/src/command_block.cpp new file mode 100644 index 0000000..1a1258e --- /dev/null +++ b/src/command_block.cpp @@ -0,0 +1,50 @@ +#include +#include +#include + +namespace lvk { +using namespace std::chrono_literals; + +CommandBlock::CommandBlock(vk::Device const device, vk::Queue const queue, + vk::CommandPool const command_pool) + : m_device(device), m_queue(queue) { + // allocate a UniqueCommandBuffer which will free the underlying command + // buffer from its owning pool on destruction. + auto allocate_info = vk::CommandBufferAllocateInfo{}; + allocate_info.setCommandPool(command_pool) + .setCommandBufferCount(1) + .setLevel(vk::CommandBufferLevel::ePrimary); + // all the current VulkanHPP functions for UniqueCommandBuffer allocation + // return vectors. + auto command_buffers = m_device.allocateCommandBuffersUnique(allocate_info); + m_command_buffer = std::move(command_buffers.front()); + + // start recording commands before returning. + auto begin_info = vk::CommandBufferBeginInfo{}; + begin_info.setFlags(vk::CommandBufferUsageFlagBits::eOneTimeSubmit); + m_command_buffer->begin(begin_info); +} + +void CommandBlock::submit_and_wait() { + if (!m_command_buffer) { return; } + + // end recording and submit. + m_command_buffer->end(); + auto submit_info = vk::SubmitInfo2KHR{}; + auto const command_buffer_info = + vk::CommandBufferSubmitInfo{*m_command_buffer}; + submit_info.setCommandBufferInfos(command_buffer_info); + auto fence = m_device.createFenceUnique({}); + m_queue.submit2(submit_info, *fence); + + // wait for submit fence to be signaled. 
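+  // waitForFences() takes the timeout in nanoseconds; 30 seconds is an arbitrary upper bound after which we give up and report an error.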
+ static constexpr auto timeout_v = + static_cast(std::chrono::nanoseconds(30s).count()); + auto const result = m_device.waitForFences(*fence, vk::True, timeout_v); + if (result != vk::Result::eSuccess) { + std::println(stderr, "Failed to submit Command Buffer"); + } + // free the command buffer. + m_command_buffer.reset(); +} +} // namespace lvk diff --git a/src/command_block.hpp b/src/command_block.hpp new file mode 100644 index 0000000..a5719c8 --- /dev/null +++ b/src/command_block.hpp @@ -0,0 +1,21 @@ +#pragma once +#include + +namespace lvk { +class CommandBlock { + public: + explicit CommandBlock(vk::Device device, vk::Queue queue, + vk::CommandPool command_pool); + + [[nodiscard]] auto command_buffer() const -> vk::CommandBuffer { + return *m_command_buffer; + } + + void submit_and_wait(); + + private: + vk::Device m_device{}; + vk::Queue m_queue{}; + vk::UniqueCommandBuffer m_command_buffer{}; +}; +} // namespace lvk diff --git a/src/glsl/shader.vert b/src/glsl/shader.vert index 8efde56..edebf18 100644 --- a/src/glsl/shader.vert +++ b/src/glsl/shader.vert @@ -1,22 +1,13 @@ #version 450 core +layout (location = 0) in vec2 a_pos; +layout (location = 1) in vec3 a_color; + layout (location = 0) out vec3 out_color; void main() { - const vec2 positions[] = { - vec2(-0.5, -0.5), - vec2(0.5, -0.5), - vec2(0.0, 0.5), - }; - - const vec3 colors[] = { - vec3(1.0, 0.0, 0.0), - vec3(0.0, 1.0, 0.0), - vec3(0.0, 0.0, 1.0), - }; - - const vec2 position = positions[gl_VertexIndex]; + const vec2 position = a_pos; - out_color = colors[gl_VertexIndex]; + out_color = a_color; gl_Position = vec4(position, 0.0, 1.0); } diff --git a/src/scoped.hpp b/src/scoped.hpp index 462d6d9..73b6dec 100644 --- a/src/scoped.hpp +++ b/src/scoped.hpp @@ -4,8 +4,7 @@ namespace lvk { template -concept Scopeable = - std::equality_comparable && std::is_default_constructible_v; +concept Scopeable = std::equality_comparable; template class Scoped { diff --git a/src/vertex.hpp b/src/vertex.hpp new file mode 100644 index 0000000..a8c003c --- /dev/null +++ b/src/vertex.hpp @@ -0,0 +1,28 @@ +#pragma once +#include +#include +#include + +namespace lvk { +struct Vertex { + glm::vec2 position{}; + glm::vec3 color{1.0f}; +}; + +// two vertex attributes: position at 0, color at 1. +constexpr auto vertex_attributes_v = std::array{ + // the format matches the type and layout of data: vec2 => 2x 32-bit floats. + vk::VertexInputAttributeDescription2EXT{0, 0, vk::Format::eR32G32Sfloat, + offsetof(Vertex, position)}, + // vec3 => 3x 32-bit floats + vk::VertexInputAttributeDescription2EXT{1, 0, vk::Format::eR32G32B32Sfloat, + offsetof(Vertex, color)}, +}; + +// one vertex binding at location 0. +constexpr auto vertex_bindings_v = std::array{ + // we are using interleaved data with a stride of sizeof(Vertex). 
+ vk::VertexInputBindingDescription2EXT{0, sizeof(Vertex), + vk::VertexInputRate::eVertex, 1}, +}; +} // namespace lvk diff --git a/src/vma.cpp b/src/vma.cpp new file mode 100644 index 0000000..94d9c87 --- /dev/null +++ b/src/vma.cpp @@ -0,0 +1,259 @@ +#include +#include +#include +#include + +namespace lvk { +namespace vma { +void Deleter::operator()(VmaAllocator allocator) const noexcept { + vmaDestroyAllocator(allocator); +} + +void BufferDeleter::operator()(RawBuffer const& raw_buffer) const noexcept { + vmaDestroyBuffer(raw_buffer.allocator, raw_buffer.buffer, + raw_buffer.allocation); +} + +void ImageDeleter::operator()(RawImage const& raw_image) const noexcept { + vmaDestroyImage(raw_image.allocator, raw_image.image, raw_image.allocation); +} +} // namespace vma + +auto vma::create_allocator(vk::Instance const instance, + vk::PhysicalDevice const physical_device, + vk::Device const device) -> Allocator { + auto const& dispatcher = VULKAN_HPP_DEFAULT_DISPATCHER; + // need to zero initialize C structs, unlike VulkanHPP. + auto vma_vk_funcs = VmaVulkanFunctions{}; + vma_vk_funcs.vkGetInstanceProcAddr = dispatcher.vkGetInstanceProcAddr; + vma_vk_funcs.vkGetDeviceProcAddr = dispatcher.vkGetDeviceProcAddr; + + auto allocator_ci = VmaAllocatorCreateInfo{}; + allocator_ci.physicalDevice = physical_device; + allocator_ci.device = device; + allocator_ci.pVulkanFunctions = &vma_vk_funcs; + allocator_ci.instance = instance; + VmaAllocator ret{}; + auto const result = vmaCreateAllocator(&allocator_ci, &ret); + if (result == VK_SUCCESS) { return ret; } + + throw std::runtime_error{"Failed to create Vulkan Memory Allocator"}; +} + +auto vma::create_buffer(BufferCreateInfo const& create_info, + BufferMemoryType const memory_type, + vk::DeviceSize const size) -> Buffer { + if (size == 0) { + std::println(stderr, "Buffer cannot be 0-sized"); + return {}; + } + + auto allocation_ci = VmaAllocationCreateInfo{}; + allocation_ci.flags = + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + auto usage = create_info.usage; + if (memory_type == BufferMemoryType::Device) { + allocation_ci.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + // device buffers need to support TransferDst. + usage |= vk::BufferUsageFlagBits::eTransferDst; + } else { + allocation_ci.usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + // host buffers can provide mapped memory. 
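+    // VMA_ALLOCATION_CREATE_MAPPED_BIT keeps the allocation persistently mapped; the mapped pointer is returned in VmaAllocationInfo::pMappedData.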
+ allocation_ci.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT; + } + + auto buffer_ci = vk::BufferCreateInfo{}; + buffer_ci.setQueueFamilyIndices(create_info.queue_family) + .setSize(size) + .setUsage(usage); + auto vma_buffer_ci = static_cast(buffer_ci); + + VmaAllocation allocation{}; + VkBuffer buffer{}; + auto allocation_info = VmaAllocationInfo{}; + auto const result = + vmaCreateBuffer(create_info.allocator, &vma_buffer_ci, &allocation_ci, + &buffer, &allocation, &allocation_info); + if (result != VK_SUCCESS) { + std::println(stderr, "Failed to create VMA Buffer"); + return {}; + } + + return RawBuffer{ + .allocator = create_info.allocator, + .allocation = allocation, + .buffer = buffer, + .size = size, + .mapped = allocation_info.pMappedData, + }; +} + +auto vma::create_device_buffer(BufferCreateInfo const& create_info, + CommandBlock command_block, + ByteSpans const& byte_spans) -> Buffer { + auto const total_size = std::accumulate( + byte_spans.begin(), byte_spans.end(), 0uz, + [](std::size_t const n, std::span bytes) { + return n + bytes.size(); + }); + + auto staging_ci = create_info; + staging_ci.usage = vk::BufferUsageFlagBits::eTransferSrc; + + // create staging Host Buffer with TransferSrc usage. + auto staging_buffer = + create_buffer(staging_ci, BufferMemoryType::Host, total_size); + // create the Device Buffer. + auto ret = create_buffer(create_info, BufferMemoryType::Device, total_size); + // can't do anything if either buffer creation failed. + if (!staging_buffer.get().buffer || !ret.get().buffer) { return {}; } + + // copy byte spans into staging buffer. + auto dst = staging_buffer.get().mapped_span(); + for (auto const bytes : byte_spans) { + std::memcpy(dst.data(), bytes.data(), bytes.size()); + dst = dst.subspan(bytes.size()); + } + + // record buffer copy operation. + auto buffer_copy = vk::BufferCopy2{}; + buffer_copy.setSize(total_size); + auto copy_buffer_info = vk::CopyBufferInfo2{}; + copy_buffer_info.setSrcBuffer(staging_buffer.get().buffer) + .setDstBuffer(ret.get().buffer) + .setRegions(buffer_copy); + command_block.command_buffer().copyBuffer2(copy_buffer_info); + + // submit and wait. + // waiting here is necessary to keep the staging buffer alive while the GPU + // accesses it through the recorded commands. + // this is also why the function takes ownership of the passed CommandBlock + // instead of just referencing it / taking a vk::CommandBuffer. 
+ command_block.submit_and_wait(); + + return ret; +} + +auto vma::create_image(ImageCreateInfo const& create_info, + vk::ImageUsageFlags const usage, + std::uint32_t const levels, vk::Format const format, + vk::Extent2D const extent) -> Image { + if (extent.width == 0 || extent.height == 0) { + std::println(stderr, "Images cannot have 0 width or height"); + return {}; + } + auto image_ci = vk::ImageCreateInfo{}; + image_ci.setImageType(vk::ImageType::e2D) + .setExtent({extent.width, extent.height, 1}) + .setFormat(format) + .setUsage(usage) + .setArrayLayers(1) + .setMipLevels(levels) + .setSamples(vk::SampleCountFlagBits::e1) + .setTiling(vk::ImageTiling::eOptimal) + .setInitialLayout(vk::ImageLayout::eUndefined) + .setQueueFamilyIndices(create_info.queue_family); + auto const vk_image_ci = static_cast(image_ci); + + auto allocation_ci = VmaAllocationCreateInfo{}; + allocation_ci.usage = VMA_MEMORY_USAGE_AUTO; + VkImage image{}; + VmaAllocation allocation{}; + auto const result = vmaCreateImage(create_info.allocator, &vk_image_ci, + &allocation_ci, &image, &allocation, {}); + if (result != VK_SUCCESS) { + std::println(stderr, "Failed to create VMA Image"); + return {}; + } + + return RawImage{ + .allocator = create_info.allocator, + .allocation = allocation, + .image = image, + .extent = extent, + .format = format, + .levels = levels, + }; +} + +auto vma::create_sampled_image(ImageCreateInfo const& create_info, + CommandBlock command_block, Bitmap const& bitmap) + -> Image { + // create image. + // no mip-mapping right now: 1 level. + auto const mip_levels = 1u; + auto const usize = glm::uvec2{bitmap.size}; + auto const extent = vk::Extent2D{usize.x, usize.y}; + auto const usage = + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled; + auto ret = create_image(create_info, usage, mip_levels, + vk::Format::eR8G8B8A8Srgb, extent); + + // create staging buffer. + auto const buffer_ci = BufferCreateInfo{ + .allocator = create_info.allocator, + .usage = vk::BufferUsageFlagBits::eTransferSrc, + .queue_family = create_info.queue_family, + }; + auto const staging_buffer = create_buffer(buffer_ci, BufferMemoryType::Host, + bitmap.bytes.size_bytes()); + + // can't do anything if either creation failed. + if (!ret.get().image || !staging_buffer.get().buffer) { return {}; } + + // copy bytes into staging buffer. + std::memcpy(staging_buffer.get().mapped, bitmap.bytes.data(), + bitmap.bytes.size_bytes()); + + // transition image for transfer. + auto dependency_info = vk::DependencyInfo{}; + auto subresource_range = vk::ImageSubresourceRange{}; + subresource_range.setAspectMask(vk::ImageAspectFlagBits::eColor) + .setLayerCount(1) + .setLevelCount(mip_levels); + auto barrier = vk::ImageMemoryBarrier2{}; + barrier.setImage(ret.get().image) + .setSrcQueueFamilyIndex(create_info.queue_family) + .setDstQueueFamilyIndex(create_info.queue_family) + .setOldLayout(vk::ImageLayout::eUndefined) + .setNewLayout(vk::ImageLayout::eTransferDstOptimal) + .setSubresourceRange(subresource_range) + .setSrcStageMask(vk::PipelineStageFlagBits2::eTopOfPipe) + .setSrcAccessMask(vk::AccessFlagBits2::eNone) + .setDstStageMask(vk::PipelineStageFlagBits2::eTransfer) + .setDstAccessMask(vk::AccessFlagBits2::eMemoryRead | + vk::AccessFlagBits2::eMemoryWrite); + dependency_info.setImageMemoryBarriers(barrier); + command_block.command_buffer().pipelineBarrier2(dependency_info); + + // record buffer image copy. 
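+  // bufferRowLength and bufferImageHeight are left at 0: the staging bytes are treated as tightly packed for the full imageExtent.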
+ auto buffer_image_copy = vk::BufferImageCopy2{}; + auto subresource_layers = vk::ImageSubresourceLayers{}; + subresource_layers.setAspectMask(vk::ImageAspectFlagBits::eColor) + .setLayerCount(1) + .setLayerCount(mip_levels); + buffer_image_copy.setImageSubresource(subresource_layers) + .setImageExtent(vk::Extent3D{extent.width, extent.height, 1}); + auto copy_info = vk::CopyBufferToImageInfo2{}; + copy_info.setDstImage(ret.get().image) + .setDstImageLayout(vk::ImageLayout::eTransferDstOptimal) + .setSrcBuffer(staging_buffer.get().buffer) + .setRegions(buffer_image_copy); + command_block.command_buffer().copyBufferToImage2(copy_info); + + // transition image for sampling. + barrier.setOldLayout(barrier.newLayout) + .setNewLayout(vk::ImageLayout::eShaderReadOnlyOptimal) + .setSrcStageMask(barrier.dstStageMask) + .setSrcAccessMask(barrier.dstAccessMask) + .setDstStageMask(vk::PipelineStageFlagBits2::eAllGraphics) + .setDstAccessMask(vk::AccessFlagBits2::eMemoryRead | + vk::AccessFlagBits2::eMemoryWrite); + dependency_info.setImageMemoryBarriers(barrier); + command_block.command_buffer().pipelineBarrier2(dependency_info); + + command_block.submit_and_wait(); + + return ret; +} +} // namespace lvk diff --git a/src/vma.hpp b/src/vma.hpp new file mode 100644 index 0000000..2ea912e --- /dev/null +++ b/src/vma.hpp @@ -0,0 +1,89 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace lvk::vma { +struct Deleter { + void operator()(VmaAllocator allocator) const noexcept; +}; + +using Allocator = Scoped; + +[[nodiscard]] auto create_allocator(vk::Instance instance, + vk::PhysicalDevice physical_device, + vk::Device device) -> Allocator; + +struct RawBuffer { + [[nodiscard]] auto mapped_span() const -> std::span { + return std::span{static_cast(mapped), size}; + } + + auto operator==(RawBuffer const& rhs) const -> bool = default; + + VmaAllocator allocator{}; + VmaAllocation allocation{}; + vk::Buffer buffer{}; + vk::DeviceSize size{}; + void* mapped{}; +}; + +struct BufferDeleter { + void operator()(RawBuffer const& raw_buffer) const noexcept; +}; + +using Buffer = Scoped; + +struct BufferCreateInfo { + VmaAllocator allocator; + vk::BufferUsageFlags usage; + std::uint32_t queue_family; +}; + +enum class BufferMemoryType : std::int8_t { Host, Device }; + +[[nodiscard]] auto create_buffer(BufferCreateInfo const& create_info, + BufferMemoryType memory_type, + vk::DeviceSize size) -> Buffer; + +// disparate byte spans. +using ByteSpans = std::span const>; + +// returns a Device Buffer with each byte span sequentially written. +[[nodiscard]] auto create_device_buffer(BufferCreateInfo const& create_info, + CommandBlock command_block, + ByteSpans const& byte_spans) -> Buffer; + +struct RawImage { + auto operator==(RawImage const& rhs) const -> bool = default; + + VmaAllocator allocator{}; + VmaAllocation allocation{}; + vk::Image image{}; + vk::Extent2D extent{}; + vk::Format format{}; + std::uint32_t levels{}; +}; + +struct ImageDeleter { + void operator()(RawImage const& raw_image) const noexcept; +}; + +using Image = Scoped; + +struct ImageCreateInfo { + VmaAllocator allocator; + std::uint32_t queue_family; +}; + +[[nodiscard]] auto create_image(ImageCreateInfo const& create_info, + vk::ImageUsageFlags usage, std::uint32_t levels, + vk::Format format, vk::Extent2D extent) + -> Image; + +[[nodiscard]] auto create_sampled_image(ImageCreateInfo const& create_info, + CommandBlock command_block, + Bitmap const& bitmap) -> Image; +} // namespace lvk::vma