diff --git a/backends/vulkan/runtime/api/containers/Tensor.cpp b/backends/vulkan/runtime/api/containers/Tensor.cpp
index e9437e3bd09..fedb0d7f173 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.cpp
+++ b/backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -567,6 +567,7 @@ vTensor::vTensor(
       max_ubo_nbytes_{
           calculate_max_ubo_nbytes(min_nbytes_per_ubo_, storage_type)},
       uniforms_(),
+      buffer_meta_(),
       // Construct Tensor storage
       storage_(std::make_shared<vTensorStorage>(
           context,
@@ -611,6 +612,7 @@ vTensor::vTensor(
       max_ubo_nbytes_{
           calculate_max_ubo_nbytes(min_nbytes_per_ubo_, utils::kTexture3D)},
       uniforms_(),
+      buffer_meta_(),
       // Construct Tensor storage
       storage_(std::make_shared<vTensorStorage>(context, image)) {
   uniform_data_ = std::make_shared<UniformData>(UniformData{
@@ -634,6 +636,7 @@ vTensor::vTensor(vTensor& other)
       min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
       max_ubo_nbytes_{other.max_ubo_nbytes_},
       uniforms_(),
+      buffer_meta_(),
       // Copy Tensor storage
       storage_(other.storage_) {
   uniform_data_ = std::make_shared<UniformData>(*other.get_uniform_data());
@@ -659,6 +662,7 @@ vTensor::vTensor(
       min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
       max_ubo_nbytes_{other.max_ubo_nbytes_},
       uniforms_(),
+      buffer_meta_(),
       // Copy Tensor storage
       storage_(other.storage_) {
   uniform_data_ = std::make_shared<UniformData>(UniformData{
@@ -711,6 +715,38 @@ uint32_t vTensor::UniformData::write_attribute(
   return 0;
 }
 
+vTensor::BufferMetadata::BufferMetadata(
+    std::vector<int64_t>& src_sizes,
+    std::vector<int64_t>& src_dim_order,
+    std::vector<int64_t>& src_strides,
+    size_t src_numel) {
+  update(src_sizes, src_dim_order, src_strides, src_numel);
+}
+
+void vTensor::BufferMetadata::update(
+    std::vector<int64_t>& src_sizes,
+    std::vector<int64_t>& src_dim_order,
+    std::vector<int64_t>& src_strides,
+    size_t src_numel) {
+  int32_t fixed_ndim = utils::safe_downcast<int32_t>(kTensorDimLimit);
+
+  std::vector<uint32_t> fu_sizes = flip_and_unsqueeze<uint32_t>(
+      src_sizes, kTensorSizes, src_numel, fixed_ndim);
+  std::vector<uint32_t> fu_dim_order = flip_and_unsqueeze<uint32_t>(
+      src_dim_order, kTensorDimOrder, src_numel, fixed_ndim);
+  std::vector<uint32_t> fu_strides = flip_and_unsqueeze<uint32_t>(
+      src_strides, kTensorStrides, src_numel, fixed_ndim);
+
+  for (int i = 0; i < fixed_ndim; ++i) {
+    sizes[i] = fu_sizes.at(i);
+    dim_order[i] = fu_dim_order.at(i);
+    strides[i] = fu_strides.at(i);
+  }
+
+  ndim = utils::safe_downcast<uint32_t>(src_sizes.size());
+  numel = utils::safe_downcast<uint32_t>(src_numel);
+}
+
 vkapi::VulkanImage& vTensor::image(
     vkapi::PipelineBarrier& pipeline_barrier,
     const vkapi::PipelineStageFlags stage) & {
@@ -799,6 +835,15 @@ const vkapi::BufferBindInfo vTensor::numel_ubo() {
   return metadata_ubo_impl(&numel_uniform_offset_, uniform_data_->numel);
 }
 
+const vkapi::BufferBindInfo vTensor::buffer_meta_ubo() {
+  size_t ubo_nbytes = sizeof(BufferMetadata);
+  if (!buffer_meta_.buffer()) {
+    BufferMetadata data(sizes_, dim_order_, strides_, numel_);
+    buffer_meta_ = ParamsBuffer(storage_->context_, data);
+  }
+  return vkapi::BufferBindInfo(buffer_meta_.buffer(), 0, ubo_nbytes);
+}
+
 VkMemoryRequirements vTensor::get_memory_requirements() const {
   switch (storage_type()) {
     case utils::kBuffer:
@@ -875,6 +920,11 @@ void vTensor::update_metadata() {
     uniforms_.update(
         uniform_data_->logical_limits.limits, logical_limits_uniform_offset_);
   }
+
+  if (buffer_meta_.buffer()) {
+    BufferMetadata data(sizes_, dim_order_, strides_, numel_);
+    buffer_meta_.update(data);
+  }
 }
 
 void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {
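buffer_meta_ubo() above creates its ParamsBuffer only when a shader first binds it, and update_metadata() rewrites it only if it already exists, so tensors whose shaders never request the 8-dim metadata pay nothing extra. A minimal standalone sketch of that lazy create-then-update pattern, with stand-in types (not the real ParamsBuffer/Context API):

#include <cstdint>
#include <optional>

struct Metadata {
  uint32_t numel;
};

class LazyParamsBuffer {
 public:
  // Binding path: allocate on first request only.
  const Metadata& bind(uint32_t numel) {
    if (!data_) {
      data_ = Metadata{numel};
    }
    return *data_;
  }

  // Resize path: refresh in place, but only if a shader ever bound it.
  void update(uint32_t numel) {
    if (data_) {
      data_->numel = numel;
    }
  }

 private:
  std::optional<Metadata> data_;
};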
diff --git a/backends/vulkan/runtime/api/containers/Tensor.h b/backends/vulkan/runtime/api/containers/Tensor.h
index fefbd2aa71a..eb0e09dbd81 100644
--- a/backends/vulkan/runtime/api/containers/Tensor.h
+++ b/backends/vulkan/runtime/api/containers/Tensor.h
@@ -19,6 +19,8 @@ namespace vkcompute {
 namespace api {
 
+static constexpr size_t kTensorDimLimit = 8;
+
 /*
  * Given a GPUMemoryLayout value, produce a dim order vector that matches the
  * given memory layout. The produced dim order vector will be in the NCHW
@@ -262,6 +264,26 @@ class vTensor final {
         const Attribute attr);
   };
 
+  struct BufferMetadata {
+    uint32_t sizes[kTensorDimLimit];
+    uint32_t dim_order[kTensorDimLimit];
+    uint32_t strides[kTensorDimLimit];
+    uint32_t ndim;
+    uint32_t numel;
+
+    BufferMetadata(
+        std::vector<int64_t>& sizes,
+        std::vector<int64_t>& dim_order,
+        std::vector<int64_t>& strides,
+        size_t numel);
+
+    void update(
+        std::vector<int64_t>& sizes,
+        std::vector<int64_t>& dim_order,
+        std::vector<int64_t>& strides,
+        size_t numel);
+  };
+
  private:
   /*
    * "Core" tensor metadata. They are the minimum amount of information required
@@ -332,6 +354,11 @@ class vTensor final {
    */
   ParamsBuffer uniforms_;
 
+  /*
+   * Stores a BufferMetadata instance that can be passed to shaders via
+   * buffer_meta_ubo().
+   */
+  ParamsBuffer buffer_meta_;
+
   uint32_t uniforms_size_ = 0u;
   uint32_t sizes_uniform_offset_ = kUniformOffsetUnset;
   uint32_t dim_order_uniform_offset_ = kUniformOffsetUnset;
@@ -557,6 +584,8 @@ class vTensor final {
 
   const vkapi::BufferBindInfo numel_ubo();
 
+  const vkapi::BufferBindInfo buffer_meta_ubo();
+
  public:
   inline size_t staging_buffer_numel() const {
     return storage_->buffer_len();
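On the GLSL side (indexing.glslh below) the same bytes are declared as three uvec4[2] arrays plus a uvec2, i.e. 96 + 8 = 104 bytes. A quick standalone check that a tightly packed C++ mirror of the struct matches that size (the struct here is a local copy for illustration, not the real header):

#include <cstdint>

struct BufferMetadata {
  uint32_t sizes[8];
  uint32_t dim_order[8];
  uint32_t strides[8];
  uint32_t ndim;
  uint32_t numel;
};

// 3 arrays * 8 entries * 4 bytes + 2 * 4 bytes = 104 bytes with no padding,
// matching three uvec4[2] arrays (96 bytes) followed by a uvec2 (8 bytes).
static_assert(sizeof(BufferMetadata) == 104, "must match the GLSL layout");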
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
index 7686aa65025..4257f63fab6 100644
--- a/backends/vulkan/runtime/graph/ComputeGraph.h
+++ b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -357,6 +357,10 @@ class ComputeGraph final {
     return values_.at(idx).toConstTensor().has_buffer_storage();
   }
 
+  inline bool is_texture_storage(const ValueRef idx) const {
+    return !is_buffer_storage(idx);
+  }
+
   /*
    * Checks that the following is true:
    * 1. The value at `idx` is a tensor
@@ -411,6 +415,10 @@ class ComputeGraph final {
     return values_.at(idx).toTensor().sizes_ubo();
   }
 
+  inline vkapi::BufferBindInfo buffer_meta_ubo(const ValueRef idx) {
+    return values_.at(idx).toTensor().buffer_meta_ubo();
+  }
+
   inline vkapi::BufferBindInfo strides_ubo(const ValueRef idx) {
     return values_.at(idx).toTensor().strides_ubo();
   }
diff --git a/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl b/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
index f2a9e9cfdac..6f2a93667ea 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
+++ b/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
@@ -34,6 +34,8 @@ $if IS_COMPARISON_OP:
 
 layout(std430) buffer;
 
+#include "indexing.glslh"
+
 $if IS_COMPARISON_OP:
   ${layout_declare_tensor(B, "w", "t_out", "uint8", STORAGE)}
 $else:
@@ -43,13 +45,11 @@ ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
 ${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)}
 
 $if STORAGE == "buffer":
+  ${layout_declare_ubo(B, "BufferMetadata", "outp")}
+  ${layout_declare_ubo(B, "BufferMetadata", "inp")}
+  ${layout_declare_ubo(B, "BufferMetadata", "other")}
+
   layout(push_constant) uniform restrict Block {
-    ivec4 in_sizes;
-    ivec4 other_sizes;
-    ivec4 out_strides;
-    ivec4 in_strides;
-    ivec4 other_strides;
-    int out_numel;
     float alpha;
   };
 $else:
@@ -83,25 +83,30 @@ $else:
 
 #ifdef USING_BUFFER
 
 void main() {
-  const int out_bufi = ivec3(gl_GlobalInvocationID).x;
-  if (out_bufi >= out_numel) {
+  const uint out_bufi = gl_GlobalInvocationID.x;
+  if (out_bufi >= numel(outp)) {
     return;
   }
 
   // Simple case; no broadcasting
-  if (in_sizes == other_sizes) {
+  if (are_equal(inp, other)) {
     t_out[out_bufi] = T(op(t_in[out_bufi], t_other[out_bufi], T(alpha)));
     return;
   }
 
-  const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_dim_order);
-  const ivec4 in_tidx = min(out_tidx, in_sizes - 1);
-  const ivec4 other_tidx = min(out_tidx, other_sizes - 1);
+  TensorIndex outp_tidx;
+  linear_idx_to_tensor_idx(outp, out_bufi, outp_tidx);
+
+  TensorIndex inp_tidx = outp_tidx;
+  clamp_tensor_idx(inp, inp_tidx);
+
+  TensorIndex other_tidx = outp_tidx;
+  clamp_tensor_idx(other, other_tidx);
 
-  const int in_bufi = tidx_to_bufi(in_tidx, in_strides);
-  const int other_bufi = tidx_to_bufi(other_tidx, other_strides);
+  uint inp_bufi = tensor_idx_to_linear_idx(inp, inp_tidx);
+  uint other_bufi = tensor_idx_to_linear_idx(other, other_tidx);
 
-  t_out[out_bufi] = T(op(t_in[in_bufi], t_other[other_bufi], T(alpha)));
+  t_out[out_bufi] = T(op(t_in[inp_bufi], t_other[other_bufi], T(alpha)));
 }
 
 #else // USING_TEXTURE
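clamp_tensor_idx replaces the old per-component min() against sizes - 1: for any dimension where a tensor's size is 1, clamping the output index to size - 1 pins it to 0, which is exactly NumPy-style broadcasting. A standalone sketch with local names (not the shader API):

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdio>

int main() {
  // Input broadcasts along dim 1: size 1 where the output has size 3.
  std::array<uint32_t, 4> in_sizes{4, 1, 1, 1};
  std::array<uint32_t, 4> out_tidx{2, 2, 0, 0}; // some output element
  std::array<uint32_t, 4> in_tidx{};

  for (size_t d = 0; d < 4; ++d) {
    // Size-1 dims clamp to 0, so the single element is reused.
    in_tidx[d] = std::min(out_tidx[d], in_sizes[d] - 1);
  }

  // Prints 2 0 0 0: dim 1 collapsed onto the broadcast element.
  std::printf("%u %u %u %u\n", in_tidx[0], in_tidx[1], in_tidx[2], in_tidx[3]);
}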
diff --git a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl
index 423c4df2679..6d164ae2645 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl
+++ b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl
@@ -4,40 +4,33 @@
 
 #define T ${buffer_scalar_type(DTYPE)}
 
-#include "indexing_utils.h"
-
 ${define_required_extensions(DTYPE)}
 
 layout(std430) buffer;
 
-${layout_declare_tensor(0, "w", "nchw_buf", DTYPE, STORAGE)}
-${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
+#include "indexing.glslh"
+
+${layout_declare_tensor(B, "w", "nchw_buf", DTYPE, STORAGE)}
+${layout_declare_tensor(B, "r", "t_inp", DTYPE, STORAGE)}
 
-$if USE_PUSH_CONST:
-  layout(push_constant) uniform restrict Block {
-    ivec4 in_sizes;
-    ivec4 in_strides;
-    int numel;
-  };
-$else:
-  ${layout_declare_ubo(2, "ivec4", "in_sizes")}
-  ${layout_declare_ubo(3, "ivec4", "in_strides")}
-  ${layout_declare_ubo(4, "int", "numel")}
+${layout_declare_ubo(B, "BufferMetadata", "inp")}
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
 // This constant is unused in this shader but is kept so that the signature is
 // consistent with image_to_nchw.
-layout(constant_id = 3) const int UNUSED_packed_dim = W_DIM;
+${layout_declare_spec_const(C, "int", "unused", "0")}
 
 void main() {
-  int nchwi = int(gl_GlobalInvocationID.x);
-  if (nchwi >= numel) {
+  uint inp_bufi = gl_GlobalInvocationID.x;
+  if (inp_bufi >= numel(inp)) {
     return;
   }
 
-  ivec4 in_tidx = nchwi_to_tidx(nchwi, in_sizes);
-  const int in_bufi = tidx_to_bufi(in_tidx, in_strides);
+  TensorIndex inp_tidx;
+  linear_idx_to_tensor_idx(inp, inp_bufi, inp_tidx);
+
+  uint nchwi = tensor_idx_to_contiguous_idx(inp, inp_tidx);
 
-  nchw_buf[nchwi] = t_in[in_bufi];
+  nchw_buf[nchwi] = t_inp[inp_bufi];
 }
diff --git a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml
index 679e686dc2f..929108cca5e 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml
+++ b/backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml
@@ -19,5 +19,3 @@ buffer_to_nchw:
       - VALUE: int32
   shader_variants:
     - NAME: buffer_to_nchw
-    - NAME: buffer_to_nchw_no_pc
-      USE_PUSH_CONST: False
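The shader above maps each element's position in the tensor's own strided buffer (its linear index) to its position in the NCHW staging buffer (its contiguous index, computed from sizes alone). A standalone sketch of the distinction for a 4x3 W-by-H tensor stored H-fastest; all names are local to this example:

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t sizes[2]   = {4, 3}; // dim 0 = W, dim 1 = H
  const uint32_t strides[2] = {3, 1}; // H-fastest (transposed) layout

  const uint32_t tidx[2] = {2, 1};    // element (w=2, h=1)

  // Linear index: where the element lives in the tensor's own buffer.
  uint32_t linear = tidx[0] * strides[0] + tidx[1] * strides[1]; // 7

  // Contiguous index: where it belongs in the NCHW staging buffer,
  // using stride 1 for dim 0 and cumulative products of sizes after.
  uint32_t contiguous = tidx[0] * 1 + tidx[1] * sizes[0]; // 6

  std::printf("linear=%u contiguous=%u\n", linear, contiguous);
}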
diff --git a/backends/vulkan/runtime/graph/ops/glsl/indexing.glslh b/backends/vulkan/runtime/graph/ops/glsl/indexing.glslh
new file mode 100644
index 00000000000..7155b4616e3
--- /dev/null
+++ b/backends/vulkan/runtime/graph/ops/glsl/indexing.glslh
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#ifndef INDEXING_GLSLH
+#define INDEXING_GLSLH
+
+#define DIMLIMIT 8
+#define DIMLIMIT_DIV4 2
+
+#define mul_4(x) ((x) << 2)
+#define div_4(x) ((x) >> 2)
+
+#define mod_4(x) ((x) & 3)
+
+//
+// BufferMetadata
+//
+
+struct BufferMetadata {
+  uvec4 sizes[DIMLIMIT_DIV4];
+  uvec4 dim_order[DIMLIMIT_DIV4];
+  uvec4 strides[DIMLIMIT_DIV4];
+  uvec2 ndim_numel;
+};
+
+uint ndim(const BufferMetadata meta) {
+  return meta.ndim_numel[0];
+}
+
+int int_ndim(const BufferMetadata meta) {
+  return int(meta.ndim_numel[0]);
+}
+
+uint numel(const BufferMetadata meta) {
+  return meta.ndim_numel[1];
+}
+
+uint dim_order_at(const BufferMetadata meta, const int dim) {
+  return meta.dim_order[div_4(dim)][mod_4(dim)];
+}
+
+uint dim_order_at(const BufferMetadata meta, const uint dim) {
+  return meta.dim_order[div_4(dim)][mod_4(dim)];
+}
+
+uint stride_at(const BufferMetadata meta, const int dim) {
+  return meta.strides[div_4(dim)][mod_4(dim)];
+}
+
+uint stride_at(const BufferMetadata meta, const uint dim) {
+  return meta.strides[div_4(dim)][mod_4(dim)];
+}
+
+uint size_at(const BufferMetadata meta, const int dim) {
+  return meta.sizes[div_4(dim)][mod_4(dim)];
+}
+
+uint size_at(const BufferMetadata meta, const uint dim) {
+  return meta.sizes[div_4(dim)][mod_4(dim)];
+}
+
+bool are_equal(const BufferMetadata meta1, const BufferMetadata meta2) {
+  // sizes and strides must be the same to be considered equal
+  if (meta1.sizes[0] != meta2.sizes[0]) {
+    return false;
+  }
+  if (meta1.sizes[1] != meta2.sizes[1]) {
+    return false;
+  }
+  if (meta1.strides[0] != meta2.strides[0]) {
+    return false;
+  }
+  if (meta1.strides[1] != meta2.strides[1]) {
+    return false;
+  }
+  return true;
+}
+
+//
+// TensorIndex
+//
+
+struct TensorIndex {
+  uvec4 data[DIMLIMIT_DIV4];
+};
+
+void initialize(out TensorIndex tidx) {
+  tidx.data[0] = uvec4(0);
+  tidx.data[1] = uvec4(0);
+}
+
+uint idx_at(const TensorIndex tidx, const int dim) {
+  return tidx.data[div_4(dim)][mod_4(dim)];
+}
+
+//
+// Index Conversions
+//
+
+void contiguous_idx_to_tensor_idx(
+    const BufferMetadata meta,
+    uint contiguous_idx,
+    out TensorIndex tidx) {
+  initialize(tidx);
+  int dim = int_ndim(meta);
+
+  uint contiguous_strides[DIMLIMIT];
+  contiguous_strides[0] = 1;
+  for (int d = 1; d < DIMLIMIT; ++d) {
+    contiguous_strides[d] = size_at(meta, d - 1) * contiguous_strides[d - 1];
+  }
+
+  for (int d = max(dim - 1, 0); d >= 0; d--) {
+    uint dim_stride = contiguous_strides[d];
+
+    tidx.data[div_4(d)][mod_4(d)] = contiguous_idx / dim_stride;
+    contiguous_idx = contiguous_idx % dim_stride;
+  }
+}
+
+uint tensor_idx_to_contiguous_idx(
+    const BufferMetadata meta,
+    const TensorIndex tidx) {
+  uint contiguous_strides[DIMLIMIT];
+  contiguous_strides[0] = 1;
+  for (int d = 1; d < DIMLIMIT; ++d) {
+    contiguous_strides[d] = size_at(meta, d - 1) * contiguous_strides[d - 1];
+  }
+
+  uint contig_idx = 0;
+  for (int d = 0; d < ndim(meta); ++d) {
+    contig_idx += contiguous_strides[d] * idx_at(tidx, d);
+  }
+  return contig_idx;
+}
+
+void linear_idx_to_tensor_idx(
+    const BufferMetadata meta,
+    uint linear_idx,
+    out TensorIndex tidx) {
+  initialize(tidx);
+  int dim = int_ndim(meta);
+  for (int d = max(dim - 1, 0); d >= 0; d--) {
+    uint dim_idx = dim_order_at(meta, d);
+    uint dim_stride = stride_at(meta, dim_idx);
+
+    tidx.data[div_4(dim_idx)][mod_4(dim_idx)] = linear_idx / dim_stride;
+    linear_idx = linear_idx % dim_stride;
+  }
+}
+
+uint tensor_idx_to_linear_idx(
+    const BufferMetadata meta,
+    const TensorIndex tidx) {
+  uint lin_idx = 0;
+  for (int d = 0; d < ndim(meta); ++d) {
+    lin_idx += stride_at(meta, d) * idx_at(tidx, d);
+  }
+  return lin_idx;
+}
+
+void clamp_tensor_idx(const BufferMetadata meta, inout TensorIndex tidx) {
+  tidx.data[0] = min(tidx.data[0], meta.sizes[0] - 1);
+  tidx.data[1] = min(tidx.data[1], meta.sizes[1] - 1);
+}
+
+//
+// Debug utilities
+//
+
+#ifdef DEBUG_MODE
+
+void printTensorIndex(const TensorIndex tidx) {
+  debugPrintfEXT(
+      "TensorIndex: tidx=[%u %u %u %u %u %u %u %u]\\n",
+      tidx.data[0][0], tidx.data[0][1], tidx.data[0][2], tidx.data[0][3],
+      tidx.data[1][0], tidx.data[1][1], tidx.data[1][2], tidx.data[1][3]
+  );
+}
+
+void printBufferMetadata(const BufferMetadata meta) {
+  debugPrintfEXT(
+      "BufferMetadata: ndim=%u numel=%u\\n sizes=[%u %u %u %u %u %u %u %u]\\n dim_order=[%u %u %u %u %u %u %u %u]\\n strides=[%u %u %u %u %u %u %u %u]\\n",
+      meta.ndim_numel[0], meta.ndim_numel[1],
+      meta.sizes[0][0], meta.sizes[0][1], meta.sizes[0][2], meta.sizes[0][3],
+      meta.sizes[1][0], meta.sizes[1][1], meta.sizes[1][2], meta.sizes[1][3],
+      meta.dim_order[0][0], meta.dim_order[0][1],
+      meta.dim_order[0][2], meta.dim_order[0][3],
+      meta.dim_order[1][0], meta.dim_order[1][1],
+      meta.dim_order[1][2], meta.dim_order[1][3],
+      meta.strides[0][0], meta.strides[0][1],
+      meta.strides[0][2], meta.strides[0][3],
+      meta.strides[1][0], meta.strides[1][1],
+      meta.strides[1][2], meta.strides[1][3]
+  );
+}
+
+#endif
+
+#endif // INDEXING_GLSLH
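A host-side reference of the two strided conversions above is handy for checking the shader logic on the CPU. This sketch mirrors linear_idx_to_tensor_idx / tensor_idx_to_linear_idx with local stand-in types, using a dim_order chosen so the descending loop divides by the largest stride first:

#include <cstdint>
#include <cstdio>
#include <vector>

struct Meta {
  std::vector<uint32_t> sizes, dim_order, strides;
};

// Mirrors linear_idx_to_tensor_idx: peel off dims from largest stride to
// smallest, following dim_order.
std::vector<uint32_t> linear_to_tidx(const Meta& m, uint32_t lin) {
  std::vector<uint32_t> tidx(m.sizes.size(), 0);
  for (int d = int(m.sizes.size()) - 1; d >= 0; --d) {
    const uint32_t dim = m.dim_order[d];
    tidx[dim] = lin / m.strides[dim];
    lin %= m.strides[dim];
  }
  return tidx;
}

// Mirrors tensor_idx_to_linear_idx: a plain dot product with the strides.
uint32_t tidx_to_linear(const Meta& m, const std::vector<uint32_t>& tidx) {
  uint32_t lin = 0;
  for (size_t d = 0; d < tidx.size(); ++d) {
    lin += m.strides[d] * tidx[d];
  }
  return lin;
}

int main() {
  // Contiguous 4x3x2 tensor, fastest-moving dim first.
  Meta m{{4, 3, 2}, {0, 1, 2}, {1, 4, 12}};
  for (uint32_t i = 0; i < 24; ++i) {
    if (tidx_to_linear(m, linear_to_tidx(m, i)) != i) {
      std::printf("mismatch at %u\n", i);
      return 1;
    }
  }
  std::printf("round-trip OK\n");
  return 0;
}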
diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl
index 62cd0610ffb..074624dc37e 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl
+++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.glsl
@@ -4,46 +4,45 @@
 
 #define T ${buffer_scalar_type(DTYPE)}
 
-#include "indexing_utils.h"
-
 ${define_required_extensions(DTYPE)}
 
 layout(std430) buffer;
 
-${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)}
+#include "indexing.glslh"
+
+${layout_declare_tensor(B, "w", "t_outp", DTYPE, STORAGE)}
 ${layout_declare_tensor(B, "r", "nchw_in", DTYPE, STORAGE)}
 
-$if USE_PUSH_CONST:
-  layout(push_constant) uniform restrict Block {
-    ivec4 out_sizes;
-    ivec4 out_strides;
-    int numel;
-  };
-$else:
-  ${layout_declare_ubo(B, "ivec4", "out_sizes")}
-  ${layout_declare_ubo(B, "ivec4", "out_strides")}
-  ${layout_declare_ubo(B, "int", "numel")}
+${layout_declare_ubo(B, "BufferMetadata", "outp")}
 
 layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
 
-${layout_declare_spec_const(C, "int", "out_layout", "DEFAULT_DIM_ORDER")}
-const lowp ivec4 out_dim_order = unhash_dim_order(out_layout);
+// This constant is unused in this shader but is kept so that the signature is
+// consistent with nchw_to_image.
+${layout_declare_spec_const(C, "int", "unused", "0")}
 ${layout_declare_spec_const(C, "int", "transpose_hw", "0")}
 
 void main() {
-  int out_bufi = int(gl_GlobalInvocationID.x);
-  if (out_bufi >= numel) {
+  const uint outp_bufi = gl_GlobalInvocationID.x;
+  if (outp_bufi >= numel(outp)) {
     return;
   }
 
-  ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_dim_order);
+  TensorIndex outp_tidx;
+  uint nchwi;
+
+  linear_idx_to_tensor_idx(outp, outp_bufi, outp_tidx);
 
-  ivec4 sizes = out_sizes;
   if (transpose_hw == 1) {
-    sizes.xy = sizes.yx;
-    out_tidx.xy = out_tidx.yx;
+    BufferMetadata transposed_meta = outp;
+    transposed_meta.sizes[0].xy = transposed_meta.sizes[0].yx;
+    outp_tidx.data[0].xy = outp_tidx.data[0].yx;
+    nchwi = tensor_idx_to_contiguous_idx(transposed_meta, outp_tidx);
+  }
+  // Normal case
+  else {
+    nchwi = tensor_idx_to_contiguous_idx(outp, outp_tidx);
   }
 
-  const int in_nchwi = tidx_to_nchwi(out_tidx, sizes);
-  t_out[out_bufi] = nchw_in[in_nchwi];
+  t_outp[outp_bufi] = nchw_in[nchwi];
 }
diff --git a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml
index 99e41a0ab6f..9d6c3aa76a9 100644
--- a/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml
+++ b/backends/vulkan/runtime/graph/ops/glsl/nchw_to_buffer.yaml
@@ -19,5 +19,3 @@ nchw_to_buffer:
       - VALUE: int32
   shader_variants:
     - NAME: nchw_to_buffer
-    - NAME: nchw_to_buffer_no_pc
-      USE_PUSH_CONST: False
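The transpose_hw path above only swaps the first two sizes and the first two index components before the contiguous-index computation. A minimal sketch of why that reads the staging data transposed; all names are local to the example:

#include <cstdint>
#include <cstdio>
#include <utility>

// Contiguous index of element (w, h) in a tensor with W fastest-moving.
uint32_t contiguous_idx(uint32_t size_w, uint32_t w, uint32_t h) {
  return w + h * size_w;
}

int main() {
  uint32_t size_w = 4, size_h = 3;
  uint32_t w = 2, h = 1;

  // Normal case: read from position (w, h).
  std::printf("normal:     %u\n", contiguous_idx(size_w, w, h)); // 6

  // transpose_hw: swap the sizes and the index components, exactly as the
  // shader does, so the element is read from the transposed position.
  std::swap(size_w, size_h);
  std::swap(w, h);
  std::printf("transposed: %u\n", contiguous_idx(size_w, w, h)); // 7
}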
diff --git a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
index 6e9baafd45f..025b483eab7 100644
--- a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp
@@ -139,15 +139,11 @@ void add_binary_op_buffer_node(
       // Inputs and Outputs
       {{out, vkapi::kWrite}, {{in1, in2}, vkapi::kRead}},
       // Shader params buffers
-      {},
+      {graph.buffer_meta_ubo(out),
+       graph.buffer_meta_ubo(in1),
+       graph.buffer_meta_ubo(in2)},
       // Push Constants
       {{
-          graph.sizes_pc_of(in1),
-          graph.sizes_pc_of(in2),
-          graph.strides_pc_of(out),
-          graph.strides_pc_of(in1),
-          graph.strides_pc_of(in2),
-          graph.numel_pc_of(out),
           PushConstantDataInfo(&alpha_val, sizeof(float)),
       }},
       // Specialization Constants
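One plausible motivation for moving the per-tensor metadata out of the push-constant block (the patch itself does not state it): Vulkan only guarantees 128 bytes of push-constant space, which the old five-ivec4 block fit but three 8-dim BufferMetadata structs would not. A back-of-envelope check:

#include <cstdint>

// Old buffer binary-op block: five ivec4s, an int, and a float.
constexpr uint32_t kOldPushConstants = 5 * 16 + 4 + 4; // 88 bytes
// New metadata: three uint32_t[8] arrays plus ndim and numel, per tensor.
constexpr uint32_t kBufferMetadataBytes = 3 * 8 * 4 + 2 * 4; // 104 bytes

static_assert(kOldPushConstants <= 128, "old block fit the guaranteed minimum");
static_assert(3 * kBufferMetadataBytes > 128, "three tensors cannot fit");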
diff --git a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
index 5faeae3e21b..6cd5115563a 100644
--- a/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
+++ b/backends/vulkan/runtime/graph/ops/impl/Staging.cpp
@@ -29,13 +29,13 @@ void add_staging_to_tensor_node(
   vkapi::ShaderInfo shader = get_nchw_to_tensor_shader(
       graph, out_tensor, graph.int8_buffers_enabled());
 
-  std::vector<PushConstantDataInfo> pcs;
+  vkapi::ParamsBindList param_buffers = {};
   if (graph.is_buffer_storage(out_tensor)) {
-    pcs = {
-        graph.sizes_pc_of(out_tensor),
-        graph.strides_pc_of(out_tensor),
-        graph.numel_pc_of(out_tensor)};
-  } else {
+    param_buffers.append(graph.buffer_meta_ubo(out_tensor));
+  }
+
+  std::vector<PushConstantDataInfo> pcs;
+  if (graph.is_texture_storage(out_tensor)) {
     pcs = {graph.sizes_pc_of(out_tensor)};
   }
 
@@ -47,7 +47,7 @@ void add_staging_to_tensor_node(
       // Input and Outputs
       {{out_tensor, vkapi::kWrite}, {in_staging, vkapi::kRead}},
       // Parameter Buffers
-      {},
+      param_buffers,
       // Push Constants
       pcs,
       // Specialization Constants
@@ -113,13 +113,13 @@ void add_tensor_to_staging_node(
   vkapi::ShaderInfo shader =
       get_tensor_to_nchw_shader(graph, in_tensor, graph.int8_buffers_enabled());
 
-  std::vector<PushConstantDataInfo> pcs;
+  vkapi::ParamsBindList param_buffers = {};
   if (graph.is_buffer_storage(in_tensor)) {
-    pcs = {
-        graph.sizes_pc_of(in_tensor),
-        graph.strides_pc_of(in_tensor),
-        graph.numel_pc_of(in_tensor)};
-  } else {
+    param_buffers.append(graph.buffer_meta_ubo(in_tensor));
+  }
+
+  std::vector<PushConstantDataInfo> pcs;
+  if (graph.is_texture_storage(in_tensor)) {
     pcs = {graph.sizes_pc_of(in_tensor)};
   }
 
@@ -135,7 +135,7 @@ void add_tensor_to_staging_node(
       // Input and Outputs
       {{out_staging, vkapi::kWrite}, {in_tensor, vkapi::kRead}},
       // Parameter Buffers
-      {},
+      param_buffers,
       // Push Constants
       pcs,
       // Specialization Constants
@@ -154,6 +154,11 @@ void add_prepack_standard_node(
   vkapi::ShaderInfo shader =
       get_nchw_to_tensor_shader(graph, tensor, graph.int8_buffers_enabled());
 
+  vkapi::ParamsBindList param_buffers = {};
+  if (graph.is_buffer_storage(tensor)) {
+    param_buffers.append(graph.buffer_meta_ubo(tensor));
+  }
+
   std::vector<PushConstantDataInfo> pcs;
   if (graph.is_buffer_storage(tensor)) {
     pcs = {
@@ -175,7 +180,7 @@ void add_prepack_standard_node(
       tensor_data,
       tensor,
       // Parameter Buffers
-      {},
+      param_buffers,
       // Specialization Constants
       {graph.hashed_layout_of(tensor), transpose_hw_spec},
       pcs));
diff --git a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
index 904b91965d6..c90bfa402bb 100644
--- a/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
+++ b/backends/vulkan/runtime/graph/ops/utils/StagingUtils.cpp
@@ -44,9 +44,6 @@ vkapi::ShaderInfo get_nchw_to_tensor_shader(
 
   if (dst_storage_type == utils::kBuffer) {
     kernel_name = "nchw_to_buffer";
-    if (!push_constant_variant) {
-      kernel_name += "_no_pc";
-    }
     add_dtype_suffix(kernel_name, dst_dtype);
     return VK_KERNEL_FROM_STR(kernel_name);
   }
@@ -85,9 +82,6 @@ vkapi::ShaderInfo get_tensor_to_nchw_shader(
 
   if (src_storage_type == utils::kBuffer) {
     kernel_name = "buffer_to_nchw";
-    if (!push_constant_variant) {
-      kernel_name += "_no_pc";
-    }
     add_dtype_suffix(kernel_name, src_dtype);
     return VK_KERNEL_FROM_STR(kernel_name);
   }
diff --git a/backends/vulkan/test/utils/test_utils.cpp b/backends/vulkan/test/utils/test_utils.cpp
index c026c1364fa..07d28229221 100644
--- a/backends/vulkan/test/utils/test_utils.cpp
+++ b/backends/vulkan/test/utils/test_utils.cpp
@@ -43,9 +43,6 @@ vkapi::ShaderInfo get_nchw_to_tensor_shader(
 
   if (v_dst.storage_type() == utils::kBuffer) {
     kernel_name = "nchw_to_buffer";
-    if (!push_constant_variant) {
-      kernel_name += "_no_pc";
-    }
     add_dtype_suffix(kernel_name, v_dst.dtype());
     return VK_KERNEL_FROM_STR(kernel_name);
   }
@@ -80,9 +77,6 @@ vkapi::ShaderInfo get_tensor_to_nchw_shader(
 
   if (v_src.storage_type() == utils::kBuffer) {
     kernel_name = "buffer_to_nchw";
-    if (!push_constant_variant) {
-      kernel_name += "_no_pc";
-    }
     add_dtype_suffix(kernel_name, v_src.dtype());
     return VK_KERNEL_FROM_STR(kernel_name);
   }
@@ -120,9 +114,7 @@ void record_nchw_to_buffer_op(
           vkapi::PipelineStage::COMPUTE,
           vkapi::MemoryAccessType::WRITE),
       src_buffer,
-      v_dst.sizes_ubo(),
-      v_dst.strides_ubo(),
-      v_dst.numel_ubo());
+      v_dst.buffer_meta_ubo());
 }
 
 void record_buffer_to_nchw_op(
@@ -140,9 +132,7 @@ void record_buffer_to_nchw_op(
       0,
       dst_buffer,
       v_src.buffer(pipeline_barrier, vkapi::PipelineStage::COMPUTE),
-      v_src.sizes_ubo(),
-      v_src.strides_ubo(),
-      v_src.numel_ubo());
+      v_src.buffer_meta_ubo());
 }
 
 void record_nchw_to_image_op(