
[ET-VK] Introduce BufferMetadata GLSL struct to abstract tensor layout #13595


Merged · 1 commit · Aug 23, 2025
50 changes: 50 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.cpp
@@ -567,6 +567,7 @@ vTensor::vTensor(
max_ubo_nbytes_{
calculate_max_ubo_nbytes(min_nbytes_per_ubo_, storage_type)},
uniforms_(),
buffer_meta_(),
// Construct Tensor storage
storage_(std::make_shared<vTensorStorage>(
context,
@@ -611,6 +612,7 @@ vTensor::vTensor(
max_ubo_nbytes_{
calculate_max_ubo_nbytes(min_nbytes_per_ubo_, utils::kTexture3D)},
uniforms_(),
buffer_meta_(),
// Construct Tensor storage
storage_(std::make_shared<vTensorStorage>(context, image)) {
uniform_data_ = std::make_shared<UniformData>(UniformData{
@@ -634,6 +636,7 @@ vTensor::vTensor(vTensor& other)
min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
max_ubo_nbytes_{other.max_ubo_nbytes_},
uniforms_(),
buffer_meta_(),
// Copy Tensor storage
storage_(other.storage_) {
uniform_data_ = std::make_shared<UniformData>(*other.get_uniform_data());
@@ -659,6 +662,7 @@ vTensor::vTensor(
min_nbytes_per_ubo_{other.min_nbytes_per_ubo_},
max_ubo_nbytes_{other.max_ubo_nbytes_},
uniforms_(),
buffer_meta_(),
// Copy Tensor storage
storage_(other.storage_) {
uniform_data_ = std::make_shared<UniformData>(UniformData{
@@ -711,6 +715,38 @@ uint32_t vTensor::UniformData::write_attribute(
return 0;
}

vTensor::BufferMetadata::BufferMetadata(
std::vector<int64_t>& src_sizes,
std::vector<int64_t>& src_dim_order,
std::vector<int64_t>& src_strides,
size_t src_numel) {
update(src_sizes, src_dim_order, src_strides, src_numel);
}

void vTensor::BufferMetadata::update(
std::vector<int64_t>& src_sizes,
std::vector<int64_t>& src_dim_order,
std::vector<int64_t>& src_strides,
size_t src_numel) {
int32_t fixed_ndim = utils::safe_downcast<int32_t>(kTensorDimLimit);

std::vector<uint32_t> fu_sizes = flip_and_unsqueeze<uint32_t>(
src_sizes, kTensorSizes, src_numel, fixed_ndim);
std::vector<uint32_t> fu_dim_order = flip_and_unsqueeze<uint32_t>(
src_dim_order, kTensorDimOrder, src_numel, fixed_ndim);
std::vector<uint32_t> fu_strides = flip_and_unsqueeze<uint32_t>(
src_strides, kTensorStrides, src_numel, fixed_ndim);

for (int i = 0; i < fixed_ndim; ++i) {
sizes[i] = fu_sizes.at(i);
dim_order[i] = fu_dim_order.at(i);
strides[i] = fu_strides.at(i);
}

ndim = utils::safe_downcast<uint32_t>(src_sizes.size());
numel = utils::safe_downcast<uint32_t>(src_numel);
}

vkapi::VulkanImage& vTensor::image(
vkapi::PipelineBarrier& pipeline_barrier,
const vkapi::PipelineStageFlags stage) & {
@@ -799,6 +835,15 @@ const vkapi::BufferBindInfo vTensor::numel_ubo() {
return metadata_ubo_impl(&numel_uniform_offset_, uniform_data_->numel);
}

const vkapi::BufferBindInfo vTensor::buffer_meta_ubo() {
size_t ubo_nbytes = sizeof(BufferMetadata);
if (!buffer_meta_.buffer()) {
BufferMetadata data(sizes_, dim_order_, strides_, numel_);
buffer_meta_ = ParamsBuffer(storage_->context_, data);
}
return vkapi::BufferBindInfo(buffer_meta_.buffer(), 0, ubo_nbytes);
}

VkMemoryRequirements vTensor::get_memory_requirements() const {
switch (storage_type()) {
case utils::kBuffer:
@@ -875,6 +920,11 @@ void vTensor::update_metadata() {
uniforms_.update(
uniform_data_->logical_limits.limits, logical_limits_uniform_offset_);
}

if (buffer_meta_.buffer()) {
BufferMetadata data(sizes_, dim_order_, strides_, numel_);
buffer_meta_.update(data);
}
}

void vTensor::check_sizes(const std::vector<int64_t>& sizes) const {
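Note: the GLSL-side declaration of BufferMetadata lives in indexing.glslh, which is not part of this diff. Below is a minimal sketch of what the mirror struct plausibly looks like, assuming the uint32_t[8] arrays are expressed as pairs of uvec4 so the buffer layout stays tightly packed; the field packing is an assumption inferred from the C++ layout, not taken from this PR.

struct BufferMetadata {
  // Each uint32_t[8] array on the C++ side maps to uvec4[2] here, so the
  // layout stays tightly packed regardless of std140 scalar-array stride
  // rules. Dimension d is addressed as sizes[d / 4][d % 4].
  uvec4 sizes[2];
  uvec4 dim_order[2];
  uvec4 strides[2];
  uint ndim;
  uint numel;
};

The entries hold the flipped-and-unsqueezed metadata (padded to kTensorDimLimit entries) written by BufferMetadata::update above.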
29 changes: 29 additions & 0 deletions backends/vulkan/runtime/api/containers/Tensor.h
@@ -19,6 +19,8 @@
namespace vkcompute {
namespace api {

static constexpr size_t kTensorDimLimit = 8;

/*
* Given a GPUMemoryLayout value, produce a dim order vector that matches the
* given memory layout. The produced dim order vector will be in the NCHW
@@ -262,6 +264,26 @@ class vTensor final {
const Attribute attr);
};

struct BufferMetadata {
uint32_t sizes[kTensorDimLimit];
uint32_t dim_order[kTensorDimLimit];
uint32_t strides[kTensorDimLimit];
uint32_t ndim;
uint32_t numel;

BufferMetadata(
std::vector<int64_t>& sizes,
std::vector<int64_t>& dim_order,
std::vector<int64_t>& strides,
size_t numel);

void update(
std::vector<int64_t>& sizes,
std::vector<int64_t>& dim_order,
std::vector<int64_t>& strides,
size_t numel);
};

private:
/*
* "Core" tensor metadata. They are the minimum amount of information required
@@ -332,6 +354,11 @@ class vTensor final {
*/
ParamsBuffer uniforms_;

/*
 * Stores a BufferMetadata instance so that it can be passed to shaders via
 * buffer_meta_ubo()
 */
ParamsBuffer buffer_meta_;

uint32_t uniforms_size_ = 0u;
uint32_t sizes_uniform_offset_ = kUniformOffsetUnset;
uint32_t dim_order_uniform_offset_ = kUniformOffsetUnset;
@@ -557,6 +584,8 @@ class vTensor final {

const vkapi::BufferBindInfo numel_ubo();

const vkapi::BufferBindInfo buffer_meta_ubo();

public:
inline size_t staging_buffer_numel() const {
return storage_->buffer_len();
8 changes: 8 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
@@ -357,6 +357,10 @@ class ComputeGraph final {
return values_.at(idx).toConstTensor().has_buffer_storage();
}

inline bool is_texture_storage(const ValueRef idx) const {
return !is_buffer_storage(idx);
}

/*
* Checks that the following is true:
* 1. The value at `idx` is a tensor
@@ -411,6 +415,10 @@ class ComputeGraph final {
return values_.at(idx).toTensor().sizes_ubo();
}

inline vkapi::BufferBindInfo buffer_meta_ubo(const ValueRef idx) {
return values_.at(idx).toTensor().buffer_meta_ubo();
}

inline vkapi::BufferBindInfo strides_ubo(const ValueRef idx) {
return values_.at(idx).toTensor().strides_ubo();
}
35 changes: 20 additions & 15 deletions backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl
@@ -34,6 +34,8 @@ $if IS_COMPARISON_OP:

layout(std430) buffer;

#include "indexing.glslh"

$if IS_COMPARISON_OP:
${layout_declare_tensor(B, "w", "t_out", "uint8", STORAGE)}
$else:
Expand All @@ -43,13 +45,11 @@ ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)}

$if STORAGE == "buffer":
${layout_declare_ubo(B, "BufferMetadata", "outp")}
${layout_declare_ubo(B, "BufferMetadata", "inp")}
${layout_declare_ubo(B, "BufferMetadata", "other")}

layout(push_constant) uniform restrict Block {
ivec4 in_sizes;
ivec4 other_sizes;
ivec4 out_strides;
ivec4 in_strides;
ivec4 other_strides;
int out_numel;
float alpha;
};
$else:
@@ -83,25 +83,30 @@ $else:
#ifdef USING_BUFFER

void main() {
const int out_bufi = ivec3(gl_GlobalInvocationID).x;
if (out_bufi >= out_numel) {
const uint out_bufi = gl_GlobalInvocationID.x;
if (out_bufi >= numel(outp)) {
return;
}

// Simple case; no broadcasting
if (in_sizes == other_sizes) {
if (are_equal(inp, other)) {
t_out[out_bufi] = T(op(t_in[out_bufi], t_other[out_bufi], T(alpha)));
return;
}

const ivec4 out_tidx = bufi_to_tidx(out_bufi, out_strides, out_dim_order);
const ivec4 in_tidx = min(out_tidx, in_sizes - 1);
const ivec4 other_tidx = min(out_tidx, other_sizes - 1);
TensorIndex outp_tidx;
linear_idx_to_tensor_idx(outp, out_bufi, outp_tidx);

TensorIndex inp_tidx = outp_tidx;
clamp_tensor_idx(inp, inp_tidx);

TensorIndex other_tidx = outp_tidx;
clamp_tensor_idx(other, other_tidx);

const int in_bufi = tidx_to_bufi(in_tidx, in_strides);
const int other_bufi = tidx_to_bufi(other_tidx, other_strides);
uint inp_bufi = tensor_idx_to_linear_idx(inp, inp_tidx);
uint other_bufi = tensor_idx_to_linear_idx(other, other_tidx);

t_out[out_bufi] = T(op(t_in[in_bufi], t_other[other_bufi], T(alpha)));
t_out[out_bufi] = T(op(t_in[inp_bufi], t_other[other_bufi], T(alpha)));
}

#else // USING_TEXTURE
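The indexing helpers used above (numel, are_equal, linear_idx_to_tensor_idx, clamp_tensor_idx, tensor_idx_to_linear_idx) come from indexing.glslh, which this diff does not include. Below is a sketch of plausible implementations, assuming the BufferMetadata packing sketched earlier and a TensorIndex that stores one index per dimension in the same uvec4[2] packing; all bodies are inferred from the call sites, not copied from the PR.

struct TensorIndex {
  uvec4 data[2]; // per-dimension index, up to kTensorDimLimit = 8 dims
};

uint numel(const BufferMetadata meta) {
  return meta.numel;
}

// Two tensors with identical sizes need no broadcasting logic.
bool are_equal(const BufferMetadata a, const BufferMetadata b) {
  return a.sizes[0] == b.sizes[0] && a.sizes[1] == b.sizes[1];
}

// Recover a per-dimension index from a linear buffer index using the
// tensor's strides: idx[d] = (bufi / strides[d]) % sizes[d].
void linear_idx_to_tensor_idx(
    const BufferMetadata meta, const uint bufi, out TensorIndex tidx) {
  tidx.data[0] = uvec4(0);
  tidx.data[1] = uvec4(0);
  for (uint d = 0; d < meta.ndim; ++d) {
    const uint size = meta.sizes[d / 4][d % 4];
    const uint stride = meta.strides[d / 4][d % 4];
    tidx.data[d / 4][d % 4] = (bufi / stride) % size;
  }
}

// Clamping to sizes - 1 implements broadcasting: a dim of size 1 pins the
// index to 0, the same trick as min(out_tidx, in_sizes - 1) in the old
// push-constant version above.
void clamp_tensor_idx(const BufferMetadata meta, inout TensorIndex tidx) {
  tidx.data[0] = min(tidx.data[0], meta.sizes[0] - 1);
  tidx.data[1] = min(tidx.data[1], meta.sizes[1] - 1);
}

// The buffer offset is the dot product of the index with the strides.
uint tensor_idx_to_linear_idx(
    const BufferMetadata meta, const TensorIndex tidx) {
  uint bufi = 0;
  for (uint d = 0; d < meta.ndim; ++d) {
    bufi += tidx.data[d / 4][d % 4] * meta.strides[d / 4][d % 4];
  }
  return bufi;
}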
33 changes: 13 additions & 20 deletions backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.glsl
@@ -4,40 +4,33 @@

#define T ${buffer_scalar_type(DTYPE)}

#include "indexing_utils.h"

${define_required_extensions(DTYPE)}

layout(std430) buffer;

${layout_declare_tensor(0, "w", "nchw_buf", DTYPE, STORAGE)}
${layout_declare_tensor(1, "r", "t_in", DTYPE, STORAGE)}
#include "indexing.glslh"

${layout_declare_tensor(B, "w", "nchw_buf", DTYPE, STORAGE)}
${layout_declare_tensor(B, "r", "t_inp", DTYPE, STORAGE)}

$if USE_PUSH_CONST:
layout(push_constant) uniform restrict Block {
ivec4 in_sizes;
ivec4 in_strides;
int numel;
};
$else:
${layout_declare_ubo(2, "ivec4", "in_sizes")}
${layout_declare_ubo(3, "ivec4", "in_strides")}
${layout_declare_ubo(4, "int", "numel")}
${layout_declare_ubo(B, "BufferMetadata", "inp")}

layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;

// This constant is unused in this shader but is kept so that the signature is
// consistent with image_to_nchw.
layout(constant_id = 3) const int UNUSED_packed_dim = W_DIM;
${layout_declare_spec_const(C, "int", "unused", "0")}

void main() {
int nchwi = int(gl_GlobalInvocationID.x);
if (nchwi >= numel) {
uint inp_bufi = gl_GlobalInvocationID.x;
if (inp_bufi >= numel(inp)) {
return;
}

ivec4 in_tidx = nchwi_to_tidx(nchwi, in_sizes);
const int in_bufi = tidx_to_bufi(in_tidx, in_strides);
TensorIndex inp_tidx;
linear_idx_to_tensor_idx(inp, inp_bufi, inp_tidx);

uint nchwi = tensor_idx_to_contiguous_idx(inp, inp_tidx);

nchw_buf[nchwi] = t_in[in_bufi];
nchw_buf[nchwi] = t_inp[inp_bufi];
}
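Here tensor_idx_to_contiguous_idx (also from indexing.glslh, not shown in this diff) maps a tensor index to the offset the element would occupy in a contiguous NCHW buffer, i.e. it uses strides derived from the sizes rather than the tensor's actual strides. A sketch under the same assumed packing, inferred rather than copied from the PR:

// Assumes dims are stored innermost-first after flip_and_unsqueeze, so
// accumulating stride *= sizes[d] from d = 0 upward walks from the
// fastest-moving to the slowest-moving dimension of a contiguous tensor.
uint tensor_idx_to_contiguous_idx(
    const BufferMetadata meta, const TensorIndex tidx) {
  uint offset = 0;
  uint stride = 1;
  for (uint d = 0; d < meta.ndim; ++d) {
    offset += tidx.data[d / 4][d % 4] * stride;
    stride *= meta.sizes[d / 4][d % 4];
  }
  return offset;
}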
2 changes: 0 additions & 2 deletions backends/vulkan/runtime/graph/ops/glsl/buffer_to_nchw.yaml
@@ -19,5 +19,3 @@ buffer_to_nchw:
- VALUE: int32
shader_variants:
- NAME: buffer_to_nchw
- NAME: buffer_to_nchw_no_pc
USE_PUSH_CONST: False