Skip to content

Commit a48173d

Browse files
committed
add a spir-v multi-block read scenario
1 parent 66536a1 commit a48173d

File tree

1 file changed

+40
-5
lines changed

1 file changed

+40
-5
lines changed

samples/99_blockreads/block_read_kernel.cl

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,32 @@ void intel_sub_group_2d_block_read_transpose_32b_32r1x1c(global void* base_addre
1111
destination[1] = temp.s1;
1212
}
1313

14+
void __spirv_Subgroup2DBlockLoadINTEL(
15+
int element_size, int block_width, int block_height, int block_count,
16+
const __global void* src_base_pointer, int memory_width, int memory_height, int memory_pitch,
17+
int2 coordinate,
18+
private void* dst_pointer);
19+
void __spirv_Subgroup2DBlockLoadTransposeINTEL(
20+
int element_size, int block_width, int block_height, int block_count,
21+
const __global void* src_base_pointer, int memory_width, int memory_height, int memory_pitch,
22+
int2 coordinate,
23+
private void* dst_pointer);
24+
void __spirv_Subgroup2DBlockLoadTransformINTEL(
25+
int element_size, int block_width, int block_height, int block_count,
26+
const __global void* src_base_pointer, int memory_width, int memory_height, int memory_pitch,
27+
int2 coordinate,
28+
private void* dst_pointer);
29+
void __spirv_Subgroup2DBlockPrefetchINTEL(
30+
int element_size, int block_width, int block_height, int block_count,
31+
const __global void* src_base_pointer, int memory_width, int memory_height, int memory_pitch,
32+
int2 coordinate);
33+
void __spirv_Subgroup2DBlockStoreINTEL(
34+
int element_size, int block_width, int block_height, int block_count,
35+
const private void* src_pointer,
36+
__global void* dst_base_pointer,
37+
int memory_width, int memory_height, int memory_pitch,
38+
int2 coordinate);
39+
1440
__attribute__((intel_reqd_sub_group_size(16)))
1541
kernel void BlockReadTest(global void* matrix, int bytewidth, int height)
1642
{
@@ -38,7 +64,7 @@ kernel void BlockReadTest(global void* matrix, int bytewidth, int height)
3864
intel_sub_group_2d_block_read_16b_4r16x1c(matrix, bytewidth, height, bytepitch, coord, data);
3965
printf("GID %3d: data = %04X %04X %04X %04X\n", (int)get_global_id(0),
4066
data[0], data[1], data[2], data[3]);
41-
#elif 1
67+
#elif 0
4268
// This is another multi-row 2D block read.
4369
// Each work-item gets 32 8-bit values, from four different 8 row x 16 column blocks.
4470
// The first 8 8-bit values are the 32 rows from a column of the first block.
@@ -51,7 +77,7 @@ kernel void BlockReadTest(global void* matrix, int bytewidth, int height)
5177
data[ 8], data[ 9], data[10], data[11], data[12], data[13], data[14], data[15],
5278
data[16], data[17], data[18], data[19], data[20], data[21], data[22], data[23],
5379
data[24], data[25], data[26], data[27], data[28], data[29], data[30], data[31]);
54-
#elif 1
80+
#elif 0
5581
// This is another multi-row 2D block read.
5682
// Each work-item gets 128 8-bit values, from four different 32 row x 16 column blocks.
5783
// The first 32 8-bit values are the 32 rows from a column of the first block.
@@ -64,7 +90,7 @@ kernel void BlockReadTest(global void* matrix, int bytewidth, int height)
6490
data[16], data[17], data[18], data[19], data[20], data[21], data[22], data[23],
6591
data[24], data[25], data[26], data[27], data[28], data[29], data[30], data[31],
6692
data[32], data[33], data[34], data[35], data[36], data[37], data[38], data[39]);
67-
#elif 1
93+
#elif 0
6894
// This is another multi-row 2D block read.
6995
// Each work-item gets 128 8-bit values, from four different 32 row x 16 column blocks.
7096
// The first 32 8-bit values are the 32 rows from a column of the first block.
@@ -85,15 +111,15 @@ kernel void BlockReadTest(global void* matrix, int bytewidth, int height)
85111
intel_sub_group_2d_block_read_transpose_32b_16r8x1c(matrix, bytewidth, height, bytepitch, coord, data);
86112
printf("GID %3d: data = %08X %08X %08X %08X %08X %08X %08X %08X ...\n", (int)get_global_id(0),
87113
data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]);
88-
#elif 1
114+
#elif 0
89115
// This is a more complicated transposed 2D block read, since there are 32 rows (pre-transpose) and only 16 work-items.
90116
// Each work-item gets 16 32-bit values, where each 32-bit value contains two columns of data (pre-transpose).
91117
// Each work-item therefore gets 16 columns of data from one matrix row, and 16 columns of data from another matrix row.
92118
// The data from the two matrix rows are interleaved, so there are two columns of data from one row, then two columns from the other row, etc.
93119
uint data[2];
94120
intel_sub_group_2d_block_read_transpose_32b_32r1x1c(matrix, bytewidth, height, bytepitch, coord, data);
95121
printf("GID %3d: data = %08X %08X\n", (int)get_global_id(0), data[0], data[1]);
96-
#elif 1
122+
#elif 0
97123
// This is a more complicated transposed 2D block read, since there are 32 rows (pre-transpose) and only 16 work-items.
98124
// Each work-item gets 16 32-bit values, where each 32-bit value contains two columns of data (pre-transpose).
99125
// Each work-item therefore gets 16 columns of data from one matrix row, and 16 columns of data from another matrix row.
@@ -102,5 +128,14 @@ kernel void BlockReadTest(global void* matrix, int bytewidth, int height)
102128
intel_sub_group_2d_block_read_transpose_32b_32r8x1c(matrix, bytewidth, height, bytepitch, coord, data);
103129
printf("GID %3d: data = %08X %08X %08X %08X %08X %08X %08X %08X ...\n", (int)get_global_id(0),
104130
data[0], data[1], data[2], data[3], data[4], data[5], data[6], data[7]);
131+
#elif 1
132+
// This is a multi-row, multi-block 2D block read.
133+
// Each work-item gets four 8-bit values, from two different 2 row x 16 column blocks.
134+
// The first two 8-bit values are the two rows from a column of the first block.
135+
// The second two 8-bit values are the two rows from a column of the second block.
136+
uchar data[4];
137+
__spirv_Subgroup2DBlockLoadINTEL(1, 16, 2, 2, matrix, bytewidth, height, bytepitch, coord, data);
138+
printf("GID %3d: data = %02X %02X %02X %02X\n", (int)get_global_id(0),
139+
data[ 0], data[ 1], data[ 2], data[ 3]);
105140
#endif
106141
}

0 commit comments

Comments
 (0)