@@ -98,7 +98,7 @@ tile_store(tile_t& tile, payload_t& payload) {
98
98
99
99
static constexpr uint32_t num_block_x = tile_desc::num_block_x;
100
100
static constexpr uint32_t num_block_y = tile_desc::num_block_y;
101
- // static constexpr uint32_t num_block = tile_desc::num_block;
101
+ // static constexpr uint32_t num_block = tile_desc::num_block;
102
102
103
103
using load_store_attr = typename arch_attr_t <
104
104
payload_t ::arch_tag>::template load_store_attr<msg_type::block_2d>;
@@ -145,7 +145,7 @@ tile_store(tile_t& tile, payload_t& payload) {
145
145
#pragma unroll
146
146
for (uint32_t j = 0 ; j < num_block_x; j += arr_len) {
147
147
int32_t offset_x = j * block_size_x;
148
- // xetla_tdescriptor tdesc = payload_row.row(j);
148
+ // xetla_tdescriptor tdesc = payload_row.row(j);
149
149
auto reg_blk = tile.reg .xetla_select <store_block_elems, 1 >(
150
150
(i * num_block_x + j) * block_elems);
151
151
xetla_vector<dtype, store_block_elems> combine_blk;
@@ -163,7 +163,7 @@ tile_store(tile_t& tile, payload_t& payload) {
163
163
for (uint32_t ii = 0 ; ii < block_size_y / st_block_size_y; ++ii) {
164
164
constexpr uint32_t store_elems =
165
165
st_block_size_y * block_size_x * arr_len;
166
- xetla_vector<dtype, store_elems> st_blk =
166
+ auto st_blk =
167
167
combine_blk.xetla_select <store_elems, 1 >(ii * store_elems);
168
168
// xetla_tstore_global<dtype, store_elems, L1, L2, payload_t::arch_tag>(
169
169
// tdesc, st_blk);
@@ -173,7 +173,7 @@ tile_store(tile_t& tile, payload_t& payload) {
173
173
st_block_size_y,
174
174
L1,
175
175
L2>(
176
- payload.base_ptr ,
176
+ reinterpret_cast <dtype*>( payload.base_ptr ) ,
177
177
payload.surface_width ,
178
178
payload.surface_height ,
179
179
payload.surface_pitch ,
@@ -210,7 +210,7 @@ tile_store(tile_t& tile, payload_t& payload) {
210
210
blk_remained_y,
211
211
L1,
212
212
L2>(
213
- payload.base_ptr ,
213
+ reinterpret_cast <dtype*>( payload.base_ptr ) ,
214
214
payload.surface_width ,
215
215
payload.surface_height ,
216
216
payload.surface_pitch ,
@@ -240,7 +240,7 @@ tile_store(tile_t& tile, payload_t& payload) {
240
240
#pragma unroll
241
241
for (uint32_t j = 0 ; j < num_block_x; j += arr_len) {
242
242
int offset_x = j * block_size_x;
243
- // xetla_tdescriptor tdesc = payload_row.row(j);
243
+ // xetla_tdescriptor tdesc = payload_row.row(j);
244
244
auto reg_blk = tile.reg .xetla_select <remained_block_elems * arr_len, 1 >(
245
245
processed_elems + j * remained_block_elems);
246
246
// Do combination
@@ -271,7 +271,7 @@ tile_store(tile_t& tile, payload_t& payload) {
271
271
remained_st_blk_size_y,
272
272
L1,
273
273
L2>(
274
- payload.base_ptr ,
274
+ reinterpret_cast <dtype*>( payload.base_ptr ) ,
275
275
payload.surface_width ,
276
276
payload.surface_height ,
277
277
payload.surface_pitch ,
@@ -308,7 +308,7 @@ tile_store(tile_t& tile, payload_t& payload) {
308
308
final_st_blk_size_y,
309
309
L1,
310
310
L2>(
311
- payload.base_ptr ,
311
+ reinterpret_cast <dtype*>( payload.base_ptr ) ,
312
312
payload.surface_width ,
313
313
payload.surface_height ,
314
314
payload.surface_pitch ,
0 commit comments