Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
554 changes: 554 additions & 0 deletions sw/include/kultest/gemmx/data.h

Large diffs are not rendered by default.

115 changes: 115 additions & 0 deletions sw/include/kultest/gemmx/snax-gemmx-lib.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Copyright 2024 KU Leuven.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Xiaoling Yi <xiaoling.yi@esat.kuleuven.be>

#include <stdbool.h>
#include "../snrt/snrt_TO.h"
#include "stdint.h"
#include "streamer_csr_addr_map.h"

#pragma once

// GEMMX CSR address map.
//
// The GEMMX CSRs are numbered consecutively, starting at the address right
// after STREAMER_PERFORMANCE_COUNTER_CSR (from streamer_csr_addr_map.h).
// Every macro below is defined as "previous entry + 1", so the order of the
// definitions fixes the address layout: inserting, removing or reordering an
// entry shifts the address of every entry after it.
#define GEMMX_CSR_ADDR_BASE (STREAMER_PERFORMANCE_COUNTER_CSR + 1)
// Base + 0 .. base + 2
#define T_BOUND_K (GEMMX_CSR_ADDR_BASE)
#define T_BOUND_N (T_BOUND_K + 1)
#define T_BOUND_M (T_BOUND_N + 1)

// Base + 3
#define SUBTRACTIONS (T_BOUND_M + 1)

// Base + 4 .. base + 5
#define SIMD_CSR0 (SUBTRACTIONS + 1)
#define SIMD_CSR1 (SIMD_CSR0 + 1)

// Base + 6 .. base + 7
#define SIMD_SHARED_BITPACKED_SHIFT0 (SIMD_CSR1 + 1)
#define SIMD_SHARED_BITPACKED_SHIFT1 (SIMD_SHARED_BITPACKED_SHIFT0 + 1)

// Base + 8 .. base + 15
#define SIMD_SHARED_MULTIPLIER0 (SIMD_SHARED_BITPACKED_SHIFT1 + 1)
#define SIMD_SHARED_MULTIPLIER1 (SIMD_SHARED_MULTIPLIER0 + 1)
#define SIMD_SHARED_MULTIPLIER2 (SIMD_SHARED_MULTIPLIER1 + 1)
#define SIMD_SHARED_MULTIPLIER3 (SIMD_SHARED_MULTIPLIER2 + 1)
#define SIMD_SHARED_MULTIPLIER4 (SIMD_SHARED_MULTIPLIER3 + 1)
#define SIMD_SHARED_MULTIPLIER5 (SIMD_SHARED_MULTIPLIER4 + 1)
#define SIMD_SHARED_MULTIPLIER6 (SIMD_SHARED_MULTIPLIER5 + 1)
#define SIMD_SHARED_MULTIPLIER7 (SIMD_SHARED_MULTIPLIER6 + 1)

// Base + 16 .. base + 17
#define TEMPORAL_LOOP_BOUND (SIMD_SHARED_MULTIPLIER7 + 1)
#define BYPASS_SIMD (TEMPORAL_LOOP_BOUND + 1)

// Base + 18 .. base + 20: start, busy and performance-counter CSRs
#define GEMMX_START (BYPASS_SIMD + 1)
#define GEMMX_BUSY (GEMMX_START + 1)
#define GEMMX_PERFORMANCE_COUNTER (GEMMX_BUSY + 1)

// Pack the matrix size settings (Batch, M, K, N, each an 8-bit value) into
// one 32-bit word meant for a single CSR.
// The bit layout is chosen by the implementation, which is not in this header.
int32_t gen_size_config(uint8_t Batch, uint8_t M, uint8_t K, uint8_t N);

// Pack two signed 8-bit subtraction values into one 32-bit word meant for a
// single CSR (presumably SUBTRACTIONS; confirm against the implementation).
int32_t gen_subtraction_config(int8_t subtraction_a, int8_t subtraction_b);

// Generate the configuration word for CSR0 from four 8-bit fields.
// NOTE(review): the result is int32_t, while set_gemmx_csr() takes csr0 as
// uint32_t, so passing the result on involves a signed-to-unsigned conversion.
// NOTE(review): presumably CSR0 is SIMD_CSR0; confirm against the
// implementation.
int32_t gen_csr0_config(uint8_t input_zp_i, uint8_t output_zp_i,
uint8_t max_int_i, uint8_t min_int_i);

// Generate the configuration word for CSR1 from the double_round_i flag.
// NOTE(review): returns int32_t, while set_gemmx_csr() takes csr1 as uint32_t.
int32_t gen_csr1_config(bool double_round_i);

// Set STREAMER configuration CSRs.
//
// The arguments come in five groups, one per name prefix (A, B, D8, C, D32),
// followed by the delta_local_* values and a few mode flags. Within a group:
//   <P>slstrideN              presumably spatial-loop stride N (cf. S_STRIDE_*)
//   <P>tlboundN, <P>tlstrideN presumably temporal-loop bound / stride N
//                             (cf. T_BOUND_* / T_STRIDE_*)
//   set_addr_remap_index_<P>  presumably the value for ADDR_REMAP_INDEX_*
// The A group has six temporal bound/stride pairs; every other group has
// three.
//
// NOTE(review): every group takes two spatial strides (slstride0/1), but
// streamer_csr_addr_map.h defines a second S_STRIDE CSR only for
// READER_WRITER_0 and READER_WRITER_1. How slstride1 is used for the remaining
// groups is decided by the implementation, which is not visible here; confirm.
// NOTE(review): the mode flags mix `int` (bypassSIMD) and `int32_t`
// (transpose_A, transpose_B, channel_en_C, broadcast_C).
void set_gemmx_streamer_csr(
// A-prefixed settings
int Aslstride0, int Aslstride1, int Atlbound0, int Atlstride0,
int Atlbound1, int Atlstride1, int Atlbound2, int Atlstride2, int Atlbound3,
int Atlstride3, int Atlbound4, int Atlstride4, int Atlbound5,
int Atlstride5, int set_addr_remap_index_A,

// B-prefixed settings
int Bslstride0, int Bslstride1, int Btlbound0, int Btlstride0,
int Btlbound1, int Btlstride1, int Btlbound2, int Btlstride2,
int set_addr_remap_index_B,

// D8-prefixed settings
int D8slstride0, int D8slstride1, int D8tlbound0, int D8tlstride0,
int D8tlbound1, int D8tlstride1, int D8tlbound2, int D8tlstride2,
int set_addr_remap_index_D8,

// C-prefixed settings
int Cslstride0, int Cslstride1, int Ctlbound0, int Ctlstride0,
int Ctlbound1, int Ctlstride1, int Ctlbound2, int Ctlstride2,
int set_addr_remap_index_C,

// D32-prefixed settings
int D32slstride0, int D32slstride1, int D32tlbound0, int D32tlstride0,
int D32tlbound1, int D32tlstride1, int D32tlbound2, int D32tlstride2,
int set_addr_remap_index_D32,

// Per-group delta_local_* values and mode flags
int delta_local_a, int delta_local_b, int delta_local_d8, int delta_local_c,
int delta_local_d32, int bypassSIMD, int32_t transpose_A,
int32_t transpose_B, int32_t channel_en_C, int32_t broadcast_C);

// Set CSR to start STREAMER: writes 1 to STREAMER_START_CSR.
//
// Declared `static inline` rather than plain `inline`: under C99/C11 rules a
// plain `inline` definition in a header does not emit an external definition,
// so any call the compiler chooses not to inline (e.g. at -O0) fails to link
// unless some translation unit also declares the function `extern`. With
// `static`, each including translation unit gets its own self-contained copy.
// The parameter list is spelled `(void)` so that this is a real prototype.
static inline void set_gemmx_streamer_start(void) {
    write_csr(STREAMER_START_CSR, 1);
}

// Set GEMM configuration CSRs.
//
// The argument names parallel the CSR address macros defined in this header
// (T_BOUND_K/N/M, SUBTRACTIONS, SIMD_CSR0/1, SIMD_SHARED_BITPACKED_SHIFT0/1,
// SIMD_SHARED_MULTIPLIER0..7, TEMPORAL_LOOP_BOUND, BYPASS_SIMD).
// NOTE(review): the exact argument-to-CSR mapping (e.g. which of
// tempLoop0..2 goes to T_BOUND_K, _N and _M) lives in the implementation and
// is not visible here; confirm before relying on the order.
void set_gemmx_csr(int tempLoop0, int tempLoop1, int tempLoop2,
int subtractions, uint32_t csr0, uint32_t csr1,
int shared_bitpacked_shift0, int shared_bitpacked_shift1,
int shared_multiplier0, int shared_multiplier1,
int shared_multiplier2, int shared_multiplier3,
int shared_multiplier4, int shared_multiplier5,
int shared_multiplier6, int shared_multiplier7,
uint32_t temporal_loop_bound, uint32_t bypassSIMD);

// Set CSR to start GEMM: writes 1 to GEMMX_START.
//
// Declared `static inline` rather than plain `inline`: under C99/C11 rules a
// plain `inline` definition in a header does not emit an external definition,
// so any call the compiler chooses not to inline (e.g. at -O0) fails to link
// unless some translation unit also declares the function `extern`. With
// `static`, each including translation unit gets its own self-contained copy.
// The parameter list is spelled `(void)` so that this is a real prototype.
static inline void set_gemmx_start(void) {
    write_csr(GEMMX_START, 1);
}

// Poll until Streamer and GEMM accelerator finish.
// Returns nothing; presumably it watches STREAMER_BUSY_CSR and GEMMX_BUSY
// (confirm against the implementation).
// NOTE(review): the empty parameter list `()` is not a prototype before C23;
// `(void)` would let the compiler reject calls that pass arguments.
void wait_gemmx_and_streamer();

// Read performance counter of the Streamer, a read-only CSR
// (presumably STREAMER_PERFORMANCE_COUNTER_CSR; confirm against the
// implementation). The value is returned as a 32-bit unsigned integer.
uint32_t read_gemmx_streamer_perf_counter();

// Read performance counter of GEMM, a read-only CSR
// (presumably GEMMX_PERFORMANCE_COUNTER; confirm against the implementation).
// The value is returned as a 32-bit unsigned integer.
uint32_t read_gemmx_perf_counter();

// Check the 8-bit result of the implicit im2col convolution: `output` is
// checked against `output_golden` for the given Batch, M and N sizes.
// NOTE(review): the meaning of the uint32_t return value (e.g. an error
// count) and the effect of `banked_data_layout` are defined by the
// implementation, which is not visible here; confirm before relying on them.
// NOTE(review): neither pointer is const-qualified; if the implementation
// only reads through them, both could be `const int8_t*`.
uint32_t check_gemmx_result_D8(int8_t* output, int8_t* output_golden,
int32_t Batch, int32_t M, int32_t N,
bool banked_data_layout);

// 32-bit counterpart of check_gemmx_result_D8: same parameter list, but
// `output` and `output_golden` point to int32_t data.
// NOTE(review): the meaning of the uint32_t return value and the effect of
// `banked_data_layout` are defined by the implementation, which is not visible
// here; confirm before relying on them.
uint32_t check_gemmx_result_D32(int32_t* output, int32_t* output_golden,
int32_t Batch, int32_t M, int32_t N,
bool banked_data_layout);
11 changes: 11 additions & 0 deletions sw/include/kultest/gemmx/snax-gemmx-params.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright 2023 KU Leuven.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Xiaoling Yi <xiaoling.yi@esat.kuleuven.be>

#pragma once

// GEMMX size parameters used by the test software.
// NOTE(review): presumably these mirror the dimensions of the hardware GEMM
// array; confirm against the hardware configuration.
// NOTE(review): these are lowercase object-like macros, so any identifier
// spelled meshRow, tileSize or meshCol in a file that includes this header
// (variable, struct field, parameter) is replaced by 8.
#define meshRow 8
#define tileSize 8
#define meshCol 8
83 changes: 83 additions & 0 deletions sw/include/kultest/gemmx/streamer_csr_addr_map.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright 2024 KU Leuven.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Xiaoling Yi <xiaoling.yi@esat.kuleuven.be>
// This file is generated by Streamer module in hw/chisel to map the CSR address of Streamer automatically, do not modify it manually
// Generated at 2024-12-05T20:22:29.903791Z

// CSR Map for READER_0
// Addresses 960-975: base pointer (low/high), 1 S_STRIDE, 6 T_BOUND,
// 6 T_STRIDE, address-remap index.
#define BASE_PTR_READER_0_LOW 960
#define BASE_PTR_READER_0_HIGH 961
#define S_STRIDE_READER_0_0 962
#define T_BOUND_READER_0_0 963
#define T_BOUND_READER_0_1 964
#define T_BOUND_READER_0_2 965
#define T_BOUND_READER_0_3 966
#define T_BOUND_READER_0_4 967
#define T_BOUND_READER_0_5 968
#define T_STRIDE_READER_0_0 969
#define T_STRIDE_READER_0_1 970
#define T_STRIDE_READER_0_2 971
#define T_STRIDE_READER_0_3 972
#define T_STRIDE_READER_0_4 973
#define T_STRIDE_READER_0_5 974
#define ADDR_REMAP_INDEX_READER_0 975
// CSR Map for READER_1
// Addresses 976-985: base pointer (low/high), 1 S_STRIDE, 3 T_BOUND,
// 3 T_STRIDE, address-remap index.
#define BASE_PTR_READER_1_LOW 976
#define BASE_PTR_READER_1_HIGH 977
#define S_STRIDE_READER_1_0 978
#define T_BOUND_READER_1_0 979
#define T_BOUND_READER_1_1 980
#define T_BOUND_READER_1_2 981
#define T_STRIDE_READER_1_0 982
#define T_STRIDE_READER_1_1 983
#define T_STRIDE_READER_1_2 984
#define ADDR_REMAP_INDEX_READER_1 985
// CSR Map for WRITER_0
// Addresses 986-995: base pointer (low/high), 1 S_STRIDE, 3 T_BOUND,
// 3 T_STRIDE, address-remap index.
#define BASE_PTR_WRITER_0_LOW 986
#define BASE_PTR_WRITER_0_HIGH 987
#define S_STRIDE_WRITER_0_0 988
#define T_BOUND_WRITER_0_0 989
#define T_BOUND_WRITER_0_1 990
#define T_BOUND_WRITER_0_2 991
#define T_STRIDE_WRITER_0_0 992
#define T_STRIDE_WRITER_0_1 993
#define T_STRIDE_WRITER_0_2 994
#define ADDR_REMAP_INDEX_WRITER_0 995
// CSR Map for READER_WRITER_0
// Addresses 996-1007: base pointer (low/high), 2 S_STRIDE, 3 T_BOUND,
// 3 T_STRIDE, address-remap index, enabled-channel CSR.
#define BASE_PTR_READER_WRITER_0_LOW 996
#define BASE_PTR_READER_WRITER_0_HIGH 997
#define S_STRIDE_READER_WRITER_0_0 998
#define S_STRIDE_READER_WRITER_0_1 999
#define T_BOUND_READER_WRITER_0_0 1000
#define T_BOUND_READER_WRITER_0_1 1001
#define T_BOUND_READER_WRITER_0_2 1002
#define T_STRIDE_READER_WRITER_0_0 1003
#define T_STRIDE_READER_WRITER_0_1 1004
#define T_STRIDE_READER_WRITER_0_2 1005
#define ADDR_REMAP_INDEX_READER_WRITER_0 1006
#define ENABLED_CHANNEL_READER_WRITER_0 1007
// CSR Map for READER_WRITER_1
// Addresses 1008-1018: base pointer (low/high), 2 S_STRIDE, 3 T_BOUND,
// 3 T_STRIDE, address-remap index. Unlike READER_WRITER_0, there is no
// enabled-channel CSR in this group.
#define BASE_PTR_READER_WRITER_1_LOW 1008
#define BASE_PTR_READER_WRITER_1_HIGH 1009
#define S_STRIDE_READER_WRITER_1_0 1010
#define S_STRIDE_READER_WRITER_1_1 1011
#define T_BOUND_READER_WRITER_1_0 1012
#define T_BOUND_READER_WRITER_1_1 1013
#define T_BOUND_READER_WRITER_1_2 1014
#define T_STRIDE_READER_WRITER_1_0 1015
#define T_STRIDE_READER_WRITER_1_1 1016
#define T_STRIDE_READER_WRITER_1_2 1017
#define ADDR_REMAP_INDEX_READER_WRITER_1 1018
// Extension CSRs (addresses 1019-1021).
// The *_EXTENSION_ENABLE macros are defined without a value, so they can only
// be tested with #ifdef / #if defined(...), not used as expressions.
#define TRANSPOSE_EXTENSION_ENABLE
#define TRANSPOSE_CSR_READER_0 1019
#define TRANSPOSE_CSR_READER_1 1020
#define C_BROADCAST_EXTENSION_ENABLE
#define C_BROADCAST_CSR_READER_WRITER_0 1021
// Other registers
// Status register
#define STREAMER_START_CSR 1022
// Read-only CSRs
#define STREAMER_BUSY_CSR 1023
#define STREAMER_PERFORMANCE_COUNTER_CSR 1024
21 changes: 21 additions & 0 deletions sw/include/kultest/snax-kul-cluster-gemmx-test.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright 2024 KU Leuven.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Xiaoling Yi <xiaoling.yi@esat.kuleuven.be>

#pragma once

#include "snrt/snrt_TO.h"
// #include "snrt/csr.h"

#include "gemmx/data.h"

#include "gemmx/snax-gemmx-params.h"
#include "gemmx/snax-gemmx-lib.h"
#include "gemmx/streamer_csr_addr_map.h"

// This is the test function for the SNAX GEMM for Conv2d.
// We use several nested loops to iterate over the input data and weights,
// achieving implicit im2col.
// NOTE(review): the meaning of the int return value (e.g. 0 on success or an
// error count) is defined by the implementation, which is not visible here.
int kul_cluster_gemmx_test();
22 changes: 22 additions & 0 deletions sw/include/kultest/snax-kul-cluster-xdma-test.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// Copyright 2024 KU Leuven.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
//
// Xiaoling Yi <xiaoling.yi@esat.kuleuven.be>

#pragma once

#include "snrt/snrt_TO.h"
#include "snrt/csr.h"

#include "xdma/data.h"

#include "xdma/snax-xdma-csr-addr.h"
#include "xdma/snax-xdma-lib.h"
// #include "xdma/streamer_csr_addr_map.h"


// Test entry point for the XDMA on the KUL cluster.
// NOTE(review): the comment previously here described the SNAX GEMM Conv2d
// test (nested loops, implicit im2col) and looks copied from
// snax-kul-cluster-gemmx-test.h. Describe what the XDMA test actually checks
// once confirmed against the implementation.
// NOTE(review): the meaning of the int return value is defined by the
// implementation, which is not visible here.
int kul_cluster_xdma_test();
Loading