From 82fd85c29e39d7276444e85058649dfd7365df98 Mon Sep 17 00:00:00 2001 From: Philip Wiese Date: Tue, 24 Sep 2024 20:07:25 +0200 Subject: [PATCH 01/28] [feature] WIP Support arbitrary matrix shapes Important Changes: - Change scaling of Softmax from 2**7-1 to 2**8-1 Current Limitations: - Only works without biases - Only works with ReLU activation - FeedForward and MatMul only work with one Tile --- .gitlab-ci.yml | 41 ++++++ .vscode/launch.json | 1 + PyITA/ITA.py | 88 +++++++++---- PyITA/ITA_onnx.py | 4 +- PyITA/softmax.py | 84 +++++++++++-- src/ita.sv | 22 +++- src/ita_controller.sv | 180 ++++++++++++++++++++++++--- src/ita_package.sv | 10 +- src/ita_requantization_controller.sv | 6 + src/ita_requantizer.sv | 11 +- src/ita_softmax.sv | 74 ++++++++--- src/ita_softmax_top.sv | 12 +- src/tb/ita_tb.sv | 10 +- 13 files changed, 454 insertions(+), 89 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7b4c3ce..2b214ff 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -29,6 +29,11 @@ generate_testvectors: - python testGenerator.py -H 1 -S 64 -E 64 -P 64 -F 64 --activation gelu - python testGenerator.py -H 1 -S 128 -E 192 -P 256 -F 256 --activation gelu - python testGenerator.py -H 1 -S 192 -E 256 -P 128 -F 128 --activation relu + # - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation gelu --no-bias + - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation relu --no-bias + - python testGenerator.py -H 1 -S 63 -E 62 -P 61 -F 61 --activation relu --no-bias + # - python testGenerator.py -H 1 -S 65 -E 130 -P 195 -F 195 --activation relu --no-bias + # - python testGenerator.py -H 1 -S 127 -E 190 -P 253 -F 253 --activation relu --no-bias artifacts: paths: - simvectors @@ -94,6 +99,42 @@ run_sim: - make sim VSIM_FLAGS=-c s=$S e=$E p=$P f=$F bias=1 activation=$activation no_stalls=$no_stalls single_attention=$single_attention - ./modelsim/return_status.sh modelsim/build/transcript $S $E $P $F ita_tb +run_sim_padding: + stage: sim + needs: + - 
generate_testvectors + parallel: + matrix: + # - S: 1 + # E: 2 + # P: 3 + # F: 3 + # activation: gelu + - S: 1 + E: 2 + P: 3 + F: 3 + activation: relu + - S: 63 + E: 62 + P: 61 + F: 61 + activation: relu + # - S: 65 + # E: 130 + # P: 195 + # F: 195 + # activation: relu + # - S: 127 + # E: 190 + # P: 253 + # F: 253 + # activation: relu + script: + - make bender + - make sim VSIM_FLAGS=-c s=$S e=$E p=$P f=$F bias=0 activation=$activation + - ./modelsim/return_status.sh modelsim/build/transcript $S $E $P $F ita_tb + run_hwpe_sim: stage: sim needs: diff --git a/.vscode/launch.json b/.vscode/launch.json index 4e54398..42f08d8 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -17,6 +17,7 @@ "-S${input:seq_len}", "-E${input:emb_len}", "-P${input:prj_len}", + "--no-bias" ], } ], diff --git a/PyITA/ITA.py b/PyITA/ITA.py index 24f7b0b..19ff5a0 100644 --- a/PyITA/ITA.py +++ b/PyITA/ITA.py @@ -69,10 +69,10 @@ def __init__(self, self._init_paths(path) - self.S_ITA = max(64, S) - self.P_ITA = max(64, P) - self.E_ITA = max(64, E) - self.F_ITA = max(64, F) + self.S_ITA = ((S - 1) // self.ITA_M + 1) * self.ITA_M + self.P_ITA = ((P - 1) // self.ITA_M + 1) * self.ITA_M + self.E_ITA = ((E - 1) // self.ITA_M + 1) * self.ITA_M + self.F_ITA = ((F - 1) // self.ITA_M + 1) * self.ITA_M self.H_ITA = 4 self.split = self.ITA_M // self.ITA_N @@ -110,10 +110,10 @@ def _validate_matrix_constraints(self, K: ArrayLike, V: ArrayLike): assert (np.all(K == V)) # WIESEP: Current restrictions for ITA - assert (self.S % self.ITA_M == 0), "Sequence length must be divisible by ITA_M" - assert (self.P % self.ITA_M == 0), "Projection space must be divisible by ITA_M" - assert (self.E % self.ITA_M == 0), "Embedding size must be divisible by ITA_M" - assert (self.F % self.ITA_M == 0), "Feedforward size must be divisible by ITA_M" + # assert (self.S % self.ITA_M == 0), "Sequence length must be divisible by ITA_M" + # assert (self.P % self.ITA_M == 0), "Projection space must be divisible by ITA_M" + 
# assert (self.E % self.ITA_M == 0), "Embedding size must be divisible by ITA_M" + # assert (self.F % self.ITA_M == 0), "Feedforward size must be divisible by ITA_M" assert ( self.E <= 512 @@ -172,7 +172,8 @@ def _initialize_tensors(self, Q, V, Wq, Wk, Wv, Wo, Bq, Bk, Bv, Bo, FF_in, Wff, else: self.Bq_in = np.zeros((self.H, self.P), dtype = np.int8) self.Bq = np.pad(self.Bq_in, ((0, 0), (0, self.P_ITA - self.P))) - self.Bq_broadcast = np.reshape(np.repeat(self.Bq, self.S, axis = 0), (self.H, self.S, self.P)) + self.Bq_broadcast = np.reshape(np.repeat(self.Bq, self.S, axis = 0), (self.H, self.S, self.P_ITA)) + self.Bq_broadcast = np.pad(self.Bq_broadcast, ((0, 0), (0, self.S_ITA - self.S), (0, 0))) if self.bias: self.Bk_in = random_shuffled_tensor( @@ -180,7 +181,8 @@ def _initialize_tensors(self, Q, V, Wq, Wk, Wv, Wo, Bq, Bk, Bv, Bo, FF_in, Wff, else: self.Bk_in = np.zeros((self.H, self.P), dtype = np.int8) self.Bk = np.pad(self.Bk_in, ((0, 0), (0, self.P_ITA - self.P))) - self.Bk_broadcast = np.reshape(np.repeat(self.Bk, self.S, axis = 0), (self.H, self.S, self.P)) + self.Bk_broadcast = np.reshape(np.repeat(self.Bk, self.S, axis = 0), (self.H, self.S, self.P_ITA)) + self.Bk_broadcast = np.pad(self.Bk_broadcast, ((0, 0), (0, self.S_ITA - self.S), (0, 0))) if self.bias: self.Bv_in = random_shuffled_tensor( @@ -188,7 +190,8 @@ def _initialize_tensors(self, Q, V, Wq, Wk, Wv, Wo, Bq, Bk, Bv, Bo, FF_in, Wff, else: self.Bv_in = np.zeros((self.H, self.P), dtype = np.int8) self.Bv = np.pad(self.Bv_in, ((0, 0), (0, self.P_ITA - self.P))) - self.Bv_broadcast = np.reshape(np.repeat(self.Bv, self.S, axis = 0), (self.H, self.S, self.P)) + self.Bv_broadcast = np.reshape(np.repeat(self.Bv, self.S, axis = 0), (self.H, self.S, self.P_ITA)) + self.Bv_broadcast = np.pad(self.Bv_broadcast, ((0, 0), (0, self.S_ITA - self.S), (0, 0))) if self.bias: self.Bo_in = random_shuffled_tensor( @@ -196,7 +199,8 @@ def _initialize_tensors(self, Q, V, Wq, Wk, Wv, Wo, Bq, Bk, Bv, Bo, FF_in, Wff, 
else: self.Bo_in = np.zeros((self.H, self.E), dtype = np.int8) self.Bo = np.pad(self.Bo_in, ((0, 0), (0, self.E_ITA - self.E))) - self.Bo_broadcast = np.reshape(np.repeat(self.Bo, self.S, axis = 0), (self.H, self.S, self.E)) + self.Bo_broadcast = np.reshape(np.repeat(self.Bo, self.S, axis = 0), (self.H, self.S, self.E_ITA)) + self.Bo_broadcast = np.pad(self.Bo_broadcast, ((0, 0), (0, self.S_ITA - self.S), (0, 0))) if self.bias: self.Bff_in = random_shuffled_tensor( @@ -204,14 +208,16 @@ def _initialize_tensors(self, Q, V, Wq, Wk, Wv, Wo, Bq, Bk, Bv, Bo, FF_in, Wff, else: self.Bff_in = np.zeros((1, self.F), dtype = np.int8) self.Bff = np.pad(self.Bff_in, ((0, 0), (0, self.F_ITA - self.F))) - self.Bff_broadcast = np.reshape(np.repeat(self.Bff, self.S, axis = 0), (1, self.S, self.F)) + self.Bff_broadcast = np.reshape(np.repeat(self.Bff, self.S, axis = 0), (1, self.S, self.F_ITA)) + self.Bff_broadcast = np.pad(self.Bff_broadcast, ((0, 0), (0, self.S_ITA - self.S), (0, 0))) if self.bias: self.Bff2_in = random_shuffled_tensor( (1, self.E), int(np.log2(self.E)) + 8, type = np.int32) if Bff2 is None else Bff2 else: self.Bff2_in = np.zeros((1, self.E), dtype = np.int8) self.Bff2 = np.pad(self.Bff2_in, ((0, 0), (0, self.E_ITA - self.E))) - self.Bff2_broadcast = np.reshape(np.repeat(self.Bff2, self.S, axis = 0), (1, self.S, self.E)) + self.Bff2_broadcast = np.reshape(np.repeat(self.Bff2, self.S, axis = 0), (1, self.S, self.E_ITA)) + self.Bff2_broadcast = np.pad(self.Bff2_broadcast, ((0, 0), (0, self.S_ITA - self.S), (0, 0))) #### Intermediate tensors #### @@ -373,7 +379,7 @@ def tiler_QK(self, qk: np.ndarray, weight: np.ndarray, bias: np.ndarray, output: # Bias Bqk is H x P # Broadcast Bias Bqk to H x S x P - bias = np.tile(bias, [1, self.S, 1]) + bias = np.tile(bias, [1, self.S_ITA, 1]) for h in range(self.H): Bias = split_matrix(bias[h], (self.ITA_M, self.ITA_N)) write_matrix(Bias, f"{bias_file}_{h}", self.paths["standalone"]) @@ -416,7 +422,7 @@ def tiler_V(self, v, 
weight, bias, output, input_file, weight_file, bias_file, o # Bias Bv is H x P # Broadcast Bias Bv to H x S x P - bias = np.tile(bias, [1, self.S, 1]) + bias = np.tile(bias, [1, self.S_ITA, 1]) # Transpose Bias Bv to H x P x S bias = np.transpose(bias, (0, 2, 1)) for h in range(self.H): @@ -497,7 +503,7 @@ def tiler_Out(self, O, weight, bias, output, input_file, weight_file, bias_file, # Bias Bo is H x E # Broadcast Bias Bo to H x S x E - bias = np.tile(bias, [1, self.S, 1]) + bias = np.tile(bias, [1, self.S_ITA, 1]) for h in range(self.H): Bias = split_matrix(bias[h], (self.ITA_M, self.ITA_N)) write_matrix(Bias, f"{bias_file}_{h}", self.paths["standalone"]) @@ -513,6 +519,12 @@ def step1_Qp(self): self.Qp_requant = requantize(self.Qp, self.requant_eps_mult[0], self.requant_right_shift[0], self.requant_add[0]) + # Set padded values to zero + if (self.S_ITA - self.S) > 0: + self.Qp_requant[:, -(self.S_ITA - self.S):, :] = 0 + if (self.P_ITA - self.P) > 0: + self.Qp_requant[:, :, -(self.P_ITA - self.P):] = 0 + self.tiler_QK(self.Q, self.Wq, self.Bq, self.Qp_requant, "Q", "Wq", "Bq", "Qp") def step2_Kp(self): @@ -521,6 +533,11 @@ def step2_Kp(self): self.Kp_requant = requantize(self.Kp, self.requant_eps_mult[1], self.requant_right_shift[1], self.requant_add[1]) + if (self.S_ITA - self.S) > 0: + self.Kp_requant[:, -(self.S_ITA - self.S):, :] = 0 + if (self.P_ITA - self.P) > 0: + self.Kp_requant[:, :, -(self.P_ITA - self.P):] = 0 + self.tiler_QK(self.K, self.Wk, self.Bk, self.Kp_requant, "K", "Wk", "Bk", "Kp") def step3_Vp(self): @@ -529,6 +546,11 @@ def step3_Vp(self): self.Vp_requant = requantize(self.Vp, self.requant_eps_mult[2], self.requant_right_shift[2], self.requant_add[2]) + if (self.S_ITA - self.S) > 0: + self.Vp_requant[:, -(self.S_ITA - self.S):, :] = 0 + if (self.P_ITA - self.P) > 0: + self.Vp_requant[:, :, -(self.P_ITA - self.P):] = 0 + # Compute Vp in transposed form self.tiler_V(self.V, self.Wv, self.Bv, self.Vp_requant, "V", "Wv", "Bv", "Vp") @@ -537,16 
+559,27 @@ def step4_QK(self, no_partial_softmax): [np.matmul(self.Qp_requant[i], np.transpose(self.Kp_requant[i]), dtype = np.int32) for i in range(self.H)]) self.A = np.clip(self.A, -2**(self.WO - 1), 2**(self.WO - 1) - 1) self.A_requant = requantize(self.A, self.requant_eps_mult[3], self.requant_right_shift[3], self.requant_add[3]) + + if (self.S_ITA - self.S) > 0: + self.A_requant[:, -(self.S_ITA - self.S):, :] = 0 + self.A_requant[:, :, -(self.S_ITA - self.S):] = 0 + self.soft(no_partial_softmax) self.tiler_AV(self.Qp_requant, self.Kp_requant, self.A_requant, "Qp_in", "Kp_in", "A") def soft(self, no_partial_softmax = False): - self.A_real_softmax = realSoftmax(self.A_requant) + self.A_real_softmax = realSoftmax(self.A_requant[:, :self.S, :self.S]) + self.A_real_softmax = np.pad(self.A_real_softmax, ((0, 0), (0, self.S_ITA - self.S), (0, self.S_ITA - self.S))) + if no_partial_softmax: - self.A_partial_softmax = fastSoftmax(self.A_requant) + self.A_partial_softmax = fastSoftmax(self.A_requant[:, :self.S, :self.S]) + self.A_partial_softmax = np.pad(self.A_partial_softmax, + ((0, 0), (0, self.S_ITA - self.S), (0, self.S_ITA - self.S))) else: - self.A_partial_softmax = streamingPartialSoftmax(self.A_requant) + self.A_partial_softmax = streamingPartialSoftmax(self.A_requant[:, :self.S, :self.S]) + self.A_partial_softmax = np.pad(self.A_partial_softmax, + ((0, 0), (0, self.S_ITA - self.S), (0, self.S_ITA - self.S))) if self.H == 1: A_save = [np.tile(self.A_partial_softmax[i], [self.split, 1]) for i in range(self.H)] @@ -564,6 +597,11 @@ def step5_AV(self): self.O_soft_requant = requantize(self.O_soft, self.requant_eps_mult[4], self.requant_right_shift[4], self.requant_add[4]) + if (self.S_ITA - self.S) > 0: + self.O_soft_requant[:, -(self.S_ITA - self.S):, :] = 0 + if (self.P_ITA - self.P) > 0: + self.O_soft_requant[:, :, -(self.P_ITA - self.P):] = 0 + self.tiler_AV(self.A_requant, np.transpose(self.Vp_requant, (0, 2, 1)), self.O_soft_requant, "A_stream_soft_in", 
"Vp_in", "O_soft") @@ -590,6 +628,12 @@ def step6_O(self): self.Out_soft = np.clip(self.Out_soft, -2**(self.WO - 1), 2**(self.WO - 1) - 1) self.Out_soft_requant = requantize(self.Out_soft, self.requant_eps_mult[5], self.requant_right_shift[5], self.requant_add[5]) + + if (self.S_ITA - self.S) > 0: + self.Out_soft_requant[:, -(self.S_ITA - self.S):, :] = 0 + if (self.E_ITA - self.E) > 0: + self.Out_soft_requant[:, :, -(self.E_ITA - self.E):] = 0 + self.tiler_Out(self.O_soft_requant, self.Wo, self.Bo, self.Out_soft_requant, "O_soft_in", "Wo", "Bo", "Out_soft") @@ -934,8 +978,8 @@ def export_mempool(self, path): def export_numpy(self): assert np.all(np.equal(self.K, self.V)), "For ITA, keys and values have to be equal" - q = self.Q - k = self.K + q = self.Q_in + k = self.K_in w1 = self.Wq_in b1 = self.Bq_in w2 = self.Wk_in diff --git a/PyITA/ITA_onnx.py b/PyITA/ITA_onnx.py index eda85f3..235cf00 100644 --- a/PyITA/ITA_onnx.py +++ b/PyITA/ITA_onnx.py @@ -259,8 +259,8 @@ def exportONNX(path, verbose = False, **kwargs): # Transform from MUL-DIV-ADD to MUL-ADD-DIV RQ_ADD = (RQ_ADD * 2**RQ_SHIFT.astype(np.float32)) - input0_values = np.expand_dims(inputs['q'][:(S * E // 64), :].reshape(S, E), axis = 0) - input1_values = np.expand_dims(inputs['k'][:(S * E // 64), :].reshape(S, E), axis = 0) + input0_values = np.expand_dims(inputs['q'].reshape(S, E), axis = 0) + input1_values = np.expand_dims(inputs['k'].reshape(S, E), axis = 0) np.savez(path + "inputs.npz", input0_values, input1_values) diff --git a/PyITA/softmax.py b/PyITA/softmax.py index 8cbc5cf..eabf432 100644 --- a/PyITA/softmax.py +++ b/PyITA/softmax.py @@ -14,6 +14,8 @@ # # ---------------------------------------------------------------------- +import argparse + import numpy as np @@ -71,10 +73,7 @@ def streamingPartialSoftmax(x, integerize = True): seq_length = x.shape[-1] n_heads = x.shape[-3] - width = 16 # 16 PE (processing units) - groups = seq_length // width - - assert seq_length % width == 0, f"Sequence 
length must be a multiple of width ({width})" + PE = 16 # 16 PE (processing units) # Number of bits B = 8 @@ -101,12 +100,14 @@ def streamingPartialSoftmax(x, integerize = True): global_max = np.full((n_heads, seq_length), -np.Infinity, dtype = np.float32) ## STAGE 1: Compute the denominator of the softmax - for i in range(groups): + for i in range((seq_length + PE - 1) // PE): + width = seq_length % PE if i * PE + PE > seq_length else PE + # Find the maximum for each row in the current column block (consisting of 16 columns) if integerize: - current_max = np.max(x[..., 0 + i * width:width + i * width].astype(np.int32), axis = -1) + current_max = np.max(x[..., 0 + i * PE:width + i * PE].astype(np.int32), axis = -1) else: - current_max = np.max(x[..., 0 + i * width:width + i * width].astype(np.float32), axis = -1) + current_max = np.max(x[..., 0 + i * PE:width + i * PE].astype(np.float32), axis = -1) # Initialize all shift values for each row to zero if integerize: @@ -129,11 +130,11 @@ def streamingPartialSoftmax(x, integerize = True): # Find the difference between the maximum and x in the current part of the row if integerize: - diff = np.repeat(global_max, width).reshape( - n_heads, seq_length, width) - x[..., 0 + i * width:width + i * width].astype(np.int32) + diff = np.repeat(global_max, width).reshape(n_heads, seq_length, + width) - x[..., 0 + i * PE:width + i * PE].astype(np.int32) else: - diff = np.repeat(global_max, width).reshape( - n_heads, seq_length, width) - x[..., 0 + i * width:width + i * width].astype(np.float32) + diff = np.repeat(global_max, width).reshape(n_heads, seq_length, + width) - x[..., 0 + i * PE:width + i * PE].astype(np.float32) # Shift the values by B-log2B -> multiply by B/2**B = log2e*eps_x # Make sure to do use round-half-up instead of round-half-to-even @@ -177,7 +178,7 @@ def streamingPartialSoftmax(x, integerize = True): # A_partial_softmax[0] = np.repeat(exp_partial_sum_inverse, seq_length).reshape(seq_length, seq_length) >> 
shift return np.floor( np.repeat(exp_partial_sum_inverse, seq_length).reshape(n_heads, seq_length, seq_length) / 2**shift).astype( - np.int8) + np.uint8) else: return np.repeat(exp_partial_sum_inverse, seq_length).reshape(n_heads, seq_length, seq_length) / 2**shift @@ -195,7 +196,66 @@ def realSoftmax(A_requant, integerize = True): x = A_requant.astype(np.float64) exp = np.exp(x - np.max(x, axis = 2).reshape(n_heads, -1, 1)) + + # Replace nan with zero + exp = np.nan_to_num(exp) + if integerize: return (exp / exp.sum(axis = 2).reshape(n_heads, -1, 1) * (2**7 - 1)).astype(A_requant.dtype) else: return exp / exp.sum(axis = 2).reshape(n_heads, -1, 1) + + +if __name__ == "__main__": + np.set_printoptions(linewidth = 120) + np.set_printoptions(precision = 4) + + # Always print whole array + np.set_printoptions(threshold = np.inf) + + parser = argparse.ArgumentParser(description = "Test Utility for Softmax.") + # Sequence length + parser.add_argument("-S", default = 64, type = int, help = "Sequence length") + + # ITA sequence length + parser.add_argument("-M", default = 64, type = int, help = "ITA sequence length") + + # Quantization (float or int) + parser.add_argument("--int", action = "store_true", help = "Quantize to int") + parser.add_argument('--seed', default = 0, type = int, help = 'Random seed') + + args = parser.parse_args() + + ITA_WI = 8 + WO = 26 + ITA_N = 16 + ITA_M = args.M + + if args.seed != -1: + np.random.seed(args.seed) + + if args.int: + x = np.random.randint(-128, 128, (1, 1, args.S, args.S)).astype(np.int8) + else: + x = np.random.randn(1, 1, 16, 16).astype(np.float32) + + print("Input:") + print(x) + + # Pad last two dimensions to be a multiple of ITA_M + pad_x = (ITA_M - x.shape[-1] % ITA_M) % ITA_M + pad_y = (ITA_M - x.shape[-2] % ITA_M) % ITA_M + pad_value = -2**(ITA_WI - 1) if args.int else -np.inf + + print(f"Padding x by ({pad_y}, {pad_x}) with {pad_value}") + x_pad = np.pad(x, ((0, 0), (0, 0), (0, pad_y), (0, pad_x)), mode = 'constant', 
constant_values = pad_value) + + res = realSoftmax(x, integerize = args.int) + res_pad = realSoftmax(x_pad, integerize = args.int) + + res_unpad = res_pad[:, :, :args.S, :args.S] + + # Compare results + print(f"Equal: {np.allclose(res, res_unpad, atol = 1e-3)}") + print(res) + print(res_unpad) diff --git a/src/ita.sv b/src/ita.sv index 2dad263..ceb302d 100644 --- a/src/ita.sv +++ b/src/ita.sv @@ -171,6 +171,12 @@ module ita assign oup_o = valid_o ? data_from_fifo : '0; + requant_oup_t requant_add_o; + + counter_t inner_tile; + counter_t tile_x; + counter_t tile_y; + ita_controller i_controller ( .clk_i (clk_i ), .rst_ni (rst_ni ), @@ -190,6 +196,11 @@ module ita .calc_en_o (calc_en ), .first_inner_tile_o (first_inner_tile ), .last_inner_tile_o (last_inner_tile ), + .tile_x_o (tile_x ), + .tile_y_o (tile_y ), + .inner_tile_o (inner_tile ), + .requant_add_i (requant_add ), + .requant_add_o (requant_add_o ), .busy_o (busy_o ) ); @@ -255,13 +266,16 @@ module ita .soft_addr_div_o (soft_addr_div ), .softmax_done_o (softmax_done ), .pop_softmax_fifo_o (pop_softmax_fifo ), - .inp_stream_soft_o (inp_stream_soft ) + .inp_stream_soft_o (inp_stream_soft ), + .tile_x_i (tile_x ), + .tile_y_i (tile_y ), + .inner_tile_i (inner_tile ) ); ita_requatization_controller i_requantization_controller ( .ctrl_i (ctrl_i ), - .requantizer_step_i (step_q4 ), + .requantizer_step_i (step_q4 ), .requant_mult_o (requant_mult ), .requant_shift_o (requant_shift ), .requant_add_o (requant_add ), @@ -282,8 +296,8 @@ module ita .calc_en_i ( calc_en_q4 && last_inner_tile_q4 ), .calc_en_q_i ( calc_en_q5 && last_inner_tile_q5 ), - .result_i ( accumulator_oup ), - .add_i ( {N {requant_add}} ), + .result_i ( accumulator_oup ), + .add_i ( requant_add_o ), .requant_oup_o( requant_oup ) ); diff --git a/src/ita_controller.sv b/src/ita_controller.sv index 0fa8034..0c6ebe4 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -10,24 +10,29 @@ module ita_controller import ita_package::*; ( - input 
logic clk_i , - input logic rst_ni , - input ctrl_t ctrl_i , - input logic inp_valid_i , - output logic inp_ready_o , - input logic weight_valid_i , - output logic weight_ready_o , - input logic bias_valid_i , - output logic bias_ready_o , - input logic oup_valid_i , - input logic oup_ready_i , - input logic pop_softmax_fifo_i , - output step_e step_o , - input counter_t soft_addr_div_i , - input logic softmax_done_i , - output logic calc_en_o , - output logic first_inner_tile_o , - output logic last_inner_tile_o , + input logic clk_i , + input logic rst_ni , + input ctrl_t ctrl_i , + input logic inp_valid_i , + output logic inp_ready_o , + input logic weight_valid_i , + output logic weight_ready_o , + input logic bias_valid_i , + output logic bias_ready_o , + input logic oup_valid_i , + input logic oup_ready_i , + input logic pop_softmax_fifo_i , + output step_e step_o , + input counter_t soft_addr_div_i , + input logic softmax_done_i , + output logic calc_en_o , + output logic first_inner_tile_o , + output logic last_inner_tile_o , + output counter_t tile_x_o , + output counter_t tile_y_o , + output counter_t inner_tile_o , + input requant_t requant_add_i , + output requant_oup_t requant_add_o , output logic busy_o ); @@ -35,19 +40,27 @@ module ita_controller counter_t count_d, count_q; counter_t tile_d, tile_q; counter_t inner_tile_d, inner_tile_q; + counter_t tile_x_d, tile_x_q; + counter_t tile_y_d, tile_y_q; counter_t softmax_tile_d, softmax_tile_q; ongoing_t ongoing_d, ongoing_q; ongoing_soft_t ongoing_soft_d, ongoing_soft_q; logic softmax_fifo, softmax_div, softmax_div_done_d, softmax_div_done_q, busy_d, busy_q; + requant_oup_t requant_add_d, requant_add_q; assign step_o = step_q; assign busy_o = busy_q; + assign tile_x_o = tile_x_q; + assign tile_y_o = tile_y_q; + assign inner_tile_o = inner_tile_q; always_comb begin count_d = count_q; tile_d = tile_q; inner_tile_d = inner_tile_q; + tile_x_d = tile_x_q; + tile_y_d = tile_y_q; first_inner_tile_o = 
(inner_tile_q == 0) ? 1'b1 : 1'b0; last_inner_tile_o = 1'b0; ongoing_d = ongoing_q; @@ -59,6 +72,8 @@ module ita_controller step_d = step_q; softmax_tile_d = softmax_tile_q; softmax_div_done_d = softmax_div_done_q; + requant_add_d = {N {requant_add_i}}; + requant_add_o = requant_add_q; busy_d = busy_q; softmax_fifo = 1'b0; @@ -108,6 +123,8 @@ module ita_controller case (step_q) Idle : begin inner_tile_d = '0; + tile_x_d = '0; + tile_y_d = '0; tile_d = '0; softmax_tile_d = '0; softmax_div_done_d = 1'b0; @@ -128,12 +145,33 @@ module ita_controller Q : begin if (inner_tile_q == ctrl_i.tile_e-1) begin last_inner_tile_o = 1'b1; + if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.seq_length - 1) ) ) ) begin + requant_add_d = {N {1'b0}}; + end else begin + if ( (count_q + tile_x_q * M*M/N) >= (ctrl_i.proj_space / N) * M ) begin + if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.proj_space) begin + for (int i = (ctrl_i.proj_space & (N-1)); i < N; i++) begin + requant_add_d[i] = 1'b0; + end + end else begin + requant_add_d = {N {1'b0}}; + end + end + end end if (inner_tile_d == ctrl_i.tile_e) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; + if (tile_x_q == (ctrl_i.tile_p-1)) begin // end of step Q + tile_x_d = '0; + tile_y_d = tile_y_q + 1; + end else begin + tile_x_d = tile_x_q + 1; + end if (tile_d == ctrl_i.tile_s*ctrl_i.tile_p) begin // end of step Q tile_d = '0; + tile_x_d = '0; + tile_y_d = '0; step_d = K; end end @@ -141,12 +179,33 @@ module ita_controller K: begin if (inner_tile_q == ctrl_i.tile_e-1) begin last_inner_tile_o = 1'b1; + if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.seq_length - 1) ) ) ) begin + requant_add_d = {N {1'b0}}; + end else begin + if ( (count_q + tile_x_q * M*M/N) >= (ctrl_i.proj_space / N) * M ) begin + if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.proj_space) begin + for (int i = (ctrl_i.proj_space & (N-1)); i < N; i++) begin + requant_add_d[i] = 1'b0; + end + end else begin + requant_add_d = {N 
{1'b0}}; + end + end + end end if (inner_tile_d == ctrl_i.tile_e) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; + if (tile_x_q == (ctrl_i.tile_p-1)) begin // end of step Q + tile_x_d = '0; + tile_y_d = tile_y_q + 1; + end else begin + tile_x_d = tile_x_q + 1; + end if (tile_d == ctrl_i.tile_s*ctrl_i.tile_p) begin // end of step K tile_d = '0; + tile_x_d = '0; + tile_y_d = '0; step_d = V; end end @@ -154,12 +213,33 @@ module ita_controller V: begin if (inner_tile_q == ctrl_i.tile_e-1) begin last_inner_tile_o = 1'b1; + if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.proj_space - 1) ) ) ) begin + requant_add_d = {N {1'b0}}; + end else begin + if ( (count_q + tile_x_q * M*M/N) >= (ctrl_i.seq_length / N) * M ) begin + if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.seq_length) begin + for (int i = (ctrl_i.seq_length & (N-1)); i < N; i++) begin + requant_add_d[i] = 1'b0; + end + end else begin + requant_add_d = {N {1'b0}}; + end + end + end end if (inner_tile_d == ctrl_i.tile_e) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; + if (tile_x_q == (ctrl_i.tile_s-1)) begin // end of step Q + tile_x_d = '0; + tile_y_d = tile_y_q + 1; + end else begin + tile_x_d = tile_x_q + 1; + end if (tile_d == ctrl_i.tile_s*ctrl_i.tile_p) begin // end of step V tile_d = '0; + tile_x_d = '0; + tile_y_d = '0; step_d = QK; end end @@ -167,10 +247,28 @@ module ita_controller QK : begin if (inner_tile_q == ctrl_i.tile_p-1) begin last_inner_tile_o = 1'b1; + if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.seq_length - 1) ) ) ) begin + requant_add_d = {N {1'b0}}; + end else begin + if ( (count_q + tile_x_q * M*M/N) >= (ctrl_i.seq_length / N) * M ) begin + if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.seq_length) begin + for (int i = (ctrl_i.seq_length & (N-1)); i < N; i++) begin + requant_add_d[i] = 1'b0; + end + end else begin + requant_add_d = {N {1'b0}}; + end + end + end end if (inner_tile_d == ctrl_i.tile_p) begin // end of inner tile 
inner_tile_d = '0; tile_d = tile_q + 1; + if (tile_x_q == (ctrl_i.tile_s-1)) begin // end of step Q + tile_x_d = '0; + end else begin + tile_x_d = tile_x_q + 1; + end if (tile_d == ctrl_i.tile_s) begin // end of step QK tile_d = '0; step_d = AV; @@ -180,21 +278,42 @@ module ita_controller AV : begin if (inner_tile_q == ctrl_i.tile_s-1) begin last_inner_tile_o = 1'b1; + if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.seq_length - 1) ) ) ) begin + requant_add_d = {N {1'b0}}; + end else begin + if ( (count_q + tile_x_q * M*M/N) >= (ctrl_i.proj_space / N) * M ) begin + if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.proj_space) begin + for (int i = (ctrl_i.proj_space & (N-1)); i < N; i++) begin + requant_add_d[i] = 1'b0; + end + end else begin + requant_add_d = {N {1'b0}}; + end + end + end end if (inner_tile_d == ctrl_i.tile_s) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; + if (tile_x_q == (ctrl_i.tile_p-1)) begin // end of step Q + tile_x_d = '0; + end else begin + tile_x_d = tile_x_q + 1; + end if (tile_d == ctrl_i.tile_p) begin tile_d = '0; softmax_tile_d = softmax_tile_q + 1; if (softmax_tile_d == ctrl_i.tile_s) begin softmax_tile_d = '0; + tile_x_d = '0; + tile_y_d = '0; if (ctrl_i.layer == Attention) begin step_d = OW; end else if (ctrl_i.layer == SingleAttention) begin step_d = Idle; end end else begin + tile_y_d = tile_y_q + 1; step_d = QK; end end @@ -203,12 +322,33 @@ module ita_controller OW : begin if (inner_tile_q == ctrl_i.tile_p-1) begin last_inner_tile_o = 1'b1; + if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.seq_length - 1) ) ) ) begin + requant_add_d = {N {1'b0}}; + end else begin + if ( (count_q + tile_x_q * M*M/N) >= (ctrl_i.embed_size / N) * M ) begin + if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.embed_size) begin + for (int i = (ctrl_i.embed_size & (N-1)); i < N; i++) begin + requant_add_d[i] = 1'b0; + end + end else begin + requant_add_d = {N {1'b0}}; + end + end + end end if (inner_tile_d == 
ctrl_i.tile_p) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; + if (tile_x_q == (ctrl_i.tile_e-1)) begin // end of step Q + tile_x_d = '0; + tile_y_d = tile_y_q + 1; + end else begin + tile_x_d = tile_x_q + 1; + end if (tile_d == ctrl_i.tile_s*ctrl_i.tile_e) begin // end of step OW tile_d = '0; + tile_x_d = '0; + tile_y_d = '0; step_d = Idle; end end @@ -281,16 +421,20 @@ module ita_controller ongoing_q <= '0; ongoing_soft_q <= '0; softmax_div_done_q <= 1'b0; + requant_add_q <= '0; busy_q <= 1'b0; end else begin step_q <= step_d; count_q <= count_d; tile_q <= tile_d; + tile_x_q <= tile_x_d; + tile_y_q <= tile_y_d; inner_tile_q <= inner_tile_d; softmax_tile_q <= softmax_tile_d; ongoing_q <= ongoing_d; ongoing_soft_q <= ongoing_soft_d; softmax_div_done_q <= softmax_div_done_d; + requant_add_q <= requant_add_d; busy_q <= busy_d; end end diff --git a/src/ita_package.sv b/src/ita_package.sv index 335e173..04d0d53 100644 --- a/src/ita_package.sv +++ b/src/ita_package.sv @@ -45,13 +45,15 @@ package ita_package; typedef logic [N_REQUANT_CONSTS-1:0][EMS-1:0] requant_const_array_t; typedef logic signed [WI-1:0] requant_t; typedef logic signed [N_REQUANT_CONSTS-1:0][WI-1:0] requant_array_t; - typedef logic [idx_width(S+1)-1:0] seq_length_t; - typedef logic [idx_width(P+1)-1:0] proj_space_t; - typedef logic [idx_width(E+1)-1:0] embed_size_t; - typedef logic [idx_width(H+1)-1:0] n_heads_t; + typedef logic [WO-WI*2-2:0] seq_length_t; + typedef logic [WO-WI*2-2:0] proj_space_t; + typedef logic [WO-WI*2-2:0] embed_size_t; typedef logic [ 32-1:0] tile_t; typedef struct packed { logic start ; + seq_length_t seq_length ; + proj_space_t proj_space ; + embed_size_t embed_size ; layer_e layer ; activation_e activation ; requant_const_array_t eps_mult ; diff --git a/src/ita_requantization_controller.sv b/src/ita_requantization_controller.sv index 3b6c865..e844358 100644 --- a/src/ita_requantization_controller.sv +++ b/src/ita_requantization_controller.sv @@ -33,6 
+33,12 @@ module ita_requatization_controller endcase end + // always_comb begin + // requant_mult = ctrl_i.eps_mult[step_q4]; + // requant_shift = ctrl_i.right_shift[step_q4]; + // requant_add = ctrl_i.add[step]; + // end + assign requant_mult_o = ctrl_i.eps_mult[constant_idx]; assign requant_shift_o = ctrl_i.right_shift[constant_idx]; assign requant_add_o = ctrl_i.add[constant_idx]; diff --git a/src/ita_requantizer.sv b/src/ita_requantizer.sv index 6033c09..c67b97c 100644 --- a/src/ita_requantizer.sv +++ b/src/ita_requantizer.sv @@ -22,7 +22,8 @@ module ita_requantizer logic signed [EMS+WO:0] product ; logic signed [EMS+WO:0] shifted_added; logic signed [ N-1:0][EMS+WO-1:0] shifted_d, shifted_q; - requant_oup_t add_q1, requant_oup_d, requant_oup_q; + requant_oup_t add_q1, add_q2, add_q3, add_q4; + requant_oup_t requant_oup_d, requant_oup_q; assign requant_oup_o = requant_oup_q; @@ -49,7 +50,7 @@ module ita_requantizer end end if (calc_en_q_i) begin - shifted_added = shifted_q[i] + (EMS+WO)'(signed'(add_q1[i])); + shifted_added = shifted_q[i] + (EMS+WO)'(signed'(add_q4[i])); requant_oup_d[i] = shifted_added[WI-1:0]; if (~shifted_added[EMS+WO-1] & (|(shifted_added[EMS+WO-2:WI-1]))) begin requant_oup_d[i] = '1; @@ -76,8 +77,14 @@ module ita_requantizer always_ff @(posedge clk_i, negedge rst_ni) begin if (!rst_ni) begin add_q1 <= '0; + add_q2 <= '0; + add_q3 <= '0; + add_q4 <= '0; end else begin add_q1 <= add_i; + add_q2 <= add_q1; + add_q3 <= add_q2; + add_q4 <= add_q3; end end endmodule diff --git a/src/ita_softmax.sv b/src/ita_softmax.sv index 675750c..ac61ed2 100644 --- a/src/ita_softmax.sv +++ b/src/ita_softmax.sv @@ -39,14 +39,19 @@ module ita_softmax input requant_t [1:0] read_max_data_i, output logic write_max_en_o, output logic [InputAddrWidth-1:0] write_max_addr_o, - output requant_t write_max_data_o + output requant_t write_max_data_o, + input counter_t tile_x_i, + input counter_t tile_y_i, + input counter_t inner_tile_i ); counter_t tile_d, tile_q1, 
tile_q2, tile_q3, tile_q4; counter_t count_d, count_q1, count_q2, count_q3, count_q4; + counter_t inner_tile_q; + counter_t tile_y_q; logic unsigned [SoftmaxAccDataWidth-1:0] exp_sum_d, exp_sum_q; - counter_t count_soft_d, count_soft_q; + counter_t count_soft_d, count_soft_q1, count_soft_q2; counter_t count_div_d, count_div_q, addr_div_d, addr_div_q; logic [NumDiv-1:0] div_read_d, div_read_q, div_write_d, div_write_q; @@ -69,13 +74,19 @@ module ita_softmax logic [SoftmaxAccDataWidth-1:0] data_to_fifo, data_from_fifo; soft_fifo_usage_t fifo_usage ; + logic [N-1:0] disable_shift; + logic disable_row; + logic [M-1:0]disable_col; + + assign disable_row = ((count_soft_q2 & (M-1)) + tile_y_q * M) > (ctrl_i.seq_length - 1); + assign pop_softmax_fifo_o = pop_from_fifo; assign soft_addr_div_o = addr_div_q; always_comb begin tile_d = tile_q1; count_d = count_q1; - count_soft_d = count_soft_q; + count_soft_d = count_soft_q1; count_div_d = count_div_q; div_read_d = div_read_q; div_write_d = div_write_q; @@ -135,13 +146,20 @@ module ita_softmax //************ Pipeline Stage 1 ************// if (calc_en_q1) begin // Find max and accumulate - max_o = requant_oup_q; max_d = max_i; for (int i = 0; i < N; i++) begin shift_diff[i] = max_i - requant_oup_q[i]; - shift_d[i] = unsigned'(shift_diff[i]) >> 5; - if (shift_diff[i][4]) - shift_d[i] = (unsigned'(shift_diff[i]) >> 5) + 1; + disable_shift[i] = ( (tile_q2*M+N*(count_q2 >> $clog2(M))+i ) >= ctrl_i.seq_length); + + if (disable_shift[i]) begin + max_o[i] = 8'h80; + shift_d[i] = 4'hF; + end else begin + max_o[i] = requant_oup_q[i]; + shift_d[i] = unsigned'(shift_diff[i]) >> 5; + if (shift_diff[i][4]) + shift_d[i] = (unsigned'(shift_diff[i]) >> 5) + 1; + end end if (tile_q2 != '0 || count_q2>=M) begin // If not first part of the first row, normalize previous sum read_acc_en_o[0] = 1; @@ -162,7 +180,8 @@ module ita_softmax write_max_addr_o = count_q3; write_max_data_o = max_q; for (int i = 0; i < N; i++) begin - exp_sum_d += 
unsigned'(9'h100)>>shift_q[i]; + if (shift_d[i] != 4'hF) + exp_sum_d += unsigned'(9'h100)>>shift_q[i]; end if (tile_q3 != '0 || count_q3>=M) begin // If not first part of the first row exp_sum_d += ( unsigned'(read_acc_data_i[0]) >> shift_sum_q); @@ -211,28 +230,39 @@ module ita_softmax //*********** Stream Softmax ***********// // Main controller checks if division is ready if (calc_stream_soft_en_i) begin - count_soft_d = count_soft_q + 1; + count_soft_d = count_soft_q1 + 1; read_acc_en_o[1] = 1; - read_acc_addr_o[1] = count_soft_q[5:0]; + read_acc_addr_o[1] = count_soft_q1[5:0]; read_max_en_o[1] = 1; - read_max_addr_o[1] = count_soft_q[5:0]; + read_max_addr_o[1] = count_soft_q1[5:0]; if (count_soft_d == M*M/N) begin count_soft_d = '0; end end if (calc_stream_soft_en_q) begin - for (int i = 0; i < M; i++) begin - shift_inp_diff[i] = read_max_data_i[1]-inp_i[i]; - shift_inp[i] = unsigned'(shift_inp_diff[i]) >> 5; - if (shift_inp_diff[i][4]) - shift_inp[i] = (unsigned'(shift_inp_diff[i]) >> 5) + 1; - inp_stream_soft_o[i] = read_acc_data_i[1] >> shift_inp[i]; + if (disable_row) begin + inp_stream_soft_o = { M { '0 } }; + end else begin + for (int i = 0; i < M; i++) begin + disable_col[i] = ((inner_tile_q*M + i) >= ctrl_i.seq_length); + if (disable_col[i]) begin + inp_stream_soft_o[i] = '0; + end else begin + shift_inp_diff[i] = read_max_data_i[1]-inp_i[i]; + shift_inp[i] = unsigned'(shift_inp_diff[i]) >> 5; + if (shift_inp_diff[i][4]) + shift_inp[i] = (unsigned'(shift_inp_diff[i]) >> 5) + 1; + inp_stream_soft_o[i] = read_acc_data_i[1] >> shift_inp[i]; + end + end end end end always_ff @(posedge clk_i or negedge rst_ni) begin if(~rst_ni) begin + inner_tile_q <= '0; + tile_y_q <= '0; tile_q4 <= '0; tile_q3 <= '0; tile_q2 <= '0; @@ -240,8 +270,9 @@ module ita_softmax count_q4 <= M*M/N; count_q3 <= M*M/N; count_q2 <= M*M/N; - count_q1 <= M*M/N; - count_soft_q <= '0; + count_q1 <= M*M/N; + count_soft_q1 <= '0; + count_soft_q2 <= '0; count_div_q <= '0; div_read_q <= '0; 
div_write_q <= '0; @@ -253,6 +284,8 @@ module ita_softmax shift_q <= '0; shift_sum_q <= '0; end else begin + inner_tile_q <= inner_tile_i; + tile_y_q <= tile_y_i; tile_q4 <= tile_q3; tile_q3 <= tile_q2; tile_q2 <= tile_q1; @@ -261,7 +294,8 @@ module ita_softmax count_q3 <= count_q2; count_q2 <= count_q1; count_q1 <= count_d; - count_soft_q <= count_soft_d; + count_soft_q1 <= count_soft_d; + count_soft_q2 <= count_soft_q1; count_div_q <= count_div_d; div_read_q <= div_read_d; div_write_q <= div_write_d; diff --git a/src/ita_softmax_top.sv b/src/ita_softmax_top.sv index e44fb4f..df2b421 100644 --- a/src/ita_softmax_top.sv +++ b/src/ita_softmax_top.sv @@ -19,7 +19,11 @@ module ita_softmax_top output counter_t soft_addr_div_o , output logic softmax_done_o , output logic pop_softmax_fifo_o , - output inp_t inp_stream_soft_o + output inp_t inp_stream_soft_o , + input counter_t tile_x_i , + input counter_t tile_y_i , + input counter_t inner_tile_i + ); logic [1:0] read_acc_en; @@ -113,7 +117,11 @@ module ita_softmax_top .write_max_en_o (write_max_en ), .write_max_addr_o (write_max_addr ), - .write_max_data_o (write_max_data ) + .write_max_data_o (write_max_data ), + + .tile_x_i (tile_x_i ), + .tile_y_i (tile_y_i ), + .inner_tile_i (inner_tile_i ) ); ita_register_file_1w_multi_port_read #( diff --git a/src/tb/ita_tb.sv b/src/tb/ita_tb.sv index e8f84a6..6d0f9fe 100644 --- a/src/tb/ita_tb.sv +++ b/src/tb/ita_tb.sv @@ -91,9 +91,10 @@ module ita_tb; "_", $sformatf( "%s", ACTIVATION) }; - N_TILES_SEQUENCE_DIM = SEQUENCE_LEN / M_TILE_LEN; - N_TILES_EMBEDDING_DIM = EMBEDDING_SIZE / M_TILE_LEN; - N_TILES_PROJECTION_DIM = PROJECTION_SPACE / M_TILE_LEN; + // Round up + N_TILES_SEQUENCE_DIM = (SEQUENCE_LEN + M_TILE_LEN -1 ) / M_TILE_LEN; + N_TILES_EMBEDDING_DIM = (EMBEDDING_SIZE+ M_TILE_LEN -1 ) / M_TILE_LEN; + N_TILES_PROJECTION_DIM = (PROJECTION_SPACE + M_TILE_LEN -1 ) / M_TILE_LEN; N_TILES_LINEAR_PROJECTION = N_TILES_SEQUENCE_DIM * N_TILES_EMBEDDING_DIM * N_TILES_PROJECTION_DIM; 
N_TILES_ATTENTION = N_TILES_SEQUENCE_DIM * N_TILES_PROJECTION_DIM; N_ENTRIES_PER_TILE = M_TILE_LEN * M_TILE_LEN / N_PE; @@ -489,6 +490,9 @@ task automatic apply_ITA_weights(input integer phase); ita_ctrl.tile_p = N_TILES_PROJECTION_DIM; ita_ctrl.tile_s = N_TILES_SEQUENCE_DIM; ita_ctrl.tile_f = N_TILES_FEEDFORWARD; + ita_ctrl.seq_length = SEQUENCE_LEN; + ita_ctrl.proj_space = PROJECTION_SPACE; + ita_ctrl.embed_size = EMBEDDING_SIZE; read_activation_constants(ita_ctrl.gelu_b, ita_ctrl.gelu_c, ita_ctrl.activation_requant_mult, ita_ctrl.activation_requant_shift, ita_ctrl.activation_requant_add); From 7d07f8436b0a97d6acffd13e8ec222ebc556b086 Mon Sep 17 00:00:00 2001 From: Philip Wiese Date: Tue, 24 Sep 2024 19:52:11 +0200 Subject: [PATCH 02/28] [change] Speedup CI by removing Python Dependencies --- README.md | 1 + requirements.dev.txt | 7 +++++++ requirements.txt | 3 --- 3 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 requirements.dev.txt diff --git a/README.md b/README.md index 3a29e23..b58d20e 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,7 @@ To install the required Python packages, create a virtual environment. Make sure $> python -m venv venv $> source venv/bin/activate $> pip install -r requirements.txt +$> pip install -r requirements.dev.txt # Only required for PyITA/test_gelu.py ``` If you want to enable pre-commit hooks, which perform code formatting and linting, run the following command: diff --git a/requirements.dev.txt b/requirements.dev.txt new file mode 100644 index 0000000..faa86d4 --- /dev/null +++ b/requirements.dev.txt @@ -0,0 +1,7 @@ +# Copyright 2023 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+# SPDX-License-Identifier: Apache-2.0 + +torch +pytest +pytest-check diff --git a/requirements.txt b/requirements.txt index 4bfbc47..e8e03c9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,4 @@ onnxruntime netron seaborn matplotlib -torch -pytest -pytest-check pre-commit From 24015877882d8021b8e9d90defead7f3244fd75e Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Sun, 13 Oct 2024 14:09:55 +0200 Subject: [PATCH 03/28] Added debug.py to print matrices --- .gitlab-ci.yml | 13 ++++----- PyITA/debug.py | 52 ++++++++++++++++++++++++++++++++++++ modelsim/sim_ita_tb_wave.tcl | 1 + 3 files changed, 60 insertions(+), 6 deletions(-) create mode 100644 PyITA/debug.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 2b214ff..31832bc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,11 +26,12 @@ generate_testvectors: stage: test script: - !reference [.setup_test, script] - - python testGenerator.py -H 1 -S 64 -E 64 -P 64 -F 64 --activation gelu - - python testGenerator.py -H 1 -S 128 -E 192 -P 256 -F 256 --activation gelu - - python testGenerator.py -H 1 -S 192 -E 256 -P 128 -F 128 --activation relu - # - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation gelu --no-bias - - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation relu --no-bias + - python testGenerator.py -H 1 -S 128 -E 128 -P 192 -F 256 --activation identity --no-bias + # - python testGenerator.py -H 1 -S 64 -E 128 -P 192 -F 256 --activation identity --no-bias + # - python testGenerator.py -H 1 -S 128 -E 192 -P 256 -F 256 --activation gelu + # - python testGenerator.py -H 1 -S 192 -E 256 -P 128 -F 128 --activation relu + - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation gelu --no-bias + # - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation relu --no-bias - python testGenerator.py -H 1 -S 63 -E 62 -P 61 -F 61 --activation relu --no-bias # - python testGenerator.py -H 1 -S 65 -E 130 -P 195 -F 195 --activation relu --no-bias # - python 
testGenerator.py -H 1 -S 127 -E 190 -P 253 -F 253 --activation relu --no-bias @@ -126,7 +127,7 @@ run_sim_padding: # F: 195 # activation: relu # - S: 127 - # E: 190 + # E: 190gelu # P: 253 # F: 253 # activation: relu diff --git a/PyITA/debug.py b/PyITA/debug.py new file mode 100644 index 0000000..b30b1ac --- /dev/null +++ b/PyITA/debug.py @@ -0,0 +1,52 @@ +import os +import numpy as np +import seaborn as sns +import matplotlib.pyplot as plt + + +def print_matrix(from_txt: bool, matrix: np.array = None, + txt_file: str = 'Out_soft_0.txt', + test_vector: str = 'data_S128_E128_P192_F256_H1_B0', + row: int = 128, col: int = 128): + if (from_txt): + current_dir = os.path.dirname(os.path.abspath(__file__)) + filepath = os.path.join(os.path.dirname(current_dir), + 'simvectors', + test_vector, + 'standalone', + txt_file) + + array = np.loadtxt(filepath) + matrix = array.reshape(row, col) + + sns.set_theme() + sns.heatmap(matrix, annot=False, linewidths=0, linecolor='white', cmap='crest', xticklabels=False, yticklabels=False) + plt.show() + +# row = 128 +# col = 128 + +# txt_file = 'Out_soft_0.txt' +# test_vector = 'data_S128_E128_P192_F256_H1_B0' + +# current_dir = os.path.dirname(os.path.abspath(__file__)) +# filepath = os.path.join(os.path.dirname(current_dir), +# 'simvectors', +# test_vector, +# 'standalone', +# txt_file) + +# print(filepath) +# print(os.path.exists(filepath)) + +# array = np.loadtxt(filepath) +# matrix = array.reshape(row, col) + +# sns.set_theme() +# sns.heatmap(matrix, annot=False, linewidths=0, linecolor='white', cmap='crest', xticklabels=False, yticklabels=False) +# plt.show() + + + + + diff --git a/modelsim/sim_ita_tb_wave.tcl b/modelsim/sim_ita_tb_wave.tcl index 4f29360..ded354f 100644 --- a/modelsim/sim_ita_tb_wave.tcl +++ b/modelsim/sim_ita_tb_wave.tcl @@ -11,6 +11,7 @@ add wave -noupdate /ita_tb/dut/i_inp2_mux/rst_ni add wave -noupdate /ita_tb/dut/i_inp2_mux/weight_i add wave -noupdate /ita_tb/dut/i_inp2_mux/inp2_o add wave -noupdate 
/ita_tb/dut/i_controller/ctrl_i +add wave -noupdate /ita_tb/dut/oup_o add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/* add wave -expand -group Controller /ita_tb/dut/i_controller/* add wave -group {Softmax Controller} ita_tb/dut/i_softmax_top/i_softmax/* From f6d1100cc181e9f733654101b7851dece0f12737 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Sun, 13 Oct 2024 20:12:23 +0200 Subject: [PATCH 04/28] Small changes in the debug.py file --- PyITA/debug.py | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/PyITA/debug.py b/PyITA/debug.py index b30b1ac..60b875e 100644 --- a/PyITA/debug.py +++ b/PyITA/debug.py @@ -6,8 +6,9 @@ def print_matrix(from_txt: bool, matrix: np.array = None, txt_file: str = 'Out_soft_0.txt', - test_vector: str = 'data_S128_E128_P192_F256_H1_B0', + test_vector: str = 'data_S30_E30_P50_F64_H1_B0', row: int = 128, col: int = 128): + if (from_txt): current_dir = os.path.dirname(os.path.abspath(__file__)) filepath = os.path.join(os.path.dirname(current_dir), @@ -23,28 +24,7 @@ def print_matrix(from_txt: bool, matrix: np.array = None, sns.heatmap(matrix, annot=False, linewidths=0, linecolor='white', cmap='crest', xticklabels=False, yticklabels=False) plt.show() -# row = 128 -# col = 128 - -# txt_file = 'Out_soft_0.txt' -# test_vector = 'data_S128_E128_P192_F256_H1_B0' - -# current_dir = os.path.dirname(os.path.abspath(__file__)) -# filepath = os.path.join(os.path.dirname(current_dir), -# 'simvectors', -# test_vector, -# 'standalone', -# txt_file) - -# print(filepath) -# print(os.path.exists(filepath)) - -# array = np.loadtxt(filepath) -# matrix = array.reshape(row, col) - -# sns.set_theme() -# sns.heatmap(matrix, annot=False, linewidths=0, linecolor='white', cmap='crest', xticklabels=False, yticklabels=False) -# plt.show() +# print_matrix(from_txt=True, txt_file="A_soft_0.txt") From c80f81f29bb011a65d884f07293cf4650aa15f18 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Tue, 15 Oct 2024 
18:55:05 +0200 Subject: [PATCH 05/28] Small changes in ita_controller.sv --- PyITA/debug.py | 15 +++-- src/ita_controller.sv | 136 +++++++++++++----------------------------- 2 files changed, 51 insertions(+), 100 deletions(-) diff --git a/PyITA/debug.py b/PyITA/debug.py index 60b875e..80d1e02 100644 --- a/PyITA/debug.py +++ b/PyITA/debug.py @@ -4,10 +4,12 @@ import matplotlib.pyplot as plt -def print_matrix(from_txt: bool, matrix: np.array = None, - txt_file: str = 'Out_soft_0.txt', - test_vector: str = 'data_S30_E30_P50_F64_H1_B0', - row: int = 128, col: int = 128): +def print_matrix(from_txt: bool, + cut: bool = False, + matrix: np.array = None, + txt_file: str = 'Qp_0.txt', + test_vector: str = 'data_S32_E32_P32_F64_H1_B1', + row: int = 64, col: int = 64): if (from_txt): current_dir = os.path.dirname(os.path.abspath(__file__)) @@ -18,10 +20,15 @@ def print_matrix(from_txt: bool, matrix: np.array = None, txt_file) array = np.loadtxt(filepath) + if (cut): + array = array[:4096] matrix = array.reshape(row, col) sns.set_theme() sns.heatmap(matrix, annot=False, linewidths=0, linecolor='white', cmap='crest', xticklabels=False, yticklabels=False) + plt.title(txt_file) + plt.xlabel(col) + plt.ylabel(row) plt.show() # print_matrix(from_txt=True, txt_file="A_soft_0.txt") diff --git a/src/ita_controller.sv b/src/ita_controller.sv index 0c6ebe4..fb3d534 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -46,6 +46,9 @@ module ita_controller ongoing_t ongoing_d, ongoing_q; ongoing_soft_t ongoing_soft_d, ongoing_soft_q; + tile_t inner_tile_dim; + logic [WO-WI*2-2:0] first_outer_dim, second_outer_dim; + logic softmax_fifo, softmax_div, softmax_div_done_d, softmax_div_done_q, busy_d, busy_q; requant_oup_t requant_add_d, requant_add_q; @@ -89,6 +92,25 @@ module ita_controller busy_d = 1'b1; end + if (step_q != Idle && step_q != F1 && step_q != F2 && step_q != MatMul) begin + if (inner_tile_q == inner_tile_dim) begin + last_inner_tile_o = 1'b1; + if ( ( ((count_q & 
(M-1)) + tile_y_q * M) > ( (first_outer_dim - 1) ) ) ) begin + requant_add_d = {N {1'b0}}; + end else begin + if ( (count_q + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin + if ( ((count_q / M ) * N + tile_x_q * M ) < second_outer_dim) begin + for (int i = (second_outer_dim & (N-1)); i < N; i++) begin + requant_add_d[i] = 1'b0; + end + end else begin + requant_add_d = {N {1'b0}}; + end + end + end + end + end + // default handshake if (step_q != Idle) begin // Check if division for softmax is going to FIFO @@ -143,22 +165,9 @@ module ita_controller end // Attention Q : begin - if (inner_tile_q == ctrl_i.tile_e-1) begin - last_inner_tile_o = 1'b1; - if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.seq_length - 1) ) ) ) begin - requant_add_d = {N {1'b0}}; - end else begin - if ( (count_q + tile_x_q * M*M/N) >= (ctrl_i.proj_space / N) * M ) begin - if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.proj_space) begin - for (int i = (ctrl_i.proj_space & (N-1)); i < N; i++) begin - requant_add_d[i] = 1'b0; - end - end else begin - requant_add_d = {N {1'b0}}; - end - end - end - end + inner_tile_dim = ctrl_i.tile_e-1; + first_outer_dim = ctrl_i.seq_length; + second_outer_dim = ctrl_i.proj_space; if (inner_tile_d == ctrl_i.tile_e) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; @@ -177,22 +186,9 @@ module ita_controller end end K: begin - if (inner_tile_q == ctrl_i.tile_e-1) begin - last_inner_tile_o = 1'b1; - if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.seq_length - 1) ) ) ) begin - requant_add_d = {N {1'b0}}; - end else begin - if ( (count_q + tile_x_q * M*M/N) >= (ctrl_i.proj_space / N) * M ) begin - if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.proj_space) begin - for (int i = (ctrl_i.proj_space & (N-1)); i < N; i++) begin - requant_add_d[i] = 1'b0; - end - end else begin - requant_add_d = {N {1'b0}}; - end - end - end - end + inner_tile_dim = ctrl_i.tile_e-1; + first_outer_dim = ctrl_i.seq_length; + second_outer_dim = 
ctrl_i.proj_space; if (inner_tile_d == ctrl_i.tile_e) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; @@ -211,22 +207,9 @@ module ita_controller end end V: begin - if (inner_tile_q == ctrl_i.tile_e-1) begin - last_inner_tile_o = 1'b1; - if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.proj_space - 1) ) ) ) begin - requant_add_d = {N {1'b0}}; - end else begin - if ( (count_q + tile_x_q * M*M/N) >= (ctrl_i.seq_length / N) * M ) begin - if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.seq_length) begin - for (int i = (ctrl_i.seq_length & (N-1)); i < N; i++) begin - requant_add_d[i] = 1'b0; - end - end else begin - requant_add_d = {N {1'b0}}; - end - end - end - end + inner_tile_dim = ctrl_i.tile_e-1; + first_outer_dim = ctrl_i.proj_space; + second_outer_dim = ctrl_i.seq_length; if (inner_tile_d == ctrl_i.tile_e) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; @@ -245,22 +228,9 @@ module ita_controller end end QK : begin - if (inner_tile_q == ctrl_i.tile_p-1) begin - last_inner_tile_o = 1'b1; - if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.seq_length - 1) ) ) ) begin - requant_add_d = {N {1'b0}}; - end else begin - if ( (count_q + tile_x_q * M*M/N) >= (ctrl_i.seq_length / N) * M ) begin - if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.seq_length) begin - for (int i = (ctrl_i.seq_length & (N-1)); i < N; i++) begin - requant_add_d[i] = 1'b0; - end - end else begin - requant_add_d = {N {1'b0}}; - end - end - end - end + inner_tile_dim = ctrl_i.tile_p-1; + first_outer_dim = ctrl_i.seq_length; + second_outer_dim = ctrl_i.seq_length; if (inner_tile_d == ctrl_i.tile_p) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; @@ -276,22 +246,9 @@ module ita_controller end end AV : begin - if (inner_tile_q == ctrl_i.tile_s-1) begin - last_inner_tile_o = 1'b1; - if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.seq_length - 1) ) ) ) begin - requant_add_d = {N {1'b0}}; - end else begin - if ( (count_q + 
tile_x_q * M*M/N) >= (ctrl_i.proj_space / N) * M ) begin - if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.proj_space) begin - for (int i = (ctrl_i.proj_space & (N-1)); i < N; i++) begin - requant_add_d[i] = 1'b0; - end - end else begin - requant_add_d = {N {1'b0}}; - end - end - end - end + inner_tile_dim = ctrl_i.tile_s-1; + first_outer_dim = ctrl_i.seq_length; + second_outer_dim = ctrl_i.proj_space; if (inner_tile_d == ctrl_i.tile_s) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; @@ -320,22 +277,9 @@ module ita_controller end end OW : begin - if (inner_tile_q == ctrl_i.tile_p-1) begin - last_inner_tile_o = 1'b1; - if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (ctrl_i.seq_length - 1) ) ) ) begin - requant_add_d = {N {1'b0}}; - end else begin - if ( (count_q + tile_x_q * M*M/N) >= (ctrl_i.embed_size / N) * M ) begin - if ( ((count_q / M ) * N + tile_x_q * M ) < ctrl_i.embed_size) begin - for (int i = (ctrl_i.embed_size & (N-1)); i < N; i++) begin - requant_add_d[i] = 1'b0; - end - end else begin - requant_add_d = {N {1'b0}}; - end - end - end - end + inner_tile_dim = ctrl_i.tile_p-1; + first_outer_dim = ctrl_i.seq_length; + second_outer_dim = ctrl_i.embed_size; if (inner_tile_d == ctrl_i.tile_p) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; From d993c0402ea60629dbb52a27d23c9f8befd777ab Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Wed, 16 Oct 2024 14:01:06 +0200 Subject: [PATCH 06/28] Started with the bias padding not finished yet --- src/ita.sv | 6 ++++-- src/ita_controller.sv | 26 ++++++++++++++++++-------- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/ita.sv b/src/ita.sv index ceb302d..0cee47e 100644 --- a/src/ita.sv +++ b/src/ita.sv @@ -40,7 +40,7 @@ module ita logic weight_valid, weight_ready; inp_t inp, inp_stream_soft; weight_t inp1, inp1_q, inp2, inp2_q; - bias_t inp_bias, inp_bias_q1, inp_bias_q2; + bias_t inp_bias, inp_bias_padded, inp_bias_q1, inp_bias_q2; oup_t oup, oup_q, 
accumulator_oup; requant_const_t requant_mult, requant_shift, activation_requant_mult, activation_requant_shift; requant_oup_t requant_oup; @@ -162,7 +162,7 @@ module ita oup_q <= oup; end if (calc_en_q1) begin - inp_bias_q1 <= inp_bias; + inp_bias_q1 <= inp_bias_padded; inp1_q <= inp1; inp2_q <= inp2; end @@ -201,6 +201,8 @@ module ita .inner_tile_o (inner_tile ), .requant_add_i (requant_add ), .requant_add_o (requant_add_o ), + .inp_bias_pad_i (inp_bias ), + .inp_bias_pad_o (inp_bias_padded ), .busy_o (busy_o ) ); diff --git a/src/ita_controller.sv b/src/ita_controller.sv index fb3d534..510ea6c 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -31,9 +31,11 @@ module ita_controller output counter_t tile_x_o , output counter_t tile_y_o , output counter_t inner_tile_o , - input requant_t requant_add_i , + input requant_t requant_add_i , output requant_oup_t requant_add_o , - output logic busy_o + input bias_t inp_bias_pad_i , + output bias_t inp_bias_pad_o , + output logic busy_o ); step_e step_d, step_q; @@ -46,17 +48,21 @@ module ita_controller ongoing_t ongoing_d, ongoing_q; ongoing_soft_t ongoing_soft_d, ongoing_soft_q; + bias_t inp_bias_padded; + tile_t inner_tile_dim; logic [WO-WI*2-2:0] first_outer_dim, second_outer_dim; logic softmax_fifo, softmax_div, softmax_div_done_d, softmax_div_done_q, busy_d, busy_q; requant_oup_t requant_add_d, requant_add_q; - assign step_o = step_q; - assign busy_o = busy_q; - assign tile_x_o = tile_x_q; - assign tile_y_o = tile_y_q; - assign inner_tile_o = inner_tile_q; + assign step_o = step_q; + assign busy_o = busy_q; + assign tile_x_o = tile_x_q; + assign tile_y_o = tile_y_q; + assign inner_tile_o = inner_tile_q; + assign requant_add_o = requant_add_q; + assign inp_bias_pad_o = inp_bias_padded; always_comb begin count_d = count_q; @@ -76,7 +82,8 @@ module ita_controller softmax_tile_d = softmax_tile_q; softmax_div_done_d = softmax_div_done_q; requant_add_d = {N {requant_add_i}}; - requant_add_o = requant_add_q; 
+ inp_bias_padded = inp_bias_pad_i; + busy_d = busy_q; softmax_fifo = 1'b0; @@ -97,14 +104,17 @@ module ita_controller last_inner_tile_o = 1'b1; if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (first_outer_dim - 1) ) ) ) begin requant_add_d = {N {1'b0}}; + inp_bias_padded = {N {1'b0}}; end else begin if ( (count_q + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin if ( ((count_q / M ) * N + tile_x_q * M ) < second_outer_dim) begin for (int i = (second_outer_dim & (N-1)); i < N; i++) begin requant_add_d[i] = 1'b0; + inp_bias_padded[i] = 1'b0; end end else begin requant_add_d = {N {1'b0}}; + inp_bias_padded = {N {1'b0}}; end end end From 267850e7df53b6259cebb633a3efd8c3125c3021 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Sun, 20 Oct 2024 15:20:54 +0200 Subject: [PATCH 07/28] Bias padding solution with exactly 8 errors for each phase --- PyITA/ITA.py | 8 +- PyITA/debug.py | 2 +- modelsim/sim_ita_tb_wave.tcl | 2 + modelsim/sim_ita_tb_wave_makant.tcl | 235 ++++++++++++++++++++++++++++ src/ita.sv | 9 +- src/ita_controller.sv | 29 ++-- 6 files changed, 269 insertions(+), 16 deletions(-) create mode 100644 modelsim/sim_ita_tb_wave_makant.tcl diff --git a/PyITA/ITA.py b/PyITA/ITA.py index 19ff5a0..d6fa0ca 100644 --- a/PyITA/ITA.py +++ b/PyITA/ITA.py @@ -22,6 +22,9 @@ import numpy as np from numpy.typing import ArrayLike, DTypeLike +import seaborn as sns +import matplotlib.pyplot as plt + from .softmax import fastSoftmax, realSoftmax, streamingPartialSoftmax from .gelu import gelu_requantize, i_gelu_requantized, get_i_gelu_constants, get_i_gelu_requantized_constants from .util import (generate_matrix_mem, pack_8b_to_word, pack_array_8b_to_word, pack_hex_24b, pack_multihead_8b_to_word, @@ -518,7 +521,10 @@ def step1_Qp(self): self.Qp = np.clip(self.Qp, -2**(self.WO - 1), 2**(self.WO - 1) - 1) self.Qp_requant = requantize(self.Qp, self.requant_eps_mult[0], self.requant_right_shift[0], self.requant_add[0]) - + print(self.Qp[0][0][16:32]) + sns.set_theme() + 
sns.heatmap(self.Qp[0], annot=False, linewidths=0, linecolor='white', cmap='crest', xticklabels=False, yticklabels=False) + plt.show() # Set padded values to zero if (self.S_ITA - self.S) > 0: self.Qp_requant[:, -(self.S_ITA - self.S):, :] = 0 diff --git a/PyITA/debug.py b/PyITA/debug.py index 80d1e02..3cb9d7a 100644 --- a/PyITA/debug.py +++ b/PyITA/debug.py @@ -8,7 +8,7 @@ def print_matrix(from_txt: bool, cut: bool = False, matrix: np.array = None, txt_file: str = 'Qp_0.txt', - test_vector: str = 'data_S32_E32_P32_F64_H1_B1', + test_vector: str = 'data_S30_E64_P64_F64_H1_B1', row: int = 64, col: int = 64): if (from_txt): diff --git a/modelsim/sim_ita_tb_wave.tcl b/modelsim/sim_ita_tb_wave.tcl index ded354f..a1e1697 100644 --- a/modelsim/sim_ita_tb_wave.tcl +++ b/modelsim/sim_ita_tb_wave.tcl @@ -12,6 +12,8 @@ add wave -noupdate /ita_tb/dut/i_inp2_mux/weight_i add wave -noupdate /ita_tb/dut/i_inp2_mux/inp2_o add wave -noupdate /ita_tb/dut/i_controller/ctrl_i add wave -noupdate /ita_tb/dut/oup_o +add wave -noupdate /ita_tb/dut/inp1_q +add wave -noupdate /ita_tb/dut/inp2_q add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/* add wave -expand -group Controller /ita_tb/dut/i_controller/* add wave -group {Softmax Controller} ita_tb/dut/i_softmax_top/i_softmax/* diff --git a/modelsim/sim_ita_tb_wave_makant.tcl b/modelsim/sim_ita_tb_wave_makant.tcl new file mode 100644 index 0000000..671f5e9 --- /dev/null +++ b/modelsim/sim_ita_tb_wave_makant.tcl @@ -0,0 +1,235 @@ +onerror {resume} +quietly WaveActivateNextPane {} 0 +add wave -noupdate /ita_tb/dut/i_inp1_mux/clk_i +add wave -noupdate /ita_tb/dut/i_inp1_mux/rst_ni +add wave -noupdate /ita_tb/dut/i_inp1_mux/inp_i +add wave -noupdate /ita_tb/dut/i_inp1_mux/inp1_o +add wave -noupdate /ita_tb/dut/i_inp2_mux/clk_i +add wave -noupdate /ita_tb/dut/i_inp2_mux/rst_ni +add wave -noupdate /ita_tb/dut/i_inp2_mux/weight_i +add wave -noupdate /ita_tb/dut/i_inp2_mux/inp2_o +add wave -noupdate /ita_tb/dut/i_controller/ctrl_i 
+add wave -noupdate /ita_tb/dut/oup_o +add wave -noupdate /ita_tb/dut/inp1_q +add wave -noupdate /ita_tb/dut/inp2_q +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/clk_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/rst_ni +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/mode_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/eps_mult_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/right_shift_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/calc_en_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/calc_en_q_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/result_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/requant_oup_o +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/mult_signed +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/product +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/shifted_added +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/shifted_d +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/shifted_q +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q1 +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q2 +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q3 +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q4 +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/requant_oup_d +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/requant_oup_q +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/clk_i +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/rst_ni +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/ctrl_i +add wave -noupdate -expand -group Controller 
/ita_tb/dut/i_controller/inp_valid_i +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/inp_ready_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/weight_valid_i +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/weight_ready_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/bias_valid_i +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/bias_ready_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/oup_valid_i +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/oup_ready_i +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/pop_softmax_fifo_i +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/step_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/soft_addr_div_i +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_done_i +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/calc_en_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/first_inner_tile_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/last_inner_tile_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_x_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_y_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/inner_tile_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/requant_add_i +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/requant_add_o +add wave -noupdate -expand -group Controller -radix unsigned /ita_tb/dut/i_controller/count_q +add wave -noupdate -expand -group Controller -radix hexadecimal /ita_tb/dut/inp_bias_q1 +add wave -noupdate -expand -group Controller -radix hexadecimal -childformat {{{/ita_tb/dut/i_controller/inp_bias_pad_i[15]} -radix hexadecimal} 
{{/ita_tb/dut/i_controller/inp_bias_pad_i[14]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[13]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[12]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[11]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[10]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[9]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[8]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[7]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[6]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[5]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[4]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[3]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[2]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[1]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[0]} -radix hexadecimal}} -subitemconfig {{/ita_tb/dut/i_controller/inp_bias_pad_i[15]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[14]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[13]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[12]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[11]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[10]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[9]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[8]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[7]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[6]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[5]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[4]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[3]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[2]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[1]} {-radix 
hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[0]} {-radix hexadecimal}} /ita_tb/dut/i_controller/inp_bias_pad_i +add wave -noupdate -expand -group Controller -radix hexadecimal -childformat {{{/ita_tb/dut/i_controller/inp_bias_pad_o[15]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[14]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[13]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[12]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[11]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[10]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[9]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[8]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[7]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[6]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[5]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[4]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[3]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[2]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[1]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[0]} -radix hexadecimal}} -subitemconfig {{/ita_tb/dut/i_controller/inp_bias_pad_o[15]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[14]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[13]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[12]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[11]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[10]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[9]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[8]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[7]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[6]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[5]} {-radix 
hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[4]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[3]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[2]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[1]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[0]} {-radix hexadecimal}} /ita_tb/dut/i_controller/inp_bias_pad_o +add wave -noupdate -expand -group Controller -radix hexadecimal /ita_tb/dut/inp_bias_q2 +add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q1 +add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q2 +add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q3 +add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q4 +add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q5 +add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q6 +add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q7 +add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q8 +add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q9 +add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q10 +add wave -noupdate -expand -group Controller /ita_tb/dut/last_inner_tile_q10 +add wave -noupdate -expand -group Controller -radix hexadecimal /ita_tb/dut/oup_o +add wave -noupdate -expand -group Controller /ita_tb/dut/valid_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/busy_o +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/step_d +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/step_q +add wave -noupdate -expand -group Controller -radix unsigned /ita_tb/dut/i_controller/count_d +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_d +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_q +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/inner_tile_d +add wave -noupdate -expand -group 
Controller /ita_tb/dut/i_controller/inner_tile_q +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_x_d +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_x_q +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_y_d +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_y_q +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_tile_d +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_tile_q +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/ongoing_d +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/ongoing_q +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/ongoing_soft_d +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/ongoing_soft_q +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/inp_bias_padded +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/inner_tile_dim +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/first_outer_dim +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/second_outer_dim +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_fifo +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_div +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_div_done_d +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_div_done_q +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/busy_d +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/busy_q +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/requant_add_d +add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/requant_add_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/clk_i +add wave 
-noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/rst_ni +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/ctrl_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/step_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/requant_oup_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_stream_soft_en_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/soft_addr_div_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/softmax_done_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/pop_softmax_fifo_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inp_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inp_stream_soft_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_inp_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_valid_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_ready_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_valid_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_ready_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_oup_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_acc_en_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_acc_addr_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_acc_data_i +add wave -noupdate -group {Softmax Controller} 
/ita_tb/dut/i_softmax_top/i_softmax/write_acc_en_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_acc_addr_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_acc_data_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/prev_max_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_max_en_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_max_addr_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_max_data_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_max_en_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_max_addr_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_max_data_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_x_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_y_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inner_tile_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q1 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q2 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q3 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q4 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_d +add wave -noupdate -group {Softmax 
Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q1 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q2 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q3 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q4 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inner_tile_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_y_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/exp_sum_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/exp_sum_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_soft_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_soft_q1 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_soft_q2 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_div_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_div_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/addr_div_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/addr_div_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_read_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_read_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_write_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_write_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/requant_oup_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_d +add wave -noupdate -group {Softmax 
Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_diff +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_sum_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_sum_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_diff +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_inp +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_inp_diff +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_stream_soft_en_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_q1 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_q2 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_q3 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/fifo_full +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/fifo_empty +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/push_to_fifo +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/pop_from_fifo +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/data_to_fifo +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/data_from_fifo +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/fifo_usage +add wave -noupdate -group 
{Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/disable_shift +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/disable_row +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/disable_col +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/clk_i +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/rst_ni +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/calc_en_i +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/calc_en_q_i +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/first_tile_i +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/first_tile_q_i +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/last_tile_i +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/last_tile_q_i +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/oup_i +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/inp_bias_i +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/result_o +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_en +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_addr +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_data +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_data_unused +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/write_en +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/write_addr +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/write_data +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_addr_d +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_addr_q +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/write_addr_d +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/write_addr_q +add wave -noupdate -group Accumulator 
/ita_tb/dut/i_accumulator/result_d +add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/result_q +TreeUpdate [SetDefaultTree] +WaveRestoreCursors {{Cursor 1} {414600 ps} 1} {{Cursor 2} {550600 ps} 1} {{Cursor 4} {400994 ps} 0} +quietly wave cursor active 1 +configure wave -namecolwidth 150 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 1 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 +configure wave -gridoffset 0 +configure wave -gridperiod 1 +configure wave -griddelta 40 +configure wave -timeline 0 +configure wave -timelineunits ns +update +WaveRestoreZoom {394011 ps} {422576 ps} diff --git a/src/ita.sv b/src/ita.sv index 0cee47e..41120c0 100644 --- a/src/ita.sv +++ b/src/ita.sv @@ -40,7 +40,7 @@ module ita logic weight_valid, weight_ready; inp_t inp, inp_stream_soft; weight_t inp1, inp1_q, inp2, inp2_q; - bias_t inp_bias, inp_bias_padded, inp_bias_q1, inp_bias_q2; + bias_t inp_bias, inp_bias_pad_q1, inp_bias_q2; oup_t oup, oup_q, accumulator_oup; requant_const_t requant_mult, requant_shift, activation_requant_mult, activation_requant_shift; requant_oup_t requant_oup; @@ -154,15 +154,13 @@ module ita inp1_q <= '0; inp2_q <= '0; inp_bias_q2 <= '0; - inp_bias_q1 <= '0; oup_q <= '0; end else begin if (calc_en_q2) begin - inp_bias_q2 <= inp_bias_q1; + inp_bias_q2 <= inp_bias_pad_q1; oup_q <= oup; end if (calc_en_q1) begin - inp_bias_q1 <= inp_bias_padded; inp1_q <= inp1; inp2_q <= inp2; end @@ -202,7 +200,8 @@ module ita .requant_add_i (requant_add ), .requant_add_o (requant_add_o ), .inp_bias_pad_i (inp_bias ), - .inp_bias_pad_o (inp_bias_padded ), + .inp_bias_pad_o (inp_bias_pad_q1 ), + .calc_en_q1_i (calc_en_q1 ), .busy_o (busy_o ) ); diff --git a/src/ita_controller.sv b/src/ita_controller.sv index 510ea6c..5f05dea 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -35,6 +35,7 @@ module ita_controller 
output requant_oup_t requant_add_o , input bias_t inp_bias_pad_i , output bias_t inp_bias_pad_o , + input logic calc_en_q1_i , output logic busy_o ); @@ -48,7 +49,7 @@ module ita_controller ongoing_t ongoing_d, ongoing_q; ongoing_soft_t ongoing_soft_d, ongoing_soft_q; - bias_t inp_bias_padded; + bias_t inp_bias_pad_d, inp_bias_pad_q; tile_t inner_tile_dim; logic [WO-WI*2-2:0] first_outer_dim, second_outer_dim; @@ -62,7 +63,7 @@ module ita_controller assign tile_y_o = tile_y_q; assign inner_tile_o = inner_tile_q; assign requant_add_o = requant_add_q; - assign inp_bias_pad_o = inp_bias_padded; + assign inp_bias_pad_o = inp_bias_pad_q; always_comb begin count_d = count_q; @@ -82,7 +83,7 @@ module ita_controller softmax_tile_d = softmax_tile_q; softmax_div_done_d = softmax_div_done_q; requant_add_d = {N {requant_add_i}}; - inp_bias_padded = inp_bias_pad_i; + inp_bias_pad_d = inp_bias_pad_i; busy_d = busy_q; @@ -99,22 +100,22 @@ module ita_controller busy_d = 1'b1; end - if (step_q != Idle && step_q != F1 && step_q != F2 && step_q != MatMul) begin + if (step_q != Idle && step_q != MatMul) begin if (inner_tile_q == inner_tile_dim) begin last_inner_tile_o = 1'b1; - if ( ( ((count_q & (M-1)) + tile_y_q * M) > ( (first_outer_dim - 1) ) ) ) begin + if ( ( (((count_q & (M-1)) + tile_y_q * M)) > ( (first_outer_dim - 1) ) ) ) begin requant_add_d = {N {1'b0}}; - inp_bias_padded = {N {1'b0}}; + inp_bias_pad_d = {N {1'b0}}; end else begin if ( (count_q + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin - if ( ((count_q / M ) * N + tile_x_q * M ) < second_outer_dim) begin + if ( ((count_q / M) * N + tile_x_q * M ) < second_outer_dim) begin for (int i = (second_outer_dim & (N-1)); i < N; i++) begin requant_add_d[i] = 1'b0; - inp_bias_padded[i] = 1'b0; + inp_bias_pad_d[i] = 1'b0; end end else begin requant_add_d = {N {1'b0}}; - inp_bias_padded = {N {1'b0}}; + inp_bias_pad_d = {N {1'b0}}; end end end @@ -309,6 +310,9 @@ module ita_controller end // Feedforward F1: begin + 
inner_tile_dim = ctrl_i.tile_e-1; + first_outer_dim = ctrl_i.seq_length; + second_outer_dim = ctrl_i.embed_size; if (inner_tile_q == ctrl_i.tile_e-1) begin last_inner_tile_o = 1'b1; end @@ -322,6 +326,9 @@ module ita_controller end end F2: begin + inner_tile_dim = ctrl_i.tile_e-1; + first_outer_dim = ctrl_i.seq_length; + second_outer_dim = ctrl_i.embed_size; if (inner_tile_q == ctrl_i.tile_f-1) begin last_inner_tile_o = 1'b1; end @@ -376,6 +383,7 @@ module ita_controller ongoing_soft_q <= '0; softmax_div_done_q <= 1'b0; requant_add_q <= '0; + inp_bias_pad_q <= '0; busy_q <= 1'b0; end else begin step_q <= step_d; @@ -389,6 +397,9 @@ module ita_controller ongoing_soft_q <= ongoing_soft_d; softmax_div_done_q <= softmax_div_done_d; requant_add_q <= requant_add_d; + if (calc_en_q1_i) begin + inp_bias_pad_q <= inp_bias_pad_d; + end busy_q <= busy_d; end end From 85348502465df90a15772f9ba397a374429dc11c Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Mon, 21 Oct 2024 10:43:12 +0200 Subject: [PATCH 08/28] Added additional buffer for bias values --- PyITA/ITA.py | 5 +++++ src/ita.sv | 8 ++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/PyITA/ITA.py b/PyITA/ITA.py index d6fa0ca..04a3979 100644 --- a/PyITA/ITA.py +++ b/PyITA/ITA.py @@ -178,6 +178,11 @@ def _initialize_tensors(self, Q, V, Wq, Wk, Wv, Wo, Bq, Bk, Bv, Bo, FF_in, Wff, self.Bq_broadcast = np.reshape(np.repeat(self.Bq, self.S, axis = 0), (self.H, self.S, self.P_ITA)) self.Bq_broadcast = np.pad(self.Bq_broadcast, ((0, 0), (0, self.S_ITA - self.S), (0, 0))) + print(self.Bq_broadcast[0][0][16:32]) + sns.set_theme() + sns.heatmap(self.Bq_broadcast[0], annot=False, linewidths=0, linecolor='white', cmap='crest', xticklabels=False, yticklabels=False) + plt.show() + if self.bias: self.Bk_in = random_shuffled_tensor( (self.H, self.P), int(np.log2(self.P)) + 8, type = np.int32) if Bk is None else Bk diff --git a/src/ita.sv b/src/ita.sv index 41120c0..000658a 100644 --- a/src/ita.sv +++ b/src/ita.sv @@ 
-40,7 +40,7 @@ module ita logic weight_valid, weight_ready; inp_t inp, inp_stream_soft; weight_t inp1, inp1_q, inp2, inp2_q; - bias_t inp_bias, inp_bias_pad_q1, inp_bias_q2; + bias_t inp_bias, inp_bias_pad_q1, inp_bias_q2, inp_bias_q3; oup_t oup, oup_q, accumulator_oup; requant_const_t requant_mult, requant_shift, activation_requant_mult, activation_requant_shift; requant_oup_t requant_oup; @@ -155,7 +155,11 @@ module ita inp2_q <= '0; inp_bias_q2 <= '0; oup_q <= '0; + inp_bias_q3 <= '0; end else begin + if (calc_en_q3) begin + inp_bias_q3 <= inp_bias_q2; + end if (calc_en_q2) begin inp_bias_q2 <= inp_bias_pad_q1; oup_q <= oup; @@ -251,7 +255,7 @@ module ita .last_tile_q_i (last_inner_tile_q3 ), .oup_i (oup_q ), - .inp_bias_i (inp_bias_q2 ), + .inp_bias_i (inp_bias_q3 ), .result_o (accumulator_oup ) ); From 08fa963c98373bbc3c103ed1a585b52945e85130 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Mon, 21 Oct 2024 18:25:56 +0200 Subject: [PATCH 09/28] No buffering in the controller --- Makefile | 3 +- modelsim/Makefile | 2 +- modelsim/sim_ita_tb_wave_makant.tcl | 153 ++++++++++++++-------------- src/ita.sv | 17 ++-- src/ita_controller.sv | 60 +++++------ 5 files changed, 117 insertions(+), 118 deletions(-) diff --git a/Makefile b/Makefile index 3359ca7..f85f76b 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,8 @@ BENDER_INSTALL_DIR = ${INSTALL_DIR}/bender VENV_BIN=venv/bin/ BENDER_VERSION = 0.28.1 -SIM_PATH ?= modelsim/build +SIM_FOLDER ?= build +SIM_PATH ?= modelsim/${SIM_FOLDER} SYNTH_PATH = synopsys BENDER_TARGETS = -t rtl -t test diff --git a/modelsim/Makefile b/modelsim/Makefile index 7d181aa..8aec4cf 100644 --- a/modelsim/Makefile +++ b/modelsim/Makefile @@ -6,7 +6,7 @@ all: lib build QUESTA_SEPP ?= questa-2023.4 -buildpath ?= build +buildpath ?= $(SIM_FOLDER) VOPT ?= $(QUESTA_SEPP) vopt VSIM ?= $(QUESTA_SEPP) vsim VLIB ?= $(QUESTA_SEPP) vlib diff --git a/modelsim/sim_ita_tb_wave_makant.tcl b/modelsim/sim_ita_tb_wave_makant.tcl index 671f5e9..d79b638 100644 
--- a/modelsim/sim_ita_tb_wave_makant.tcl +++ b/modelsim/sim_ita_tb_wave_makant.tcl @@ -4,14 +4,30 @@ add wave -noupdate /ita_tb/dut/i_inp1_mux/clk_i add wave -noupdate /ita_tb/dut/i_inp1_mux/rst_ni add wave -noupdate /ita_tb/dut/i_inp1_mux/inp_i add wave -noupdate /ita_tb/dut/i_inp1_mux/inp1_o -add wave -noupdate /ita_tb/dut/i_inp2_mux/clk_i add wave -noupdate /ita_tb/dut/i_inp2_mux/rst_ni add wave -noupdate /ita_tb/dut/i_inp2_mux/weight_i add wave -noupdate /ita_tb/dut/i_inp2_mux/inp2_o add wave -noupdate /ita_tb/dut/i_controller/ctrl_i -add wave -noupdate /ita_tb/dut/oup_o add wave -noupdate /ita_tb/dut/inp1_q add wave -noupdate /ita_tb/dut/inp2_q +add wave -noupdate /ita_tb/dut/oup_o +add wave -noupdate /ita_tb/dut/inp_bias +add wave -noupdate /ita_tb/dut/inp_bias_pad_q1 +add wave -noupdate /ita_tb/dut/inp_bias_q2 +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_d +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_q +add wave -noupdate /ita_tb/dut/i_inp2_mux/clk_i +add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en +add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q1 +add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q2 +add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q3 +add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q4 +add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q5 +add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q6 +add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q7 +add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q8 +add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q9 +add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q10 add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/clk_i add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/rst_ni add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/mode_i @@ -33,77 +49,62 @@ add wave -noupdate -group 
Requantizer /ita_tb/dut/i_requantizer/add_q3 add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q4 add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/requant_oup_d add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/requant_oup_q -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/clk_i -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/rst_ni -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/ctrl_i -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/inp_valid_i -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/inp_ready_o -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/weight_valid_i -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/weight_ready_o -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/bias_valid_i -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/bias_ready_o -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/oup_valid_i -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/oup_ready_i -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/pop_softmax_fifo_i -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/step_o -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/soft_addr_div_i -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_done_i -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/calc_en_o -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/first_inner_tile_o -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/last_inner_tile_o -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_x_o -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_y_o -add wave -noupdate -expand -group Controller 
/ita_tb/dut/i_controller/inner_tile_o -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/requant_add_i -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/requant_add_o -add wave -noupdate -expand -group Controller -radix unsigned /ita_tb/dut/i_controller/count_q -add wave -noupdate -expand -group Controller -radix hexadecimal /ita_tb/dut/inp_bias_q1 -add wave -noupdate -expand -group Controller -radix hexadecimal -childformat {{{/ita_tb/dut/i_controller/inp_bias_pad_i[15]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[14]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[13]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[12]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[11]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[10]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[9]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[8]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[7]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[6]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[5]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[4]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[3]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[2]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[1]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_i[0]} -radix hexadecimal}} -subitemconfig {{/ita_tb/dut/i_controller/inp_bias_pad_i[15]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[14]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[13]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[12]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[11]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[10]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[9]} 
{-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[8]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[7]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[6]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[5]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[4]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[3]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[2]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[1]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_i[0]} {-radix hexadecimal}} /ita_tb/dut/i_controller/inp_bias_pad_i -add wave -noupdate -expand -group Controller -radix hexadecimal -childformat {{{/ita_tb/dut/i_controller/inp_bias_pad_o[15]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[14]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[13]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[12]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[11]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[10]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[9]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[8]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[7]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[6]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[5]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[4]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[3]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[2]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[1]} -radix hexadecimal} {{/ita_tb/dut/i_controller/inp_bias_pad_o[0]} -radix hexadecimal}} -subitemconfig {{/ita_tb/dut/i_controller/inp_bias_pad_o[15]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[14]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[13]} 
{-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[12]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[11]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[10]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[9]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[8]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[7]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[6]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[5]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[4]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[3]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[2]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[1]} {-radix hexadecimal} {/ita_tb/dut/i_controller/inp_bias_pad_o[0]} {-radix hexadecimal}} /ita_tb/dut/i_controller/inp_bias_pad_o -add wave -noupdate -expand -group Controller -radix hexadecimal /ita_tb/dut/inp_bias_q2 -add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q1 -add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q2 -add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q3 -add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q4 -add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q5 -add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q6 -add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q7 -add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q8 -add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q9 -add wave -noupdate -expand -group Controller /ita_tb/dut/calc_en_q10 -add wave -noupdate -expand -group Controller /ita_tb/dut/last_inner_tile_q10 -add wave -noupdate -expand -group Controller -radix hexadecimal /ita_tb/dut/oup_o -add wave -noupdate -expand -group Controller /ita_tb/dut/valid_o -add wave -noupdate -expand -group Controller 
/ita_tb/dut/i_controller/busy_o -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/step_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/step_q -add wave -noupdate -expand -group Controller -radix unsigned /ita_tb/dut/i_controller/count_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_q -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/inner_tile_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/inner_tile_q -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_x_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_x_q -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_y_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/tile_y_q -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_tile_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_tile_q -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/ongoing_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/ongoing_q -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/ongoing_soft_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/ongoing_soft_q -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/inp_bias_padded -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/inner_tile_dim -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/first_outer_dim -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/second_outer_dim -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_fifo -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_div -add wave -noupdate -expand -group Controller 
/ita_tb/dut/i_controller/softmax_div_done_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/softmax_div_done_q -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/busy_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/busy_q -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/requant_add_d -add wave -noupdate -expand -group Controller /ita_tb/dut/i_controller/requant_add_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/clk_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/rst_ni +add wave -noupdate -group Controller /ita_tb/dut/i_controller/ctrl_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_valid_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_ready_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/weight_valid_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/weight_ready_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/bias_valid_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/bias_ready_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/oup_valid_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/oup_ready_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/pop_softmax_fifo_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/step_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/soft_addr_div_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_done_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/calc_en_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/first_inner_tile_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/last_inner_tile_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_x_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_y_o +add wave -noupdate -group 
Controller /ita_tb/dut/i_controller/inner_tile_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias_pad_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias_pad_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/calc_en_q1_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/busy_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/step_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/step_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inner_tile_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inner_tile_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_x_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_x_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_y_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_y_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_tile_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_tile_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_soft_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_soft_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias_pad_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias_pad_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inner_tile_dim +add wave -noupdate -group Controller /ita_tb/dut/i_controller/first_outer_dim 
+add wave -noupdate -group Controller /ita_tb/dut/i_controller/second_outer_dim +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_fifo +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_div +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_div_done_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_div_done_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/busy_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/busy_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_q add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/clk_i add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/rst_ni add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/ctrl_i @@ -216,8 +217,8 @@ add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/write_addr_q add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/result_d add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/result_q TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Cursor 1} {414600 ps} 1} {{Cursor 2} {550600 ps} 1} {{Cursor 4} {400994 ps} 0} -quietly wave cursor active 1 +WaveRestoreCursors {{Wrong Output} {2784600 ps} 1} {{Start of Calc} {2775007 ps} 1} {{Calc finish} {2783000 ps} 1} {{Cursor 4} {2783576 ps} 0} +quietly wave cursor active 4 configure wave -namecolwidth 150 configure wave -valuecolwidth 100 configure wave -justifyvalue left @@ -232,4 +233,4 @@ configure wave -griddelta 40 configure wave -timeline 0 configure wave -timelineunits ns update -WaveRestoreZoom {394011 ps} {422576 ps} +WaveRestoreZoom {2768202 ps} {2794772 ps} diff --git a/src/ita.sv b/src/ita.sv index 000658a..6a9c1a2 100644 --- a/src/ita.sv +++ b/src/ita.sv @@ -40,7 +40,7 @@ module ita logic weight_valid, weight_ready; 
inp_t inp, inp_stream_soft; weight_t inp1, inp1_q, inp2, inp2_q; - bias_t inp_bias, inp_bias_pad_q1, inp_bias_q2, inp_bias_q3; + bias_t inp_bias, inp_bias_padded, inp_bias_q1, inp_bias_q2; oup_t oup, oup_q, accumulator_oup; requant_const_t requant_mult, requant_shift, activation_requant_mult, activation_requant_shift; requant_oup_t requant_oup; @@ -153,18 +153,16 @@ module ita if (!rst_ni) begin inp1_q <= '0; inp2_q <= '0; + inp_bias_q1 <= '0; inp_bias_q2 <= '0; oup_q <= '0; - inp_bias_q3 <= '0; end else begin - if (calc_en_q3) begin - inp_bias_q3 <= inp_bias_q2; - end if (calc_en_q2) begin - inp_bias_q2 <= inp_bias_pad_q1; + inp_bias_q2 <= inp_bias_q1; oup_q <= oup; end if (calc_en_q1) begin + inp_bias_q1 <= inp_bias_padded; inp1_q <= inp1; inp2_q <= inp2; end @@ -203,9 +201,8 @@ module ita .inner_tile_o (inner_tile ), .requant_add_i (requant_add ), .requant_add_o (requant_add_o ), - .inp_bias_pad_i (inp_bias ), - .inp_bias_pad_o (inp_bias_pad_q1 ), - .calc_en_q1_i (calc_en_q1 ), + .inp_bias_i (inp_bias ), + .inp_bias_pad_o (inp_bias_padded ), .busy_o (busy_o ) ); @@ -255,7 +252,7 @@ module ita .last_tile_q_i (last_inner_tile_q3 ), .oup_i (oup_q ), - .inp_bias_i (inp_bias_q3 ), + .inp_bias_i (inp_bias_q2 ), .result_o (accumulator_oup ) ); diff --git a/src/ita_controller.sv b/src/ita_controller.sv index 5f05dea..de9e834 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -33,9 +33,8 @@ module ita_controller output counter_t inner_tile_o , input requant_t requant_add_i , output requant_oup_t requant_add_o , - input bias_t inp_bias_pad_i , + input bias_t inp_bias_i , output bias_t inp_bias_pad_o , - input logic calc_en_q1_i , output logic busy_o ); @@ -49,7 +48,7 @@ module ita_controller ongoing_t ongoing_d, ongoing_q; ongoing_soft_t ongoing_soft_d, ongoing_soft_q; - bias_t inp_bias_pad_d, inp_bias_pad_q; + bias_t inp_bias, inp_bias_padded; tile_t inner_tile_dim; logic [WO-WI*2-2:0] first_outer_dim, second_outer_dim; @@ -63,7 +62,7 @@ module 
ita_controller assign tile_y_o = tile_y_q; assign inner_tile_o = inner_tile_q; assign requant_add_o = requant_add_q; - assign inp_bias_pad_o = inp_bias_pad_q; + assign inp_bias_pad_o = inp_bias_padded; always_comb begin count_d = count_q; @@ -83,7 +82,7 @@ module ita_controller softmax_tile_d = softmax_tile_q; softmax_div_done_d = softmax_div_done_q; requant_add_d = {N {requant_add_i}}; - inp_bias_pad_d = inp_bias_pad_i; + inp_bias = inp_bias_i; busy_d = busy_q; @@ -100,27 +99,7 @@ module ita_controller busy_d = 1'b1; end - if (step_q != Idle && step_q != MatMul) begin - if (inner_tile_q == inner_tile_dim) begin - last_inner_tile_o = 1'b1; - if ( ( (((count_q & (M-1)) + tile_y_q * M)) > ( (first_outer_dim - 1) ) ) ) begin - requant_add_d = {N {1'b0}}; - inp_bias_pad_d = {N {1'b0}}; - end else begin - if ( (count_q + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin - if ( ((count_q / M) * N + tile_x_q * M ) < second_outer_dim) begin - for (int i = (second_outer_dim & (N-1)); i < N; i++) begin - requant_add_d[i] = 1'b0; - inp_bias_pad_d[i] = 1'b0; - end - end else begin - requant_add_d = {N {1'b0}}; - inp_bias_pad_d = {N {1'b0}}; - end - end - end - end - end + // default handshake if (step_q != Idle) begin @@ -356,6 +335,31 @@ module ita_controller end end endcase + + if (step_q != Idle && step_q != MatMul) begin + if (inner_tile_q == inner_tile_dim) begin + last_inner_tile_o = 1'b1; + if ( ( (((count_q & (M-1)) + tile_y_q * M)) > ( (first_outer_dim - 1) ) ) ) begin + requant_add_d = {N {1'b0}}; + inp_bias = {N {1'b0}}; + end else begin + if ( (count_q + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin + if ( ((count_q / M) * N + tile_x_q * M ) < second_outer_dim) begin + for (int i = (second_outer_dim & (N-1)); i < N; i++) begin + requant_add_d[i] = 1'b0; + inp_bias[i] = 1'b0; + end + end else begin + requant_add_d = {N {1'b0}}; + inp_bias = {N {1'b0}}; + end + end + end + end + end + + inp_bias_padded = inp_bias; + if (inp_valid_i && inp_ready_o && 
oup_valid_i && oup_ready_i && last_inner_tile_o) begin ongoing_d = ongoing_q; end else if (inp_valid_i && inp_ready_o && last_inner_tile_o) begin @@ -383,7 +387,6 @@ module ita_controller ongoing_soft_q <= '0; softmax_div_done_q <= 1'b0; requant_add_q <= '0; - inp_bias_pad_q <= '0; busy_q <= 1'b0; end else begin step_q <= step_d; @@ -397,9 +400,6 @@ module ita_controller ongoing_soft_q <= ongoing_soft_d; softmax_div_done_q <= softmax_div_done_d; requant_add_q <= requant_add_d; - if (calc_en_q1_i) begin - inp_bias_pad_q <= inp_bias_pad_d; - end busy_q <= busy_d; end end From abf4d184d6cf4d86dab97efa7985c65022b69829 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Wed, 23 Oct 2024 15:31:21 +0200 Subject: [PATCH 10/28] Changed count_q foto (count_q-1) for the bias padding --- modelsim/sim_ita_tb_wave_important.tcl | 237 +++++++++++++++++++++++++ src/ita_controller.sv | 18 +- 2 files changed, 253 insertions(+), 2 deletions(-) create mode 100644 modelsim/sim_ita_tb_wave_important.tcl diff --git a/modelsim/sim_ita_tb_wave_important.tcl b/modelsim/sim_ita_tb_wave_important.tcl new file mode 100644 index 0000000..7ddab73 --- /dev/null +++ b/modelsim/sim_ita_tb_wave_important.tcl @@ -0,0 +1,237 @@ +onerror {resume} +quietly WaveActivateNextPane {} 0 +add wave -noupdate /ita_tb/dut/i_inp1_mux/clk_i +add wave -noupdate /ita_tb/dut/i_inp1_mux/rst_ni +add wave -noupdate /ita_tb/dut/i_inp1_mux/inp_i +add wave -noupdate /ita_tb/dut/i_inp1_mux/inp1_o +add wave -noupdate /ita_tb/dut/i_inp2_mux/rst_ni +add wave -noupdate /ita_tb/dut/i_inp2_mux/weight_i +add wave -noupdate /ita_tb/dut/i_inp2_mux/inp2_o +add wave -noupdate /ita_tb/dut/i_controller/ctrl_i +add wave -noupdate /ita_tb/dut/inp1_q +add wave -noupdate /ita_tb/dut/inp2_q +add wave -noupdate /ita_tb/dut/i_inp2_mux/clk_i +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_d +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_q +add wave -noupdate /ita_tb/dut/calc_en +add wave -noupdate 
/ita_tb/dut/calc_en_q1 +add wave -noupdate /ita_tb/dut/calc_en_q2 +add wave -noupdate /ita_tb/dut/calc_en_q3 +add wave -noupdate -expand -group Bias /ita_tb/dut/inp_bias +add wave -noupdate -expand -group Bias /ita_tb/dut/inp_bias_padded +add wave -noupdate -expand -group Bias /ita_tb/dut/inp_bias_q1 +add wave -noupdate -expand -group Bias /ita_tb/dut/inp_bias_q2 +add wave -noupdate /ita_tb/dut/i_accumulator/oup_i +add wave -noupdate /ita_tb/dut/i_accumulator/result_d +add wave -noupdate /ita_tb/dut/i_accumulator/result_o +add wave -noupdate /ita_tb/dut/i_requantizer/requant_oup_o +add wave -noupdate /ita_tb/dut/i_activation/data_i +add wave -noupdate /ita_tb/dut/i_activation/data_q1 +add wave -noupdate /ita_tb/dut/i_activation/data_q2 +add wave -noupdate /ita_tb/dut/i_activation/data_q3 +add wave -noupdate /ita_tb/dut/i_activation/data_q4 +add wave -noupdate /ita_tb/dut/i_activation/data_o +add wave -noupdate /ita_tb/dut/i_fifo/data_i +add wave -noupdate /ita_tb/dut/i_fifo/data_o +add wave -noupdate /ita_tb/dut/oup_o +add wave -noupdate -expand -group Softmax /ita_tb/dut/i_softmax_top/i_softmax/requant_oup_q +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/clk_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/rst_ni +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/mode_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/eps_mult_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/right_shift_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/calc_en_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/calc_en_q_i +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/mult_signed +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/product +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/shifted_added +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/shifted_d +add wave -noupdate -group Requantizer 
/ita_tb/dut/i_requantizer/shifted_q +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q1 +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q2 +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q3 +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q4 +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/requant_oup_d +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/requant_oup_q +add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/clk_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/rst_ni +add wave -noupdate -group Controller /ita_tb/dut/i_controller/ctrl_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_valid_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_ready_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/weight_valid_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/weight_ready_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/bias_valid_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/bias_ready_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/oup_valid_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/oup_ready_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/pop_softmax_fifo_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/step_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/soft_addr_div_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_done_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/calc_en_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/first_inner_tile_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/last_inner_tile_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_x_o +add wave 
-noupdate -group Controller /ita_tb/dut/i_controller/tile_y_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inner_tile_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias_i +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias_pad_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/busy_o +add wave -noupdate -group Controller /ita_tb/dut/i_controller/step_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/step_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inner_tile_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inner_tile_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_x_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_x_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_y_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_y_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_tile_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_tile_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_soft_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_soft_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias_padded +add wave -noupdate -group Controller /ita_tb/dut/i_controller/inner_tile_dim +add wave -noupdate -group Controller /ita_tb/dut/i_controller/first_outer_dim 
+add wave -noupdate -group Controller /ita_tb/dut/i_controller/second_outer_dim +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_fifo +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_div +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_div_done_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_div_done_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/busy_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/busy_q +add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_d +add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/clk_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/rst_ni +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/ctrl_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/step_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/requant_oup_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_stream_soft_en_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/soft_addr_div_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/softmax_done_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/pop_softmax_fifo_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inp_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inp_stream_soft_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_inp_o +add wave -noupdate -group {Softmax Controller} 
/ita_tb/dut/i_softmax_top/i_softmax/div_valid_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_ready_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_valid_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_ready_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_oup_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_acc_en_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_acc_addr_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_acc_data_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_acc_en_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_acc_addr_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_acc_data_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/prev_max_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_max_en_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_max_addr_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_max_data_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_max_en_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_max_addr_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_max_data_o +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_x_i 
+add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_y_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inner_tile_i +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q1 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q2 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q3 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q4 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q1 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q2 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q3 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q4 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inner_tile_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_y_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/exp_sum_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/exp_sum_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_soft_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_soft_q1 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_soft_q2 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_div_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_div_q +add wave -noupdate 
-group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/addr_div_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/addr_div_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_read_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_read_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_write_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_write_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/requant_oup_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_diff +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_sum_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_sum_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_diff +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_inp +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_inp_diff +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_stream_soft_en_q +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_d +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_q1 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_q2 +add wave -noupdate 
-group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_q3 +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/fifo_full +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/fifo_empty +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/push_to_fifo +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/pop_from_fifo +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/data_to_fifo +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/data_from_fifo +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/fifo_usage +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/disable_shift +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/disable_row +add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/disable_col +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/clk_i +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/rst_ni +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/calc_en_i +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/calc_en_q_i +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/first_tile_i +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/first_tile_q_i +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/last_tile_i +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/last_tile_q_i +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/inp_bias_i +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/read_en +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/read_addr +add wave -noupdate -expand -group 
Accumulator /ita_tb/dut/i_accumulator/read_data +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/read_data_unused +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/write_en +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/write_addr +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/write_data +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/read_addr_d +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/read_addr_q +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/write_addr_d +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/write_addr_q +add wave -noupdate -expand -group Accumulator /ita_tb/dut/i_accumulator/result_q +TreeUpdate [SetDefaultTree] +WaveRestoreCursors {{Cursor 1} {414600 ps} 1} {{Cursor 2} {550600 ps} 1} {{Cursor 3} {710600 ps} 1} {{Cursor 4} {390540 ps} 0} +quietly wave cursor active 4 +configure wave -namecolwidth 176 +configure wave -valuecolwidth 100 +configure wave -justifyvalue left +configure wave -signalnamewidth 1 +configure wave -snapdistance 10 +configure wave -datasetprefix 0 +configure wave -rowmargin 4 +configure wave -childrowmargin 2 +configure wave -gridoffset 0 +configure wave -gridperiod 1 +configure wave -griddelta 40 +configure wave -timeline 0 +configure wave -timelineunits ns +update +WaveRestoreZoom {371422 ps} {416865 ps} diff --git a/src/ita_controller.sv b/src/ita_controller.sv index de9e834..c042ac1 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -341,16 +341,30 @@ module ita_controller last_inner_tile_o = 1'b1; if ( ( (((count_q & (M-1)) + tile_y_q * M)) > ( (first_outer_dim - 1) ) ) ) begin requant_add_d = {N {1'b0}}; - inp_bias = {N {1'b0}}; + //inp_bias = {N {1'b0}}; end else begin if ( (count_q + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin if ( ((count_q / M) * N + tile_x_q * M ) < second_outer_dim) begin 
for (int i = (second_outer_dim & (N-1)); i < N; i++) begin requant_add_d[i] = 1'b0; - inp_bias[i] = 1'b0; + //inp_bias[i] = 1'b0; end end else begin requant_add_d = {N {1'b0}}; + //inp_bias = {N {1'b0}}; + end + end + end + + if ( ( ((((count_q-1) & (M-1)) + tile_y_q * M)) > ( (first_outer_dim - 1) ) ) ) begin + inp_bias = {N {1'b0}}; + end else begin + if ( ((count_q-1) + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin + if ( (((count_q-1) / M) * N + tile_x_q * M ) < second_outer_dim) begin + for (int i = (second_outer_dim & (N-1)); i < N; i++) begin + inp_bias[i] = 1'b0; + end + end else begin inp_bias = {N {1'b0}}; end end From fd7ae8306e122d6b364308b658ae4596720f6404 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Wed, 23 Oct 2024 17:13:13 +0200 Subject: [PATCH 11/28] Added waves --- modelsim/sim_ita_tb_wave.tcl | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/modelsim/sim_ita_tb_wave.tcl b/modelsim/sim_ita_tb_wave.tcl index a1e1697..500122e 100644 --- a/modelsim/sim_ita_tb_wave.tcl +++ b/modelsim/sim_ita_tb_wave.tcl @@ -14,6 +14,29 @@ add wave -noupdate /ita_tb/dut/i_controller/ctrl_i add wave -noupdate /ita_tb/dut/oup_o add wave -noupdate /ita_tb/dut/inp1_q add wave -noupdate /ita_tb/dut/inp2_q +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_d +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_q +add wave -noupdate /ita_tb/dut/calc_en +add wave -noupdate /ita_tb/dut/calc_en_q1 +add wave -noupdate /ita_tb/dut/calc_en_q2 +add wave -noupdate /ita_tb/dut/calc_en_q3 +add wave -noupdate -expand -group Bias /ita_tb/dut/inp_bias +add wave -noupdate -expand -group Bias /ita_tb/dut/inp_bias_padded +add wave -noupdate -expand -group Bias /ita_tb/dut/inp_bias_q1 +add wave -noupdate -expand -group Bias /ita_tb/dut/inp_bias_q2 +add wave -noupdate /ita_tb/dut/i_accumulator/oup_i +add wave -noupdate /ita_tb/dut/i_accumulator/result_d +add wave -noupdate /ita_tb/dut/i_accumulator/result_o +add wave 
-noupdate /ita_tb/dut/i_requantizer/requant_oup_o +add wave -noupdate /ita_tb/dut/i_activation/data_i +add wave -noupdate /ita_tb/dut/i_activation/data_q1 +add wave -noupdate /ita_tb/dut/i_activation/data_q2 +add wave -noupdate /ita_tb/dut/i_activation/data_q3 +add wave -noupdate /ita_tb/dut/i_activation/data_q4 +add wave -noupdate /ita_tb/dut/i_activation/data_o +add wave -noupdate /ita_tb/dut/i_fifo/data_i +add wave -noupdate /ita_tb/dut/i_fifo/data_o +add wave -noupdate /ita_tb/dut/oup_o add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/* add wave -expand -group Controller /ita_tb/dut/i_controller/* add wave -group {Softmax Controller} ita_tb/dut/i_softmax_top/i_softmax/* From 22e835ca994227a5b2f5035929db9c375b09392d Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Fri, 25 Oct 2024 14:31:05 +0200 Subject: [PATCH 12/28] Added ctrl.ff_size for feedforward layer --- src/ita_controller.sv | 5 +---- src/ita_package.sv | 2 ++ src/tb/ita_tb.sv | 1 + 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ita_controller.sv b/src/ita_controller.sv index c042ac1..a31284e 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -291,7 +291,7 @@ module ita_controller F1: begin inner_tile_dim = ctrl_i.tile_e-1; first_outer_dim = ctrl_i.seq_length; - second_outer_dim = ctrl_i.embed_size; + second_outer_dim = ctrl_i.ff_size; if (inner_tile_q == ctrl_i.tile_e-1) begin last_inner_tile_o = 1'b1; end @@ -341,17 +341,14 @@ module ita_controller last_inner_tile_o = 1'b1; if ( ( (((count_q & (M-1)) + tile_y_q * M)) > ( (first_outer_dim - 1) ) ) ) begin requant_add_d = {N {1'b0}}; - //inp_bias = {N {1'b0}}; end else begin if ( (count_q + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin if ( ((count_q / M) * N + tile_x_q * M ) < second_outer_dim) begin for (int i = (second_outer_dim & (N-1)); i < N; i++) begin requant_add_d[i] = 1'b0; - //inp_bias[i] = 1'b0; end end else begin requant_add_d = {N {1'b0}}; - //inp_bias = {N {1'b0}}; end end end 
diff --git a/src/ita_package.sv b/src/ita_package.sv index 04d0d53..3a2c25f 100644 --- a/src/ita_package.sv +++ b/src/ita_package.sv @@ -48,12 +48,14 @@ package ita_package; typedef logic [WO-WI*2-2:0] seq_length_t; typedef logic [WO-WI*2-2:0] proj_space_t; typedef logic [WO-WI*2-2:0] embed_size_t; + typedef logic [WO-WI*2-2:0] ff_size_t; typedef logic [ 32-1:0] tile_t; typedef struct packed { logic start ; seq_length_t seq_length ; proj_space_t proj_space ; embed_size_t embed_size ; + ff_size_t ff_size ; layer_e layer ; activation_e activation ; requant_const_array_t eps_mult ; diff --git a/src/tb/ita_tb.sv b/src/tb/ita_tb.sv index 6d0f9fe..78280c0 100644 --- a/src/tb/ita_tb.sv +++ b/src/tb/ita_tb.sv @@ -493,6 +493,7 @@ task automatic apply_ITA_weights(input integer phase); ita_ctrl.seq_length = SEQUENCE_LEN; ita_ctrl.proj_space = PROJECTION_SPACE; ita_ctrl.embed_size = EMBEDDING_SIZE; + ita_ctrl.ff_size = FEEDFORWARD_SIZE; read_activation_constants(ita_ctrl.gelu_b, ita_ctrl.gelu_c, ita_ctrl.activation_requant_mult, ita_ctrl.activation_requant_shift, ita_ctrl.activation_requant_add); From 64732e4e0006899c1778b42df377a100a4f78fda Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Fri, 25 Oct 2024 20:09:07 +0200 Subject: [PATCH 13/28] Bias padding works now but with quick fix --- modelsim/sim_ita_tb_wave.tcl | 1 + src/ita_controller.sv | 25 ++++++++++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/modelsim/sim_ita_tb_wave.tcl b/modelsim/sim_ita_tb_wave.tcl index 500122e..5a701ab 100644 --- a/modelsim/sim_ita_tb_wave.tcl +++ b/modelsim/sim_ita_tb_wave.tcl @@ -16,6 +16,7 @@ add wave -noupdate /ita_tb/dut/inp1_q add wave -noupdate /ita_tb/dut/inp2_q add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_d add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_q +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_q2 add wave -noupdate /ita_tb/dut/calc_en add wave -noupdate /ita_tb/dut/calc_en_q1 add wave 
-noupdate /ita_tb/dut/calc_en_q2 diff --git a/src/ita_controller.sv b/src/ita_controller.sv index a31284e..831d972 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -39,7 +39,7 @@ module ita_controller ); step_e step_d, step_q; - counter_t count_d, count_q; + counter_t count_d, count_q, count_d2, count_q2; counter_t tile_d, tile_q; counter_t inner_tile_d, inner_tile_q; counter_t tile_x_d, tile_x_q; @@ -49,6 +49,7 @@ module ita_controller ongoing_soft_t ongoing_soft_d, ongoing_soft_q; bias_t inp_bias, inp_bias_padded; + logic last_time; tile_t inner_tile_dim; logic [WO-WI*2-2:0] first_outer_dim, second_outer_dim; @@ -66,6 +67,7 @@ module ita_controller always_comb begin count_d = count_q; + count_d2 = count_q2; tile_d = tile_q; inner_tile_d = inner_tile_q; tile_x_d = tile_x_q; @@ -82,6 +84,7 @@ module ita_controller softmax_tile_d = softmax_tile_q; softmax_div_done_d = softmax_div_done_q; requant_add_d = {N {requant_add_i}}; + last_time = 1'b0; inp_bias = inp_bias_i; @@ -122,6 +125,7 @@ module ita_controller if (inp_valid_i && weight_valid_i && bias_valid_i) begin calc_en_o = 1; count_d = count_q + 1; + count_d2 = count_q; busy_d = 1'b1; if (count_d == M*M/N) begin // end of tile busy_d = 1'b0; // Generate done signal for current tile @@ -305,7 +309,7 @@ module ita_controller end end F2: begin - inner_tile_dim = ctrl_i.tile_e-1; + inner_tile_dim = ctrl_i.tile_f-1; first_outer_dim = ctrl_i.seq_length; second_outer_dim = ctrl_i.embed_size; if (inner_tile_q == ctrl_i.tile_f-1) begin @@ -335,11 +339,16 @@ module ita_controller end end endcase + + if (step_q == Idle && count_q2 == 8'd255) begin + last_time = 1'b1; + count_d2 = 1'b0; + end - if (step_q != Idle && step_q != MatMul) begin + if ((step_q != Idle && step_q != MatMul) || last_time) begin if (inner_tile_q == inner_tile_dim) begin last_inner_tile_o = 1'b1; - if ( ( (((count_q & (M-1)) + tile_y_q * M)) > ( (first_outer_dim - 1) ) ) ) begin + if (((((count_q & (M-1)) + tile_y_q * M)) > 
((first_outer_dim - 1)))) begin requant_add_d = {N {1'b0}}; end else begin if ( (count_q + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin @@ -353,11 +362,11 @@ module ita_controller end end - if ( ( ((((count_q-1) & (M-1)) + tile_y_q * M)) > ( (first_outer_dim - 1) ) ) ) begin + if ((((((count_q2) & (M-1)) + tile_y_q * M)) > ((first_outer_dim - 1)))) begin inp_bias = {N {1'b0}}; end else begin - if ( ((count_q-1) + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin - if ( (((count_q-1) / M) * N + tile_x_q * M ) < second_outer_dim) begin + if ( ((count_q2) + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin + if ( (((count_q2) / M) * N + tile_x_q * M ) < second_outer_dim) begin for (int i = (second_outer_dim & (N-1)); i < N; i++) begin inp_bias[i] = 1'b0; end @@ -391,6 +400,7 @@ module ita_controller if(~rst_ni) begin step_q <= Idle; count_q <= '0; + count_q2 <= '0; tile_q <= '0; inner_tile_q <= '0; softmax_tile_q <= '0; @@ -402,6 +412,7 @@ module ita_controller end else begin step_q <= step_d; count_q <= count_d; + count_q2 <= count_d2; tile_q <= tile_d; tile_x_q <= tile_x_d; tile_y_q <= tile_y_d; From 8f0d19a62e56525157c7f4cc5552b1f5faf4eba5 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Tue, 29 Oct 2024 14:35:48 +0100 Subject: [PATCH 14/28] count_q - 1 solution works for one tile --- modelsim/sim_ita_tb_wave.tcl | 1 - src/ita_controller.sv | 67 ++++++++++++++++++++---------------- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/modelsim/sim_ita_tb_wave.tcl b/modelsim/sim_ita_tb_wave.tcl index 5a701ab..500122e 100644 --- a/modelsim/sim_ita_tb_wave.tcl +++ b/modelsim/sim_ita_tb_wave.tcl @@ -16,7 +16,6 @@ add wave -noupdate /ita_tb/dut/inp1_q add wave -noupdate /ita_tb/dut/inp2_q add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_d add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_q -add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_q2 add wave -noupdate /ita_tb/dut/calc_en add wave 
-noupdate /ita_tb/dut/calc_en_q1 add wave -noupdate /ita_tb/dut/calc_en_q2 diff --git a/src/ita_controller.sv b/src/ita_controller.sv index 831d972..b594fee 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -39,7 +39,7 @@ module ita_controller ); step_e step_d, step_q; - counter_t count_d, count_q, count_d2, count_q2; + counter_t count_d, count_q, bias_count; counter_t tile_d, tile_q; counter_t inner_tile_d, inner_tile_q; counter_t tile_x_d, tile_x_q; @@ -67,7 +67,7 @@ module ita_controller always_comb begin count_d = count_q; - count_d2 = count_q2; + bias_count = count_q - 1; tile_d = tile_q; inner_tile_d = inner_tile_q; tile_x_d = tile_x_q; @@ -86,7 +86,6 @@ module ita_controller requant_add_d = {N {requant_add_i}}; last_time = 1'b0; inp_bias = inp_bias_i; - busy_d = busy_q; softmax_fifo = 1'b0; @@ -102,8 +101,6 @@ module ita_controller busy_d = 1'b1; end - - // default handshake if (step_q != Idle) begin // Check if division for softmax is going to FIFO @@ -114,6 +111,9 @@ module ita_controller if (softmax_div_done_q != 1'b1 && step_q == AV && inner_tile_q == 0 && tile_q == 0 && count_q < M && count_q >= soft_addr_div_i) begin softmax_div = 1'b1; end + // if (count_q == M*M/N) begin //Problem is that the counter does not go to 256 + // count_d = '0; + // end if (ongoing_q>=FifoDepth || (softmax_fifo && ongoing_soft_q>=SoftFifoDepth) || softmax_div) begin inp_ready_o = 1'b0; weight_ready_o = 1'b0; @@ -123,13 +123,17 @@ module ita_controller weight_ready_o = inp_valid_i; bias_ready_o = weight_valid_i; if (inp_valid_i && weight_valid_i && bias_valid_i) begin + // if (count_q == M*M/N) begin + // count_d = 1; + // end else begin + // count_d = count_q + 1; + // end calc_en_o = 1; count_d = count_q + 1; - count_d2 = count_q; busy_d = 1'b1; if (count_d == M*M/N) begin // end of tile busy_d = 1'b0; // Generate done signal for current tile - count_d = '0; + count_d = '0; inner_tile_d = inner_tile_q + 1; end end @@ -340,41 +344,48 @@ module ita_controller 
end endcase - if (step_q == Idle && count_q2 == 8'd255) begin - last_time = 1'b1; - count_d2 = 1'b0; + // if (step_q == Idle && count_q2 == 8'd255) begin + // last_time = 1'b1; + // count_d2 = 1'b0; + // end + + if (count_q == 0 && (tile_x_q > 0 || tile_y_q > 0)) begin + bias_count = 255; end - if ((step_q != Idle && step_q != MatMul) || last_time) begin + if ((step_q != Idle && step_q != MatMul)) begin if (inner_tile_q == inner_tile_dim) begin last_inner_tile_o = 1'b1; - if (((((count_q & (M-1)) + tile_y_q * M)) > ((first_outer_dim - 1)))) begin + if ((((((bias_count) & (M-1)) + tile_y_q * M)) > ((first_outer_dim - 1)))) begin requant_add_d = {N {1'b0}}; - end else begin - if ( (count_q + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin - if ( ((count_q / M) * N + tile_x_q * M ) < second_outer_dim) begin - for (int i = (second_outer_dim & (N-1)); i < N; i++) begin - requant_add_d[i] = 1'b0; - end - end else begin - requant_add_d = {N {1'b0}}; - end - end - end - - if ((((((count_q2) & (M-1)) + tile_y_q * M)) > ((first_outer_dim - 1)))) begin inp_bias = {N {1'b0}}; end else begin - if ( ((count_q2) + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin - if ( (((count_q2) / M) * N + tile_x_q * M ) < second_outer_dim) begin + if ( ((bias_count) + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin + if ( (((bias_count) / M) * N + tile_x_q * M ) < second_outer_dim) begin for (int i = (second_outer_dim & (N-1)); i < N; i++) begin + requant_add_d[i] = 1'b0; inp_bias[i] = 1'b0; end end else begin + requant_add_d = {N {1'b0}}; inp_bias = {N {1'b0}}; end end end + + // if ((((((count_q2) & (M-1)) + tile_y_q * M)) > ((first_outer_dim - 1)))) begin + // inp_bias = {N {1'b0}}; + // end else begin + // if ( ((count_q2) + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin + // if ( (((count_q2) / M) * N + tile_x_q * M ) < second_outer_dim) begin + // for (int i = (second_outer_dim & (N-1)); i < N; i++) begin + // inp_bias[i] = 1'b0; + // end + // end else 
begin + // inp_bias = {N {1'b0}}; + // end + // end + // end end end @@ -400,7 +411,6 @@ module ita_controller if(~rst_ni) begin step_q <= Idle; count_q <= '0; - count_q2 <= '0; tile_q <= '0; inner_tile_q <= '0; softmax_tile_q <= '0; @@ -412,7 +422,6 @@ module ita_controller end else begin step_q <= step_d; count_q <= count_d; - count_q2 <= count_d2; tile_q <= tile_d; tile_x_q <= tile_x_d; tile_y_q <= tile_y_d; From 3c6040c749a86f3006d87be320b9ef73908e10c1 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Tue, 29 Oct 2024 19:15:39 +0100 Subject: [PATCH 15/28] This version works for data_S127_E50_P64_F64_H1_B1 but not for data_S63_E127_P64_F64_H1_B1 --- modelsim/sim_ita_tb_wave.tcl | 5 +++ src/ita_controller.sv | 59 ++++++++++++++++++++++++------------ 2 files changed, 45 insertions(+), 19 deletions(-) diff --git a/modelsim/sim_ita_tb_wave.tcl b/modelsim/sim_ita_tb_wave.tcl index 500122e..bc9fed6 100644 --- a/modelsim/sim_ita_tb_wave.tcl +++ b/modelsim/sim_ita_tb_wave.tcl @@ -16,6 +16,11 @@ add wave -noupdate /ita_tb/dut/inp1_q add wave -noupdate /ita_tb/dut/inp2_q add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_d add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_q +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/bias_count +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/bias_tile_x_d +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/bias_tile_x_q +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/bias_tile_y_d +add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/bias_tile_y_q add wave -noupdate /ita_tb/dut/calc_en add wave -noupdate /ita_tb/dut/calc_en_q1 add wave -noupdate /ita_tb/dut/calc_en_q2 diff --git a/src/ita_controller.sv b/src/ita_controller.sv index b594fee..a081521 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -42,8 +42,8 @@ module ita_controller counter_t count_d, count_q, bias_count; counter_t tile_d, tile_q; counter_t inner_tile_d, inner_tile_q; 
- counter_t tile_x_d, tile_x_q; - counter_t tile_y_d, tile_y_q; + counter_t tile_x_d, tile_x_q, bias_tile_x_d, bias_tile_x_q; + counter_t tile_y_d, tile_y_q, bias_tile_y_d, bias_tile_y_q; counter_t softmax_tile_d, softmax_tile_q; ongoing_t ongoing_d, ongoing_q; ongoing_soft_t ongoing_soft_d, ongoing_soft_q; @@ -67,7 +67,6 @@ module ita_controller always_comb begin count_d = count_q; - bias_count = count_q - 1; tile_d = tile_q; inner_tile_d = inner_tile_q; tile_x_d = tile_x_q; @@ -300,14 +299,19 @@ module ita_controller inner_tile_dim = ctrl_i.tile_e-1; first_outer_dim = ctrl_i.seq_length; second_outer_dim = ctrl_i.ff_size; - if (inner_tile_q == ctrl_i.tile_e-1) begin - last_inner_tile_o = 1'b1; - end if (inner_tile_d == ctrl_i.tile_e) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; - if (tile_d == ctrl_i.tile_s*ctrl_i.tile_f) begin + if (tile_x_q == (ctrl_i.tile_f-1)) begin // end of step Q + tile_x_d = '0; + tile_y_d = tile_y_q + 1; + end else begin + tile_x_d = tile_x_q + 1; + end + if (tile_d == ctrl_i.tile_s*ctrl_i.tile_f) begin // end of step Q tile_d = '0; + tile_x_d = '0; + tile_y_d = '0; step_d = F2; end end @@ -316,14 +320,19 @@ module ita_controller inner_tile_dim = ctrl_i.tile_f-1; first_outer_dim = ctrl_i.seq_length; second_outer_dim = ctrl_i.embed_size; - if (inner_tile_q == ctrl_i.tile_f-1) begin - last_inner_tile_o = 1'b1; - end if (inner_tile_d == ctrl_i.tile_f) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; - if (tile_d == ctrl_i.tile_s*ctrl_i.tile_e) begin + if (tile_x_q == (ctrl_i.tile_f-1)) begin // end of step Q + tile_x_d = '0; + tile_y_d = tile_y_q + 1; + end else begin + tile_x_d = tile_x_q + 1; + end + if (tile_d == ctrl_i.tile_s*ctrl_i.tile_e) begin // end of step Q tile_d = '0; + tile_x_d = '0; + tile_y_d = '0; step_d = Idle; end end @@ -349,19 +358,25 @@ module ita_controller // count_d2 = 1'b0; // end - if (count_q == 0 && (tile_x_q > 0 || tile_y_q > 0)) begin - bias_count = 255; - end - - if 
((step_q != Idle && step_q != MatMul)) begin + // if (count_q == 0 && (tile_x_q > 0 || tile_y_q > 0)) begin + // bias_count = 255; + // end + + + bias_count = (count_q == 0) ? 255 : count_q - 1; + // bias_count = (count_q == 0 && (tile_x_q > 0 || tile_y_q > 0)) ? 255 : count_q - 1; + bias_tile_x_d = (count_q == 0) ? bias_tile_x_q : tile_x_q; + bias_tile_y_d = (count_q == 0) ? bias_tile_y_q : tile_y_q; + + if ((step_q != Idle && step_q != MatMul) || (step_q == Idle && bias_count == 255)) begin if (inner_tile_q == inner_tile_dim) begin last_inner_tile_o = 1'b1; - if ((((((bias_count) & (M-1)) + tile_y_q * M)) > ((first_outer_dim - 1)))) begin + if ((((((bias_count) & (M-1)) + bias_tile_y_d * M)) > ((first_outer_dim - 1)))) begin requant_add_d = {N {1'b0}}; inp_bias = {N {1'b0}}; end else begin - if ( ((bias_count) + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin - if ( (((bias_count) / M) * N + tile_x_q * M ) < second_outer_dim) begin + if ( ((bias_count) + bias_tile_x_d * M*M/N) >= (second_outer_dim / N) * M ) begin + if ( (((bias_count) / M) * N + bias_tile_x_d * M ) < second_outer_dim) begin for (int i = (second_outer_dim & (N-1)); i < N; i++) begin requant_add_d[i] = 1'b0; inp_bias[i] = 1'b0; @@ -412,6 +427,8 @@ module ita_controller step_q <= Idle; count_q <= '0; tile_q <= '0; + tile_x_q <= '0; + tile_y_q <= '0; inner_tile_q <= '0; softmax_tile_q <= '0; ongoing_q <= '0; @@ -419,6 +436,8 @@ module ita_controller softmax_div_done_q <= 1'b0; requant_add_q <= '0; busy_q <= 1'b0; + bias_tile_x_q <= '0; + bias_tile_y_q <= '0; end else begin step_q <= step_d; count_q <= count_d; @@ -432,6 +451,8 @@ module ita_controller softmax_div_done_q <= softmax_div_done_d; requant_add_q <= requant_add_d; busy_q <= busy_d; + bias_tile_x_q <= bias_tile_x_d; + bias_tile_y_q <= bias_tile_y_d; end end endmodule From 0094c1148b8442ae79be23a98059249cb7714ec8 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Wed, 30 Oct 2024 10:48:18 +0100 Subject: [PATCH 16/28] No ebugs for bias 
padding detected one bug without bias in phase 5 --- src/ita_controller.sv | 44 +++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/src/ita_controller.sv b/src/ita_controller.sv index a081521..b2b5d39 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -53,6 +53,8 @@ module ita_controller tile_t inner_tile_dim; logic [WO-WI*2-2:0] first_outer_dim, second_outer_dim; + logic [WO-WI*2-2:0] first_outer_dim_d, first_outer_dim_q; + logic [WO-WI*2-2:0] second_outer_dim_d, second_outer_dim_q; logic softmax_fifo, softmax_div, softmax_div_done_d, softmax_div_done_q, busy_d, busy_q; requant_oup_t requant_add_d, requant_add_q; @@ -302,13 +304,13 @@ module ita_controller if (inner_tile_d == ctrl_i.tile_e) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; - if (tile_x_q == (ctrl_i.tile_f-1)) begin // end of step Q + if (tile_x_q == (ctrl_i.tile_f-1)) begin tile_x_d = '0; tile_y_d = tile_y_q + 1; end else begin tile_x_d = tile_x_q + 1; end - if (tile_d == ctrl_i.tile_s*ctrl_i.tile_f) begin // end of step Q + if (tile_d == ctrl_i.tile_s*ctrl_i.tile_f) begin tile_d = '0; tile_x_d = '0; tile_y_d = '0; @@ -323,13 +325,13 @@ module ita_controller if (inner_tile_d == ctrl_i.tile_f) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; - if (tile_x_q == (ctrl_i.tile_f-1)) begin // end of step Q + if (tile_x_q == (ctrl_i.tile_e-1)) begin tile_x_d = '0; tile_y_d = tile_y_q + 1; end else begin tile_x_d = tile_x_q + 1; end - if (tile_d == ctrl_i.tile_s*ctrl_i.tile_e) begin // end of step Q + if (tile_d == ctrl_i.tile_s*ctrl_i.tile_e) begin tile_d = '0; tile_x_d = '0; tile_y_d = '0; @@ -367,26 +369,29 @@ module ita_controller // bias_count = (count_q == 0 && (tile_x_q > 0 || tile_y_q > 0)) ? 255 : count_q - 1; bias_tile_x_d = (count_q == 0) ? bias_tile_x_q : tile_x_q; bias_tile_y_d = (count_q == 0) ? bias_tile_y_q : tile_y_q; + first_outer_dim_d = (count_q == 0) ? 
first_outer_dim_q : first_outer_dim; + second_outer_dim_d = (count_q == 0) ? second_outer_dim_q : second_outer_dim; if ((step_q != Idle && step_q != MatMul) || (step_q == Idle && bias_count == 255)) begin if (inner_tile_q == inner_tile_dim) begin last_inner_tile_o = 1'b1; - if ((((((bias_count) & (M-1)) + bias_tile_y_d * M)) > ((first_outer_dim - 1)))) begin - requant_add_d = {N {1'b0}}; - inp_bias = {N {1'b0}}; - end else begin - if ( ((bias_count) + bias_tile_x_d * M*M/N) >= (second_outer_dim / N) * M ) begin - if ( (((bias_count) / M) * N + bias_tile_x_d * M ) < second_outer_dim) begin - for (int i = (second_outer_dim & (N-1)); i < N; i++) begin - requant_add_d[i] = 1'b0; - inp_bias[i] = 1'b0; - end - end else begin - requant_add_d = {N {1'b0}}; - inp_bias = {N {1'b0}}; + end + if ((((((bias_count) & (M-1)) + bias_tile_y_d * M)) > ((first_outer_dim_d - 1)))) begin + requant_add_d = {N {1'b0}}; + inp_bias = {N {1'b0}}; + end else begin + if ( ((bias_count) + bias_tile_x_d * M*M/N) >= (second_outer_dim_d / N) * M ) begin + if ( (((bias_count) / M) * N + bias_tile_x_d * M ) < second_outer_dim_d) begin + for (int i = (second_outer_dim_d & (N-1)); i < N; i++) begin + requant_add_d[i] = 1'b0; + inp_bias[i] = 1'b0; end + end else begin + requant_add_d = {N {1'b0}}; + inp_bias = {N {1'b0}}; end end + end // if ((((((count_q2) & (M-1)) + tile_y_q * M)) > ((first_outer_dim - 1)))) begin // inp_bias = {N {1'b0}}; @@ -401,7 +406,6 @@ module ita_controller // end // end // end - end end inp_bias_padded = inp_bias; @@ -438,6 +442,8 @@ module ita_controller busy_q <= 1'b0; bias_tile_x_q <= '0; bias_tile_y_q <= '0; + first_outer_dim_q <= '0; + second_outer_dim_q <= '0; end else begin step_q <= step_d; count_q <= count_d; @@ -453,6 +459,8 @@ module ita_controller busy_q <= busy_d; bias_tile_x_q <= bias_tile_x_d; bias_tile_y_q <= bias_tile_y_d; + first_outer_dim_q <= first_outer_dim_d; + second_outer_dim_q <= second_outer_dim_d; end end endmodule From 
2a04d893e8076f30aed5a8bcff813d066abf75b0 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Wed, 30 Oct 2024 14:58:55 +0100 Subject: [PATCH 17/28] Just errors in phase 5 and 6 --- PyITA/ITA.py | 27 ++++++++++++++++++--------- modelsim/sim_ita_tb_wave.tcl | 7 +++---- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/PyITA/ITA.py b/PyITA/ITA.py index 04a3979..8c304f6 100644 --- a/PyITA/ITA.py +++ b/PyITA/ITA.py @@ -178,10 +178,6 @@ def _initialize_tensors(self, Q, V, Wq, Wk, Wv, Wo, Bq, Bk, Bv, Bo, FF_in, Wff, self.Bq_broadcast = np.reshape(np.repeat(self.Bq, self.S, axis = 0), (self.H, self.S, self.P_ITA)) self.Bq_broadcast = np.pad(self.Bq_broadcast, ((0, 0), (0, self.S_ITA - self.S), (0, 0))) - print(self.Bq_broadcast[0][0][16:32]) - sns.set_theme() - sns.heatmap(self.Bq_broadcast[0], annot=False, linewidths=0, linecolor='white', cmap='crest', xticklabels=False, yticklabels=False) - plt.show() if self.bias: self.Bk_in = random_shuffled_tensor( @@ -362,6 +358,9 @@ def tiler_QK(self, qk: np.ndarray, weight: np.ndarray, bias: np.ndarray, output: # Weight Wqk is H x E x P # Transpose Wqk to H x P x E + # print(f"qk: {qk.shape}") + # print(f"qk: {weight.shape}") + weight = np.transpose(weight, (0, 2, 1)) tile_x = qk.shape[0] // self.ITA_M # S // ITA_M @@ -376,6 +375,19 @@ def tiler_QK(self, qk: np.ndarray, weight: np.ndarray, bias: np.ndarray, output: Input = np.tile(Input, [1, 1, self.split, 1]) # Repeat each tile number of output row tiles times Input = np.tile(Input, [1, tile_y, 1, 1]).reshape((-1, self.ITA_M)) + # fig, ax = plt.subplots(1, 2) # Create a figure with two subplots + # im0 = ax[0].imshow(Input, cmap='viridis') + # im1 = ax[1].imshow(np.squeeze(weight, axis=0)) + + # # Add colorbars for each image if needed + # fig.colorbar(im0, ax=ax[0]) + # fig.colorbar(im1, ax=ax[1]) + + # # Set titles for each subplot + # ax[0].set_title("Inputs") + # ax[1].set_title("Weights") + + plt.show() write_matrix(Input, input_file, self.paths["standalone"]) # 
Transposed Weight Wqk is H x P x E @@ -526,10 +538,7 @@ def step1_Qp(self): self.Qp = np.clip(self.Qp, -2**(self.WO - 1), 2**(self.WO - 1) - 1) self.Qp_requant = requantize(self.Qp, self.requant_eps_mult[0], self.requant_right_shift[0], self.requant_add[0]) - print(self.Qp[0][0][16:32]) - sns.set_theme() - sns.heatmap(self.Qp[0], annot=False, linewidths=0, linecolor='white', cmap='crest', xticklabels=False, yticklabels=False) - plt.show() + # Set padded values to zero if (self.S_ITA - self.S) > 0: self.Qp_requant[:, -(self.S_ITA - self.S):, :] = 0 @@ -654,7 +663,7 @@ def feedforward_layer(self): self.FFp_requant = requantize(self.FFp, self.requant_eps_mult_ffn[0], self.requant_right_shift_ffn[0], self.requant_add_ffn[0]) self.FFp_requant = self.apply_activation(self.FFp_requant, self.activation) - + self.tiler_QK(self.FF, self.Wff, self.Bff, self.FFp_requant, "FF", "Wff", "Bff", "FFp") self.FF2p = np.matmul(self.FFp_requant, self.Wff2, dtype = np.int32) + self.Bff2_broadcast diff --git a/modelsim/sim_ita_tb_wave.tcl b/modelsim/sim_ita_tb_wave.tcl index bc9fed6..072c1cd 100644 --- a/modelsim/sim_ita_tb_wave.tcl +++ b/modelsim/sim_ita_tb_wave.tcl @@ -21,10 +21,9 @@ add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/bias_tile_x_d add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/bias_tile_x_q add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/bias_tile_y_d add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/bias_tile_y_q -add wave -noupdate /ita_tb/dut/calc_en -add wave -noupdate /ita_tb/dut/calc_en_q1 -add wave -noupdate /ita_tb/dut/calc_en_q2 -add wave -noupdate /ita_tb/dut/calc_en_q3 +add wave -noupdate -expand -group Requant /ita_tb/dut/i_controller/requant_add_i +add wave -noupdate -expand -group Requant /ita_tb/dut/i_controller/requant_add_o +add wave -noupdate -expand -group Requant /ita_tb/dut/i_controller/step_q add wave -noupdate -expand -group Bias /ita_tb/dut/inp_bias add wave -noupdate -expand -group Bias 
/ita_tb/dut/inp_bias_padded add wave -noupdate -expand -group Bias /ita_tb/dut/inp_bias_q1 From aab0df5b8835c5588ee2482767e5f5ac75b5b7fc Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Wed, 30 Oct 2024 16:56:27 +0100 Subject: [PATCH 18/28] No bugs in all phases --- src/ita_controller.sv | 11 ----------- src/tb/ita_tb.sv | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/src/ita_controller.sv b/src/ita_controller.sv index b2b5d39..f37474f 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -355,18 +355,7 @@ module ita_controller end endcase - // if (step_q == Idle && count_q2 == 8'd255) begin - // last_time = 1'b1; - // count_d2 = 1'b0; - // end - - // if (count_q == 0 && (tile_x_q > 0 || tile_y_q > 0)) begin - // bias_count = 255; - // end - - bias_count = (count_q == 0) ? 255 : count_q - 1; - // bias_count = (count_q == 0 && (tile_x_q > 0 || tile_y_q > 0)) ? 255 : count_q - 1; bias_tile_x_d = (count_q == 0) ? bias_tile_x_q : tile_x_q; bias_tile_y_d = (count_q == 0) ? bias_tile_y_q : tile_y_q; first_outer_dim_d = (count_q == 0) ? 
first_outer_dim_q : first_outer_dim; diff --git a/src/tb/ita_tb.sv b/src/tb/ita_tb.sv index 78280c0..1b2a077 100644 --- a/src/tb/ita_tb.sv +++ b/src/tb/ita_tb.sv @@ -95,6 +95,7 @@ module ita_tb; N_TILES_SEQUENCE_DIM = (SEQUENCE_LEN + M_TILE_LEN -1 ) / M_TILE_LEN; N_TILES_EMBEDDING_DIM = (EMBEDDING_SIZE+ M_TILE_LEN -1 ) / M_TILE_LEN; N_TILES_PROJECTION_DIM = (PROJECTION_SPACE + M_TILE_LEN -1 ) / M_TILE_LEN; + N_TILES_FEEDFORWARD = (FEEDFORWARD_SIZE + M_TILE_LEN -1) / M_TILE_LEN; N_TILES_LINEAR_PROJECTION = N_TILES_SEQUENCE_DIM * N_TILES_EMBEDDING_DIM * N_TILES_PROJECTION_DIM; N_TILES_ATTENTION = N_TILES_SEQUENCE_DIM * N_TILES_PROJECTION_DIM; N_ENTRIES_PER_TILE = M_TILE_LEN * M_TILE_LEN / N_PE; @@ -104,7 +105,6 @@ module ita_tb; N_ENTRIES_PER_SEQUENCE_DIM = N_ENTRIES_PER_TILE * N_TILES_SEQUENCE_DIM; N_ATTENTION_TILE_ROWS = N_TILES_SEQUENCE_DIM; N_GROUPS = 2 * N_ATTENTION_TILE_ROWS; - N_TILES_FEEDFORWARD = FEEDFORWARD_SIZE / M_TILE_LEN; N_TILES_INNER_DIM_LINEAR_PROJECTION[0] = N_TILES_EMBEDDING_DIM; N_TILES_INNER_DIM_LINEAR_PROJECTION[1] = N_TILES_EMBEDDING_DIM; N_TILES_INNER_DIM_LINEAR_PROJECTION[2] = N_TILES_EMBEDDING_DIM; From 64978c1ebaeb282c07ee1049061f4a5dc963fe6c Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Thu, 31 Oct 2024 09:52:10 +0100 Subject: [PATCH 19/28] Bias padding for all phases without bugs --- src/ita_controller.sv | 32 +++++--------------------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/src/ita_controller.sv b/src/ita_controller.sv index f37474f..5f5bc9d 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -112,9 +112,6 @@ module ita_controller if (softmax_div_done_q != 1'b1 && step_q == AV && inner_tile_q == 0 && tile_q == 0 && count_q < M && count_q >= soft_addr_div_i) begin softmax_div = 1'b1; end - // if (count_q == M*M/N) begin //Problem is that the counter does not go to 256 - // count_d = '0; - // end if (ongoing_q>=FifoDepth || (softmax_fifo && ongoing_soft_q>=SoftFifoDepth) || softmax_div) begin 
inp_ready_o = 1'b0; weight_ready_o = 1'b0; @@ -124,11 +121,6 @@ module ita_controller weight_ready_o = inp_valid_i; bias_ready_o = weight_valid_i; if (inp_valid_i && weight_valid_i && bias_valid_i) begin - // if (count_q == M*M/N) begin - // count_d = 1; - // end else begin - // count_d = count_q + 1; - // end calc_en_o = 1; count_d = count_q + 1; busy_d = 1'b1; @@ -191,7 +183,7 @@ module ita_controller if (inner_tile_d == ctrl_i.tile_e) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; - if (tile_x_q == (ctrl_i.tile_p-1)) begin // end of step Q + if (tile_x_q == (ctrl_i.tile_p-1)) begin tile_x_d = '0; tile_y_d = tile_y_q + 1; end else begin @@ -212,7 +204,7 @@ module ita_controller if (inner_tile_d == ctrl_i.tile_e) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; - if (tile_x_q == (ctrl_i.tile_s-1)) begin // end of step Q + if (tile_x_q == (ctrl_i.tile_s-1)) begin tile_x_d = '0; tile_y_d = tile_y_q + 1; end else begin @@ -233,7 +225,7 @@ module ita_controller if (inner_tile_d == ctrl_i.tile_p) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; - if (tile_x_q == (ctrl_i.tile_s-1)) begin // end of step Q + if (tile_x_q == (ctrl_i.tile_s-1)) begin tile_x_d = '0; end else begin tile_x_d = tile_x_q + 1; @@ -251,7 +243,7 @@ module ita_controller if (inner_tile_d == ctrl_i.tile_s) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; - if (tile_x_q == (ctrl_i.tile_p-1)) begin // end of step Q + if (tile_x_q == (ctrl_i.tile_p-1)) begin tile_x_d = '0; end else begin tile_x_d = tile_x_q + 1; @@ -282,7 +274,7 @@ module ita_controller if (inner_tile_d == ctrl_i.tile_p) begin // end of inner tile inner_tile_d = '0; tile_d = tile_q + 1; - if (tile_x_q == (ctrl_i.tile_e-1)) begin // end of step Q + if (tile_x_q == (ctrl_i.tile_e-1)) begin tile_x_d = '0; tile_y_d = tile_y_q + 1; end else begin @@ -381,20 +373,6 @@ module ita_controller end end end - - // if ((((((count_q2) & (M-1)) + tile_y_q * M)) > 
((first_outer_dim - 1)))) begin - // inp_bias = {N {1'b0}}; - // end else begin - // if ( ((count_q2) + tile_x_q * M*M/N) >= (second_outer_dim / N) * M ) begin - // if ( (((count_q2) / M) * N + tile_x_q * M ) < second_outer_dim) begin - // for (int i = (second_outer_dim & (N-1)); i < N; i++) begin - // inp_bias[i] = 1'b0; - // end - // end else begin - // inp_bias = {N {1'b0}}; - // end - // end - // end end inp_bias_padded = inp_bias; From d43362540d7c583c001f3b4fc21b8ef3bafcf8a8 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Fri, 8 Nov 2024 12:02:21 +0100 Subject: [PATCH 20/28] Added test vectors in the gitlab-ci --- .gitlab-ci.yml | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 31832bc..4ab51a1 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,15 +26,18 @@ generate_testvectors: stage: test script: - !reference [.setup_test, script] - - python testGenerator.py -H 1 -S 128 -E 128 -P 192 -F 256 --activation identity --no-bias - # - python testGenerator.py -H 1 -S 64 -E 128 -P 192 -F 256 --activation identity --no-bias - # - python testGenerator.py -H 1 -S 128 -E 192 -P 256 -F 256 --activation gelu - # - python testGenerator.py -H 1 -S 192 -E 256 -P 128 -F 128 --activation relu - - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation gelu --no-bias - # - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation relu --no-bias - - python testGenerator.py -H 1 -S 63 -E 62 -P 61 -F 61 --activation relu --no-bias - # - python testGenerator.py -H 1 -S 65 -E 130 -P 195 -F 195 --activation relu --no-bias - # - python testGenerator.py -H 1 -S 127 -E 190 -P 253 -F 253 --activation relu --no-bias + - python testGenerator.py -H 1 -S 64 -E 64 -P 64 -F 64 --activation gelu --skip-vector-validation + - python testGenerator.py -H 1 -S 128 -E 192 -P 256 -F 256 --activation gelu --skip-vector-validation + - python testGenerator.py -H 1 -S 192 -E 256 -P 128 -F 128 
--activation relu --skip-vector-validation + - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation gelu --no-bias --skip-vector-validation + - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation relu --no-bias --skip-vector-validation + - python testGenerator.py -H 1 -S 63 -E 62 -P 61 -F 61 --activation relu --no-bias --skip-vector-validation + - python testGenerator.py -H 1 -S 65 -E 130 -P 195 -F 195 --activation relu --no-bias --skip-vector-validation + - python testGenerator.py -H 1 -S 127 -E 190 -P 253 -F 253 --activation relu --no-bias --skip-vector-validation + + - python testGenerator.py -H 1 -S 511 -E 511 -P 127 -F 63 --activation relu --skip-vector-validation + - python testGenerator.py -H 1 -S 63 -E 63 -P 50 -F 129 --activation gelu --skip-vector-validation + - python testGenerator.py -H 1 -S 255 -E 63 -P 511 -F 511 --activation identity --skip-vector-validation artifacts: paths: - simvectors @@ -131,9 +134,24 @@ run_sim_padding: # P: 253 # F: 253 # activation: relu + - S: 511 + E: 511 + P: 127 + F: 63 + activation: relu + - S: 63 + E: 63 + P: 50 + F: 129 + activation: relu + - S: 255 + E: 63 + P: 511 + F: 511 + activation: relu script: - make bender - - make sim VSIM_FLAGS=-c s=$S e=$E p=$P f=$F bias=0 activation=$activation + - make sim VSIM_FLAGS=-c s=$S e=$E p=$P f=$F bias=1 activation=$activation no_stalls=$no_stalls single_attention=$single_attention - ./modelsim/return_status.sh modelsim/build/transcript $S $E $P $F ita_tb run_hwpe_sim: From 8d4de04a7d0ee16ea24fcd5ec17dc4a501059055 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Fri, 8 Nov 2024 13:08:35 +0100 Subject: [PATCH 21/28] Fixes in gitlab-ci --- .gitlab-ci.yml | 18 ++++++++++++++++-- PyITA/debug.py | 39 --------------------------------------- 2 files changed, 16 insertions(+), 41 deletions(-) delete mode 100644 PyITA/debug.py diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4ab51a1..63ac46b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -119,36 +119,50 @@ 
run_sim_padding: P: 3 F: 3 activation: relu + no_stalls: 0 + single_attention: 0 - S: 63 E: 62 P: 61 F: 61 activation: relu + no_stalls: 0 + single_attention: 0 # - S: 65 # E: 130 # P: 195 # F: 195 # activation: relu + # no_stalls: 0 + # single_attention: 0 # - S: 127 # E: 190gelu # P: 253 # F: 253 # activation: relu + # no_stalls: 0 + # single_attention: 0 - S: 511 E: 511 P: 127 F: 63 activation: relu + no_stalls: 0 + single_attention: 0 - S: 63 E: 63 P: 50 F: 129 - activation: relu + activation: gelu + no_stalls: 0 + single_attention: 0 - S: 255 E: 63 P: 511 F: 511 - activation: relu + activation: identity + no_stalls: 0 + single_attention: 0 script: - make bender - make sim VSIM_FLAGS=-c s=$S e=$E p=$P f=$F bias=1 activation=$activation no_stalls=$no_stalls single_attention=$single_attention diff --git a/PyITA/debug.py b/PyITA/debug.py deleted file mode 100644 index 3cb9d7a..0000000 --- a/PyITA/debug.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import numpy as np -import seaborn as sns -import matplotlib.pyplot as plt - - -def print_matrix(from_txt: bool, - cut: bool = False, - matrix: np.array = None, - txt_file: str = 'Qp_0.txt', - test_vector: str = 'data_S30_E64_P64_F64_H1_B1', - row: int = 64, col: int = 64): - - if (from_txt): - current_dir = os.path.dirname(os.path.abspath(__file__)) - filepath = os.path.join(os.path.dirname(current_dir), - 'simvectors', - test_vector, - 'standalone', - txt_file) - - array = np.loadtxt(filepath) - if (cut): - array = array[:4096] - matrix = array.reshape(row, col) - - sns.set_theme() - sns.heatmap(matrix, annot=False, linewidths=0, linecolor='white', cmap='crest', xticklabels=False, yticklabels=False) - plt.title(txt_file) - plt.xlabel(col) - plt.ylabel(row) - plt.show() - -# print_matrix(from_txt=True, txt_file="A_soft_0.txt") - - - - - From 55ac72669b6326a4d6de2059334dd1c6057d889b Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Fri, 8 Nov 2024 14:43:54 +0100 Subject: [PATCH 22/28] Added license on top 
sim_ita_tb_wave_important.tcl --- .gitlab-ci.yml | 3 +- modelsim/sim_ita_tb_wave_important.tcl | 4 + modelsim/sim_ita_tb_wave_makant.tcl | 236 ------------------------- 3 files changed, 5 insertions(+), 238 deletions(-) delete mode 100644 modelsim/sim_ita_tb_wave_makant.tcl diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 63ac46b..305275f 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -34,7 +34,6 @@ generate_testvectors: - python testGenerator.py -H 1 -S 63 -E 62 -P 61 -F 61 --activation relu --no-bias --skip-vector-validation - python testGenerator.py -H 1 -S 65 -E 130 -P 195 -F 195 --activation relu --no-bias --skip-vector-validation - python testGenerator.py -H 1 -S 127 -E 190 -P 253 -F 253 --activation relu --no-bias --skip-vector-validation - - python testGenerator.py -H 1 -S 511 -E 511 -P 127 -F 63 --activation relu --skip-vector-validation - python testGenerator.py -H 1 -S 63 -E 63 -P 50 -F 129 --activation gelu --skip-vector-validation - python testGenerator.py -H 1 -S 255 -E 63 -P 511 -F 511 --activation identity --skip-vector-validation @@ -136,7 +135,7 @@ run_sim_padding: # no_stalls: 0 # single_attention: 0 # - S: 127 - # E: 190gelu + # E: 190 # P: 253 # F: 253 # activation: relu diff --git a/modelsim/sim_ita_tb_wave_important.tcl b/modelsim/sim_ita_tb_wave_important.tcl index 7ddab73..6513e4c 100644 --- a/modelsim/sim_ita_tb_wave_important.tcl +++ b/modelsim/sim_ita_tb_wave_important.tcl @@ -1,3 +1,7 @@ +# Copyright 2023 ETH Zurich and University of Bologna. +# Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+# SPDX-License-Identifier: Apache-2.0 + onerror {resume} quietly WaveActivateNextPane {} 0 add wave -noupdate /ita_tb/dut/i_inp1_mux/clk_i diff --git a/modelsim/sim_ita_tb_wave_makant.tcl b/modelsim/sim_ita_tb_wave_makant.tcl deleted file mode 100644 index d79b638..0000000 --- a/modelsim/sim_ita_tb_wave_makant.tcl +++ /dev/null @@ -1,236 +0,0 @@ -onerror {resume} -quietly WaveActivateNextPane {} 0 -add wave -noupdate /ita_tb/dut/i_inp1_mux/clk_i -add wave -noupdate /ita_tb/dut/i_inp1_mux/rst_ni -add wave -noupdate /ita_tb/dut/i_inp1_mux/inp_i -add wave -noupdate /ita_tb/dut/i_inp1_mux/inp1_o -add wave -noupdate /ita_tb/dut/i_inp2_mux/rst_ni -add wave -noupdate /ita_tb/dut/i_inp2_mux/weight_i -add wave -noupdate /ita_tb/dut/i_inp2_mux/inp2_o -add wave -noupdate /ita_tb/dut/i_controller/ctrl_i -add wave -noupdate /ita_tb/dut/inp1_q -add wave -noupdate /ita_tb/dut/inp2_q -add wave -noupdate /ita_tb/dut/oup_o -add wave -noupdate /ita_tb/dut/inp_bias -add wave -noupdate /ita_tb/dut/inp_bias_pad_q1 -add wave -noupdate /ita_tb/dut/inp_bias_q2 -add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_d -add wave -noupdate -radix unsigned /ita_tb/dut/i_controller/count_q -add wave -noupdate /ita_tb/dut/i_inp2_mux/clk_i -add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en -add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q1 -add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q2 -add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q3 -add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q4 -add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q5 -add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q6 -add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q7 -add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q8 -add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q9 -add wave -noupdate -expand -group Calc_en /ita_tb/dut/calc_en_q10 -add wave -noupdate -group 
Requantizer /ita_tb/dut/i_requantizer/clk_i -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/rst_ni -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/mode_i -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/eps_mult_i -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/right_shift_i -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/calc_en_i -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/calc_en_q_i -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/result_i -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_i -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/requant_oup_o -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/mult_signed -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/product -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/shifted_added -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/shifted_d -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/shifted_q -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q1 -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q2 -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q3 -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/add_q4 -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/requant_oup_d -add wave -noupdate -group Requantizer /ita_tb/dut/i_requantizer/requant_oup_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/clk_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/rst_ni -add wave -noupdate -group Controller /ita_tb/dut/i_controller/ctrl_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_valid_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_ready_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/weight_valid_i -add wave 
-noupdate -group Controller /ita_tb/dut/i_controller/weight_ready_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/bias_valid_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/bias_ready_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/oup_valid_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/oup_ready_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/pop_softmax_fifo_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/step_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/soft_addr_div_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_done_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/calc_en_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/first_inner_tile_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/last_inner_tile_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_x_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_y_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/inner_tile_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias_pad_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias_pad_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/calc_en_q1_i -add wave -noupdate -group Controller /ita_tb/dut/i_controller/busy_o -add wave -noupdate -group Controller /ita_tb/dut/i_controller/step_d -add wave -noupdate -group Controller /ita_tb/dut/i_controller/step_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_d -add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/inner_tile_d -add wave -noupdate -group Controller 
/ita_tb/dut/i_controller/inner_tile_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_x_d -add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_x_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_y_d -add wave -noupdate -group Controller /ita_tb/dut/i_controller/tile_y_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_tile_d -add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_tile_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_d -add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_soft_d -add wave -noupdate -group Controller /ita_tb/dut/i_controller/ongoing_soft_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias_pad_d -add wave -noupdate -group Controller /ita_tb/dut/i_controller/inp_bias_pad_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/inner_tile_dim -add wave -noupdate -group Controller /ita_tb/dut/i_controller/first_outer_dim -add wave -noupdate -group Controller /ita_tb/dut/i_controller/second_outer_dim -add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_fifo -add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_div -add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_div_done_d -add wave -noupdate -group Controller /ita_tb/dut/i_controller/softmax_div_done_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/busy_d -add wave -noupdate -group Controller /ita_tb/dut/i_controller/busy_q -add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_d -add wave -noupdate -group Controller /ita_tb/dut/i_controller/requant_add_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/clk_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/rst_ni -add wave -noupdate -group 
{Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/ctrl_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/step_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/requant_oup_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_stream_soft_en_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/soft_addr_div_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/softmax_done_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/pop_softmax_fifo_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inp_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inp_stream_soft_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_inp_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_valid_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_ready_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_valid_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_ready_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_oup_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_acc_en_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_acc_addr_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_acc_data_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_acc_en_o -add wave -noupdate -group {Softmax Controller} 
/ita_tb/dut/i_softmax_top/i_softmax/write_acc_addr_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_acc_data_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/prev_max_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_max_en_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_max_addr_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/read_max_data_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_max_en_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_max_addr_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/write_max_data_o -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_x_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_y_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inner_tile_i -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q1 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q2 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q3 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_q4 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q1 -add wave -noupdate -group {Softmax 
Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q2 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q3 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_q4 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/inner_tile_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/tile_y_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/exp_sum_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/exp_sum_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_soft_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_soft_q1 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_soft_q2 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_div_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/count_div_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/addr_div_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/addr_div_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_read_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_read_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_write_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/div_write_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/requant_oup_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_q -add wave -noupdate -group {Softmax 
Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_diff -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_sum_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_sum_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/max_diff -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_inp -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/shift_inp_diff -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_stream_soft_en_q -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_d -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_q1 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_q2 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/calc_en_q3 -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/fifo_full -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/fifo_empty -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/push_to_fifo -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/pop_from_fifo -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/data_to_fifo -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/data_from_fifo -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/fifo_usage -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/disable_shift -add wave -noupdate 
-group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/disable_row -add wave -noupdate -group {Softmax Controller} /ita_tb/dut/i_softmax_top/i_softmax/disable_col -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/clk_i -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/rst_ni -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/calc_en_i -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/calc_en_q_i -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/first_tile_i -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/first_tile_q_i -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/last_tile_i -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/last_tile_q_i -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/oup_i -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/inp_bias_i -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/result_o -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_en -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_addr -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_data -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_data_unused -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/write_en -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/write_addr -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/write_data -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_addr_d -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/read_addr_q -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/write_addr_d -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/write_addr_q -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/result_d -add wave -noupdate -group Accumulator /ita_tb/dut/i_accumulator/result_q 
-TreeUpdate [SetDefaultTree] -WaveRestoreCursors {{Wrong Output} {2784600 ps} 1} {{Start of Calc} {2775007 ps} 1} {{Calc finish} {2783000 ps} 1} {{Cursor 4} {2783576 ps} 0} -quietly wave cursor active 4 -configure wave -namecolwidth 150 -configure wave -valuecolwidth 100 -configure wave -justifyvalue left -configure wave -signalnamewidth 1 -configure wave -snapdistance 10 -configure wave -datasetprefix 0 -configure wave -rowmargin 4 -configure wave -childrowmargin 2 -configure wave -gridoffset 0 -configure wave -gridperiod 1 -configure wave -griddelta 40 -configure wave -timeline 0 -configure wave -timelineunits ns -update -WaveRestoreZoom {2768202 ps} {2794772 ps} From bf274888cfdead82784b691089d0d8b27a919d48 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Fri, 8 Nov 2024 17:31:59 +0100 Subject: [PATCH 23/28] Pipelining test --- .gitlab-ci.yml | 62 ++++++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 305275f..db718b4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,17 +26,17 @@ generate_testvectors: stage: test script: - !reference [.setup_test, script] - - python testGenerator.py -H 1 -S 64 -E 64 -P 64 -F 64 --activation gelu --skip-vector-validation - - python testGenerator.py -H 1 -S 128 -E 192 -P 256 -F 256 --activation gelu --skip-vector-validation - - python testGenerator.py -H 1 -S 192 -E 256 -P 128 -F 128 --activation relu --skip-vector-validation - - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation gelu --no-bias --skip-vector-validation - - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation relu --no-bias --skip-vector-validation - - python testGenerator.py -H 1 -S 63 -E 62 -P 61 -F 61 --activation relu --no-bias --skip-vector-validation - - python testGenerator.py -H 1 -S 65 -E 130 -P 195 -F 195 --activation relu --no-bias --skip-vector-validation - - python testGenerator.py -H 1 -S 127 -E 190 -P 253 -F 253 --activation relu 
--no-bias --skip-vector-validation - - python testGenerator.py -H 1 -S 511 -E 511 -P 127 -F 63 --activation relu --skip-vector-validation - - python testGenerator.py -H 1 -S 63 -E 63 -P 50 -F 129 --activation gelu --skip-vector-validation - - python testGenerator.py -H 1 -S 255 -E 63 -P 511 -F 511 --activation identity --skip-vector-validation + # - python testGenerator.py -H 1 -S 64 -E 64 -P 64 -F 64 --activation gelu --skip-vector-validation + # - python testGenerator.py -H 1 -S 128 -E 192 -P 256 -F 256 --activation gelu --skip-vector-validation + # - python testGenerator.py -H 1 -S 192 -E 256 -P 128 -F 128 --activation relu --skip-vector-validation + # - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation gelu --no-bias --skip-vector-validation + # - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation relu --no-bias --skip-vector-validation + # - python testGenerator.py -H 1 -S 63 -E 62 -P 61 -F 61 --activation relu --no-bias --skip-vector-validation + # - python testGenerator.py -H 1 -S 65 -E 130 -P 195 -F 195 --activation relu --no-bias --skip-vector-validation + # - python testGenerator.py -H 1 -S 127 -E 190 -P 253 -F 253 --activation relu --no-bias --skip-vector-validation + # - python testGenerator.py -H 1 -S 511 -E 511 -P 127 -F 63 --activation relu --skip-vector-validation + # - python testGenerator.py -H 1 -S 63 -E 63 -P 50 -F 129 --activation gelu --skip-vector-validation + # - python testGenerator.py -H 1 -S 255 -E 63 -P 511 -F 511 --activation identity --skip-vector-validation artifacts: paths: - simvectors @@ -108,11 +108,13 @@ run_sim_padding: - generate_testvectors parallel: matrix: - # - S: 1 - # E: 2 - # P: 3 - # F: 3 - # activation: gelu + - S: 1 + E: 2 + P: 3 + F: 3 + activation: gelu + no_stalls: 0 + single_attention: 0 - S: 1 E: 2 P: 3 @@ -127,20 +129,20 @@ run_sim_padding: activation: relu no_stalls: 0 single_attention: 0 - # - S: 65 - # E: 130 - # P: 195 - # F: 195 - # activation: relu - # no_stalls: 0 - # 
single_attention: 0 - # - S: 127 - # E: 190 - # P: 253 - # F: 253 - # activation: relu - # no_stalls: 0 - # single_attention: 0 + - S: 65 + E: 130 + P: 195 + F: 195 + activation: relu + no_stalls: 0 + single_attention: 0 + - S: 127 + E: 190 + P: 253 + F: 253 + activation: relu + no_stalls: 0 + single_attention: 0 - S: 511 E: 511 P: 127 From 83290ae51f84305d440ebca4ad61cef1cb7d3b8e Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Fri, 8 Nov 2024 17:39:01 +0100 Subject: [PATCH 24/28] Changed bias for test vectors in gitlab-ci --- .gitlab-ci.yml | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index db718b4..2c97e25 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -26,17 +26,17 @@ generate_testvectors: stage: test script: - !reference [.setup_test, script] - # - python testGenerator.py -H 1 -S 64 -E 64 -P 64 -F 64 --activation gelu --skip-vector-validation - # - python testGenerator.py -H 1 -S 128 -E 192 -P 256 -F 256 --activation gelu --skip-vector-validation - # - python testGenerator.py -H 1 -S 192 -E 256 -P 128 -F 128 --activation relu --skip-vector-validation - # - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation gelu --no-bias --skip-vector-validation - # - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation relu --no-bias --skip-vector-validation - # - python testGenerator.py -H 1 -S 63 -E 62 -P 61 -F 61 --activation relu --no-bias --skip-vector-validation - # - python testGenerator.py -H 1 -S 65 -E 130 -P 195 -F 195 --activation relu --no-bias --skip-vector-validation - # - python testGenerator.py -H 1 -S 127 -E 190 -P 253 -F 253 --activation relu --no-bias --skip-vector-validation - # - python testGenerator.py -H 1 -S 511 -E 511 -P 127 -F 63 --activation relu --skip-vector-validation - # - python testGenerator.py -H 1 -S 63 -E 63 -P 50 -F 129 --activation gelu --skip-vector-validation - # - python testGenerator.py -H 1 -S 255 -E 63 -P 511 -F 511 --activation 
identity --skip-vector-validation + - python testGenerator.py -H 1 -S 64 -E 64 -P 64 -F 64 --activation gelu --skip-vector-validation + - python testGenerator.py -H 1 -S 128 -E 192 -P 256 -F 256 --activation gelu --skip-vector-validation + - python testGenerator.py -H 1 -S 192 -E 256 -P 128 -F 128 --activation relu --skip-vector-validation + - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation gelu --skip-vector-validation + - python testGenerator.py -H 1 -S 1 -E 2 -P 3 -F 3 --activation relu --skip-vector-validation + - python testGenerator.py -H 1 -S 63 -E 62 -P 61 -F 61 --activation relu --skip-vector-validation + - python testGenerator.py -H 1 -S 65 -E 130 -P 195 -F 195 --activation relu --skip-vector-validation + - python testGenerator.py -H 1 -S 127 -E 190 -P 253 -F 253 --activation relu --skip-vector-validation + - python testGenerator.py -H 1 -S 511 -E 511 -P 127 -F 63 --activation relu --skip-vector-validation + - python testGenerator.py -H 1 -S 63 -E 63 -P 50 -F 129 --activation gelu --skip-vector-validation + - python testGenerator.py -H 1 -S 255 -E 63 -P 511 -F 511 --activation identity --skip-vector-validation artifacts: paths: - simvectors From 3db2ff7bc0dae34b7468df0173ec984b9c99cf8a Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Mon, 18 Nov 2024 16:51:22 +0100 Subject: [PATCH 25/28] Fixed synthesize errors --- src/ita_controller.sv | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/ita_controller.sv b/src/ita_controller.sv index 5f5bc9d..74de76a 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -84,9 +84,7 @@ module ita_controller step_d = step_q; softmax_tile_d = softmax_tile_q; softmax_div_done_d = softmax_div_done_q; - requant_add_d = {N {requant_add_i}}; last_time = 1'b0; - inp_bias = inp_bias_i; busy_d = busy_q; softmax_fifo = 1'b0; @@ -347,6 +345,8 @@ module ita_controller end endcase + inp_bias = inp_bias_i; + requant_add_d = {N {requant_add_i}}; bias_count = (count_q == 0) ? 
255 : count_q - 1; bias_tile_x_d = (count_q == 0) ? bias_tile_x_q : tile_x_q; bias_tile_y_d = (count_q == 0) ? bias_tile_y_q : tile_y_q; @@ -363,9 +363,11 @@ module ita_controller end else begin if ( ((bias_count) + bias_tile_x_d * M*M/N) >= (second_outer_dim_d / N) * M ) begin if ( (((bias_count) / M) * N + bias_tile_x_d * M ) < second_outer_dim_d) begin - for (int i = (second_outer_dim_d & (N-1)); i < N; i++) begin - requant_add_d[i] = 1'b0; - inp_bias[i] = 1'b0; + for (int i = 0; i < N; i++) begin + if (i >= (second_outer_dim_d & (N-1))) begin + requant_add_d[i] = 1'b0; + inp_bias[i] = 1'b0; + end end end else begin requant_add_d = {N {1'b0}}; From 0ec7089a389dcfde07fd85f4c86f5117fb6fa829 Mon Sep 17 00:00:00 2001 From: Marcel Kant Date: Mon, 18 Nov 2024 16:59:01 +0100 Subject: [PATCH 26/28] Fixed synthesize error --- src/ita_controller.sv | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/ita_controller.sv b/src/ita_controller.sv index 74de76a..28e1885 100644 --- a/src/ita_controller.sv +++ b/src/ita_controller.sv @@ -57,7 +57,7 @@ module ita_controller logic [WO-WI*2-2:0] second_outer_dim_d, second_outer_dim_q; logic softmax_fifo, softmax_div, softmax_div_done_d, softmax_div_done_q, busy_d, busy_q; - requant_oup_t requant_add_d, requant_add_q; + requant_oup_t requant_add, requant_add_d, requant_add_q; assign step_o = step_q; assign busy_o = busy_q; @@ -85,6 +85,7 @@ module ita_controller softmax_tile_d = softmax_tile_q; softmax_div_done_d = softmax_div_done_q; last_time = 1'b0; + requant_add = {N {requant_add_i}}; busy_d = busy_q; softmax_fifo = 1'b0; @@ -346,7 +347,7 @@ module ita_controller endcase inp_bias = inp_bias_i; - requant_add_d = {N {requant_add_i}}; + requant_add_d = requant_add; bias_count = (count_q == 0) ? 255 : count_q - 1; bias_tile_x_d = (count_q == 0) ? bias_tile_x_q : tile_x_q; bias_tile_y_d = (count_q == 0) ? 
bias_tile_y_q : tile_y_q; @@ -367,6 +368,9 @@ module ita_controller if (i >= (second_outer_dim_d & (N-1))) begin requant_add_d[i] = 1'b0; inp_bias[i] = 1'b0; + end else begin + requant_add_d[i] = requant_add[i]; + inp_bias[i] = inp_bias_i[i]; end end end else begin From 7d42ab74810575feadeea1a46d35af4248924399 Mon Sep 17 00:00:00 2001 From: Marcel Kant <132563549+marcelkant@users.noreply.github.com> Date: Sat, 8 Feb 2025 15:06:24 +0100 Subject: [PATCH 27/28] Update PyITA/softmax.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typo fix Co-authored-by: Gamze İslamoğlu <54476562+gamzeisl@users.noreply.github.com> --- PyITA/softmax.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PyITA/softmax.py b/PyITA/softmax.py index eabf432..84d063c 100644 --- a/PyITA/softmax.py +++ b/PyITA/softmax.py @@ -220,7 +220,7 @@ def realSoftmax(A_requant, integerize = True): # ITA sequence length parser.add_argument("-M", default = 64, type = int, help = "ITA sequence length") - # Quantiztion (float or int) + # Quantization (float or int) parser.add_argument("--int", action = "store_true", help = "Quantize to int") parser.add_argument('--seed', default = 0, type = int, help = 'Random seed') From 5e088727bf389a4d1e9ab56f0ccca85816d3f880 Mon Sep 17 00:00:00 2001 From: Marcel Kant <132563549+marcelkant@users.noreply.github.com> Date: Sat, 8 Feb 2025 15:06:58 +0100 Subject: [PATCH 28/28] Update Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Gamze İslamoğlu <54476562+gamzeisl@users.noreply.github.com> --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f85f76b..c8192e7 100644 --- a/Makefile +++ b/Makefile @@ -12,8 +12,8 @@ BENDER_INSTALL_DIR = ${INSTALL_DIR}/bender VENV_BIN=venv/bin/ BENDER_VERSION = 0.28.1 -SIM_FOLDER ?= build -SIM_PATH ?= modelsim/${SIM_FOLDER} +SIM_FOLDER ?= build +SIM_PATH ?= 
modelsim/${SIM_FOLDER} SYNTH_PATH = synopsys BENDER_TARGETS = -t rtl -t test