diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index e66baee..0000000 Binary files a/.DS_Store and /dev/null differ diff --git a/cacti-main/.DS_Store b/cacti-main/.DS_Store deleted file mode 100644 index 03c85d4..0000000 Binary files a/cacti-main/.DS_Store and /dev/null differ diff --git a/cacti-main/base_cache.cfg b/cacti-main/base_cache.cfg index 8ec0aa4..ff6f184 100644 --- a/cacti-main/base_cache.cfg +++ b/cacti-main/base_cache.cfg @@ -1,5 +1,5 @@ # Cache size --size (bytes) 128 +-size (bytes) 64 # power gating -Array Power Gating - "false" @@ -23,7 +23,7 @@ # Multiple banks connected using a bus -UCA bank count 1 --technology (u) 0.032 +-technology (u) 0.09 # following three parameters are meaningful only for main memories -page size (bits) 8192 diff --git a/cacti-main/cacti_interface.h b/cacti-main/cacti_interface.h index a2b8e2d..2257584 100644 --- a/cacti-main/cacti_interface.h +++ b/cacti-main/cacti_interface.h @@ -333,6 +333,10 @@ class InputParameter bool total_power; // false means just considering I/O Power bool verbose; + + // Added to keep track of repeater spacing and size + double repeater_spacing; + double repeater_size; diff --git a/cacti-main/cacti_python/.DS_Store b/cacti-main/cacti_python/.DS_Store index a795410..1541d1d 100644 Binary files a/cacti-main/cacti_python/.DS_Store and b/cacti-main/cacti_python/.DS_Store differ diff --git a/cacti-main/cacti_python/Ucache-org.py b/cacti-main/cacti_python/Ucache-org.py deleted file mode 100644 index eda4a63..0000000 --- a/cacti-main/cacti_python/Ucache-org.py +++ /dev/null @@ -1,847 +0,0 @@ -import math -from typing import List -from threading import Thread -from cacti_interface import * -from cacti_interface import MemArray -from nuca import NucaOrgT -from parameter import g_ip, g_tp -from parameter import * -from uca import UCA -from parameter import _log2 -import sympy as sp - -BIGNUM = float('inf') -NTHREADS = 4 -MAXDATAN = 4 -MAX_COL_MUX = 4 -MAXDATASPD = 4.0 -Full_swing, Global, Low_swing = 0, 1, 2 # Just example values - -class MinValuesT: - def __init__(self): - self.min_delay = BIGNUM - self.min_dyn = BIGNUM - self.min_leakage = BIGNUM - self.min_area = BIGNUM - self.min_cyc = BIGNUM - - def update_min_values(self, val): - self.min_delay = sp.Min(self.min_delay, val.min_delay) - self.min_dyn = sp.Min(self.min_dyn, val.min_dyn) - self.min_leakage = sp.Min(self.min_leakage, val.min_leakage) - self.min_area = sp.Min(self.min_area, val.min_area) - self.min_cyc = sp.Min(self.min_cyc, val.min_cyc) - - def update_min_values_from_uca(self, res: uca_org_t): - self.min_delay = sp.Min(self.min_delay, res.access_time) - self.min_dyn = sp.Min(self.min_dyn, res.power.readOp.dynamic) - self.min_leakage = sp.Min(self.min_leakage, res.power.readOp.leakage) - self.min_area = sp.Min(self.min_area, res.area) - self.min_cyc = sp.Min(self.min_cyc, res.cycle_time) - - def update_min_values_from_nuca(self, res: NucaOrgT): - self.min_delay = sp.Min(self.min_delay, res.nuca_pda.access_time) - self.min_dyn = sp.Min(self.min_dyn, res.nuca_pda.power.readOp.dynamic) - self.min_leakage = sp.Min(self.min_leakage, res.nuca_pda.power.readOp.leakage) - self.min_area = sp.Min(self.min_area, res.nuca_pda.area) - self.min_cyc = sp.Min(self.min_cyc, res.nuca_pda.cycle_time) - - def update_min_values_from_mem_array(self, res: MemArray): - if(not contains_any_symbol(self.min_delay) and math.isnan(self.min_delay)): - self.min_delay = res.access_time - elif(not contains_any_symbol(res.access_time) and math.isnan(res.access_time)): - self.min_delay = self.min_delay - else: - self.min_delay = sp.Min(self.min_delay, res.access_time) - - if not sp.contains_any_symbol(self.min_dyn) and math.isnan(self.min_dyn): - self.min_dyn = res.power.readOp.dynamic - elif not sp.contains_any_symbol(res.power.readOp.dynamic) and math.isnan(res.power.readOp.dynamic): - self.min_dyn = self.min_dyn - else: - self.min_dyn = sp.Min(self.min_dyn, res.power.readOp.dynamic) - - if not sp.contains_any_symbol(self.min_leakage) and math.isnan(self.min_leakage): - self.min_leakage = res.power.readOp.leakage - elif not sp.contains_any_symbol(res.power.readOp.leakage) and math.isnan(res.power.readOp.leakage): - self.min_leakage = self.min_leakage - else: - self.min_leakage = sp.Min(self.min_leakage, res.power.readOp.leakage) - - if not sp.contains_any_symbol(self.min_area) and math.isnan(self.min_area): - self.min_area = res.area - elif not sp.contains_any_symbol(res.area) and math.isnan(res.area): - self.min_area = self.min_area - else: - self.min_area = sp.Min(self.min_area, res.area) - - if not sp.contains_any_symbol(self.min_cyc) and math.isnan(self.min_cyc): - self.min_cyc = res.cycle_time - elif not sp.contains_any_symbol(res.cycle_time) and math.isnan(res.cycle_time): - self.min_cyc = self.min_cyc - else: - self.min_cyc = sp.Min(self.min_cyc, res.cycle_time) - - # self.min_dyn = sp.Min(self.min_dyn, res.power.readOp.dynamic) - # self.min_leakage = sp.Min(self.min_leakage, res.power.readOp.leakage) - # self.min_area = sp.Min(self.min_area, res.area) - # self.min_cyc = sp.Min(self.min_cyc, res.cycle_time) - -class CalcTimeMtWrapperStruct: - def __init__(self): - self.tid = 0 - self.is_tag = False - self.pure_ram = False - self.pure_cam = False - self.is_main_mem = False - self.Nspd_min = 0.0 - self.data_res = None # Assuming min_values_t is another class or type, set to None by default - self.tag_res = None # Assuming min_values_t is another class or type, set to None by default - self.data_arr = [] # list is translated to a list in Python - self.tag_arr = [] # list is translated to a list in Python - -def calc_time_mt_wrapper(void_obj): - calc_obj = void_obj - tid = calc_obj.tid - data_arr = calc_obj.data_arr - tag_arr = calc_obj.tag_arr - is_tag = calc_obj.is_tag - pure_ram = calc_obj.pure_ram - pure_cam = calc_obj.pure_cam - is_main_mem = calc_obj.is_main_mem - Nspd_min = calc_obj.Nspd_min - data_res = calc_obj.data_res - tag_res = calc_obj.tag_res - - data_arr.clear() - data_arr.append(MemArray()) - tag_arr.clear() - tag_arr.append(MemArray()) - - Ndwl_niter = int(_log2(MAXDATAN)) + 1 - Ndbl_niter = int(_log2(MAXDATAN)) + 1 - Ndcm_niter = int(_log2(MAX_COL_MUX)) + 1 - niter = Ndwl_niter * Ndbl_niter * Ndcm_niter - - is_valid_partition = False - wt_min = 0 - wt_max = 0 - - if g_ip.force_wiretype: - if g_ip.wt == 'Full_swing': - wt_min = 'Global' - wt_max = 'Low_swing'-1 - else: - if g_ip.wt == 'Global': - wt_min = wt_max = 'Global' - elif g_ip.wt == 'Global_5': - wt_min = wt_max = 'Global_5' - elif g_ip.wt == 'Global_10': - wt_min = wt_max = 'Global_10' - elif g_ip.wt == 'Global_20': - wt_min = wt_max = 'Global_20' - elif g_ip.wt == 'Global_30': - wt_min = wt_max = 'Global_30' - elif g_ip.wt == 'Low_swing': - wt_min = wt_max = 'Low_swing' - else: - raise ValueError("Unknown wire type!") - else: - wt_min = 'Global' - wt_max = 'Low_swing' - - print("CHECKPOINT Nspd_min") - print(Nspd_min) - # print(MAXDATASPD) - # print() - #TODO Npsd_min messed up - for Nspd in range(int(Nspd_min), int(MAXDATASPD), int(math.ceil(Nspd_min*2))): - # replace with proper enum - if(wt_min == "Global"): - wt_min = 0 - elif(wt_min == "Global_5"): - wt_min = 1 - elif(wt_min == "Global_10"): - wt_min = 2 - elif(wt_min == "Global_20"): - wt_min = 3 - elif(wt_min == "Global_30"): - wt_min = 4 - elif(wt_min == "Low_swing"): - wt_min = 5 - elif(wt_min == "Semi_global"): - wt_min = 6 - elif(wt_min == "Full_swing"): - wt_min = 7 - elif(wt_min == "Transmission"): - wt_min = 8 - elif(wt_min == "Optical"): - wt_min = 9 - else: - wt_min = 10 - - if(wt_max == "Global"): - wt_max = 0 - elif(wt_max == "Global_5"): - wt_max = 1 - elif(wt_max == "Global_10"): - wt_max = 2 - elif(wt_max == "Global_20"): - wt_max = 3 - elif(wt_max == "Global_30"): - wt_max = 4 - elif(wt_max == "Low_swing"): - wt_max = 5 - elif(wt_max == "Semi_global"): - wt_max = 6 - elif(wt_max == "Full_swing"): - wt_max = 7 - elif(wt_max == "Transmission"): - wt_max = 8 - elif(wt_max == "Optical"): - wt_max = 9 - else: - wt_max = 10 - - print("CHECKPOINT") - print(wt_min) - print(wt_max) - print() - - for wr in range(wt_min, wt_max+1): - for iter in range(tid, niter, NTHREADS): - Ndwl = 1 << (iter // (Ndbl_niter * Ndcm_niter)) - Ndbl = 1 << ((iter // Ndcm_niter) % Ndbl_niter) - Ndcm = 1 << (iter % Ndcm_niter) - Ndsam_lev_1 = 1 - Ndsam_lev_2 = 1 - for Ndsam_lev_1 in range(1, MAX_COL_MUX+1, Ndsam_lev_1*2): - for Ndsam_lev_2 in range(1, MAX_COL_MUX+1, Ndsam_lev_2*2): - if g_ip.force_cache_config and not is_tag: - wr = g_ip.wt - Ndwl = g_ip.ndwl - Ndbl = g_ip.ndbl - Ndcm = g_ip.ndcm - if g_ip.nspd != 0: - Nspd = g_ip.nspd - if g_ip.ndsam1 != 0: - Ndsam_lev_1 = g_ip.ndsam1 - Ndsam_lev_2 = g_ip.ndsam2 - - if is_tag: - is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl, - Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, - tag_arr[-1], 0, None, None, wr, is_main_mem) - if not is_tag or g_ip.fully_assoc: - is_valid_partition = calculate_time(is_tag, pure_ram, pure_cam, Nspd, Ndwl, - Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, - data_arr[-1], 0, None, None, wr, is_main_mem) - if g_ip.is_3d_mem: - Ndsam_lev_1 = MAX_COL_MUX+1 - Ndsam_lev_2 = MAX_COL_MUX+1 - - if is_valid_partition: - if is_tag: - tag_arr[-1].wt = wr - tag_res.update_min_values_from_mem_array(tag_arr[-1]) - tag_arr.append(MemArray()) - if not is_tag or g_ip.fully_assoc: - data_arr[-1].wt = wr - data_res.update_min_values_from_mem_array(data_arr[-1]) - data_arr.append(MemArray()) - - if g_ip.force_cache_config and not is_tag: - wr = wt_max - iter = niter - if g_ip.nspd != 0: - Nspd = MAXDATASPD - if g_ip.ndsam1 != 0: - Ndsam_lev_1 = MAX_COL_MUX+1 - Ndsam_lev_2 = MAX_COL_MUX+1 - # Ndsam_lev_1 += 1 - # Ndsam_lev_1 += 1 - - data_arr.pop() - tag_arr.pop() - -def calculate_time( - is_tag, - pure_ram, - pure_cam, - Nspd, - Ndwl, - Ndbl, - Ndcm, - Ndsam_lev_1, - Ndsam_lev_2, - ptr_array, - flag_results_populate, - ptr_results, - ptr_fin_res, - wt, - is_main_mem -): - dyn_p = DynamicParameter(is_tag, pure_ram, pure_cam, Nspd, Ndwl, Ndbl, Ndcm, Ndsam_lev_1, Ndsam_lev_2, wt, is_main_mem) - - if not dyn_p.is_valid: - return False - - uca = UCA(dyn_p) - - if flag_results_populate: - # For the final solution, populate the ptr_results data structure -- TODO: copy only necessary variables - pass - else: - num_act_mats_hor_dir = uca.bank.dp.num_act_mats_hor_dir - num_mats = uca.bank.dp.num_mats - is_fa = uca.bank.dp.fully_assoc - pure_cam = uca.bank.dp.pure_cam - - ptr_array.Ndwl = Ndwl - ptr_array.Ndbl = Ndbl - ptr_array.Nspd = Nspd - ptr_array.deg_bl_muxing = dyn_p.deg_bl_muxing - ptr_array.Ndsam_lev_1 = Ndsam_lev_1 - ptr_array.Ndsam_lev_2 = Ndsam_lev_2 - ptr_array.access_time = uca.access_time - ptr_array.cycle_time = uca.cycle_time - ptr_array.multisubbank_interleave_cycle_time = uca.multisubbank_interleave_cycle_time - ptr_array.area_ram_cells = uca.area_all_dataramcells - ptr_array.area = uca.area.get_area() - - if g_ip.is_3d_mem: - ptr_array.area = uca.area.get_area() - if g_ip.num_die_3d > 1: - ptr_array.area += uca.area_TSV_tot - - ptr_array.height = uca.area.h - ptr_array.width = uca.area.w - ptr_array.mat_height = uca.bank.mat.area.h - ptr_array.mat_length = uca.bank.mat.area.w - ptr_array.subarray_height = uca.bank.mat.subarray.area.h - ptr_array.subarray_length = uca.bank.mat.subarray.area.w - ptr_array.power = uca.power - ptr_array.delay_senseamp_mux_decoder = sp.Max(uca.delay_array_to_sa_mux_lev_1_decoder, uca.delay_array_to_sa_mux_lev_2_decoder) - ptr_array.delay_before_subarray_output_driver = uca.delay_before_subarray_output_driver - ptr_array.delay_from_subarray_output_driver_to_output = uca.delay_from_subarray_out_drv_to_out - ptr_array.delay_route_to_bank = uca.htree_in_add.delay - ptr_array.delay_input_htree = uca.bank.htree_in_add.delay - ptr_array.delay_row_predecode_driver_and_block = uca.bank.mat.r_predec.delay - ptr_array.delay_row_decoder = uca.bank.mat.row_dec.delay - ptr_array.delay_bitlines = uca.bank.mat.delay_bitline - ptr_array.delay_matchlines = uca.bank.mat.delay_matchchline - ptr_array.delay_sense_amp = uca.bank.mat.delay_sa - ptr_array.delay_subarray_output_driver = uca.bank.mat.delay_subarray_out_drv_htree - ptr_array.delay_dout_htree = uca.bank.htree_out_data.delay - ptr_array.delay_comparator = uca.bank.mat.delay_comparator - - if g_ip.is_3d_mem: - ptr_array.delay_row_activate_net = uca.membus_RAS.delay_bus - ptr_array.delay_row_predecode_driver_and_block = uca.membus_RAS.delay_add_predecoder - ptr_array.delay_row_decoder = uca.membus_RAS.delay_add_decoder - ptr_array.delay_local_wordline = uca.membus_RAS.delay_lwl_drv - ptr_array.delay_column_access_net = uca.membus_CAS.delay_bus - ptr_array.delay_column_predecoder = uca.membus_CAS.delay_add_predecoder - ptr_array.delay_column_decoder = uca.membus_CAS.delay_add_decoder - ptr_array.delay_column_selectline = 0 - ptr_array.delay_datapath_net = uca.membus_data.delay_bus - ptr_array.delay_global_data = uca.membus_data.delay_global_data - ptr_array.delay_local_data_and_drv = uca.membus_data.delay_local_data - ptr_array.delay_data_buffer = uca.membus_data.delay_data_buffer - ptr_array.energy_row_activate_net = uca.membus_RAS.power_bus.readOp.dynamic - ptr_array.energy_row_predecode_driver_and_block = uca.membus_RAS.power_add_predecoder.readOp.dynamic - ptr_array.energy_row_decoder = uca.membus_RAS.power_add_decoders.readOp.dynamic - ptr_array.energy_local_wordline = uca.membus_RAS.power_lwl_drv.readOp.dynamic - ptr_array.energy_bitlines = dyn_p.Ndwl * uca.bank.mat.power_bitline.readOp.dynamic - ptr_array.energy_sense_amp = dyn_p.Ndwl * uca.bank.mat.power_sa.readOp.dynamic - ptr_array.energy_column_access_net = uca.membus_CAS.power_bus.readOp.dynamic - ptr_array.energy_column_predecoder = uca.membus_CAS.power_add_predecoder.readOp.dynamic - ptr_array.energy_column_decoder = uca.membus_CAS.power_add_decoders.readOp.dynamic - ptr_array.energy_column_selectline = uca.membus_CAS.power_col_sel.readOp.dynamic - ptr_array.energy_datapath_net = uca.membus_data.power_bus.readOp.dynamic - ptr_array.energy_global_data = uca.membus_data.power_global_data.readOp.dynamic - ptr_array.energy_local_data_and_drv = uca.membus_data.power_local_data.readOp.dynamic - ptr_array.energy_subarray_output_driver = uca.bank.mat.power_subarray_out_drv.readOp.dynamic - ptr_array.energy_data_buffer = 0 - ptr_array.area_lwl_drv = uca.area_lwl_drv - ptr_array.area_row_predec_dec = uca.area_row_predec_dec - ptr_array.area_col_predec_dec = uca.area_col_predec_dec - ptr_array.area_subarray = uca.area_subarray - ptr_array.area_bus = uca.area_bus - ptr_array.area_address_bus = uca.area_address_bus - ptr_array.area_data_bus = uca.area_data_bus - ptr_array.area_data_drv = uca.area_data_drv - ptr_array.area_IOSA = uca.area_IOSA - ptr_array.area_sense_amp = uca.area_sense_amp - - ptr_array.all_banks_height = uca.area.h - ptr_array.all_banks_width = uca.area.w - ptr_array.area_efficiency = uca.area_all_dataramcells * 100 / ptr_array.area - ptr_array.power_routing_to_bank = uca.power_routing_to_bank - ptr_array.power_addr_input_htree = uca.bank.htree_in_add.power - ptr_array.power_data_input_htree = uca.bank.htree_in_data.power - ptr_array.power_data_output_htree = uca.bank.htree_out_data.power - ptr_array.power_row_predecoder_drivers = uca.bank.mat.r_predec.driver_power - ptr_array.power_row_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_row_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_row_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_row_predecoder_blocks = uca.bank.mat.r_predec.block_power - ptr_array.power_row_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_row_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_row_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_row_decoders = uca.bank.mat.power_row_decoders - ptr_array.power_row_decoders.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_row_decoders.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_row_decoders.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bit_mux_predecoder_drivers = uca.bank.mat.b_mux_predec.driver_power - ptr_array.power_bit_mux_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bit_mux_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bit_mux_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bit_mux_predecoder_blocks = uca.bank.mat.b_mux_predec.block_power - ptr_array.power_bit_mux_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bit_mux_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bit_mux_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bit_mux_decoders = uca.bank.mat.power_bit_mux_decoders - ptr_array.power_bit_mux_decoders.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bit_mux_decoders.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bit_mux_decoders.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_1_predecoder_drivers = uca.bank.mat.sa_mux_lev_1_predec.driver_power - ptr_array.power_senseamp_mux_lev_1_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_1_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_1_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_1_predecoder_blocks = uca.bank.mat.sa_mux_lev_1_predec.block_power - ptr_array.power_senseamp_mux_lev_1_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_1_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_1_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_1_decoders = uca.bank.mat.power_sa_mux_lev_1_decoders - ptr_array.power_senseamp_mux_lev_1_decoders.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_1_decoders.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_1_decoders.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_2_predecoder_drivers = uca.bank.mat.sa_mux_lev_2_predec.driver_power - ptr_array.power_senseamp_mux_lev_2_predecoder_drivers.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_2_predecoder_drivers.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_2_predecoder_drivers.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_2_predecoder_blocks = uca.bank.mat.sa_mux_lev_2_predec.block_power - ptr_array.power_senseamp_mux_lev_2_predecoder_blocks.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_2_predecoder_blocks.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_2_predecoder_blocks.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_2_decoders = uca.bank.mat.power_sa_mux_lev_2_decoders - ptr_array.power_senseamp_mux_lev_2_decoders.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_2_decoders.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_senseamp_mux_lev_2_decoders.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bitlines = uca.bank.mat.power_bitline - ptr_array.power_bitlines.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bitlines.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_bitlines.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_sense_amps = uca.bank.mat.power_sa - ptr_array.power_sense_amps.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_sense_amps.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_sense_amps.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_prechg_eq_drivers = uca.bank.mat.power_bl_precharge_eq_drv - ptr_array.power_prechg_eq_drivers.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_prechg_eq_drivers.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_prechg_eq_drivers.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_output_drivers_at_subarray = uca.bank.mat.power_subarray_out_drv - ptr_array.power_output_drivers_at_subarray.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_output_drivers_at_subarray.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_output_drivers_at_subarray.searchOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_comparators = uca.bank.mat.power_comparator - ptr_array.power_comparators.readOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_comparators.writeOp.dynamic *= num_act_mats_hor_dir - ptr_array.power_comparators.searchOp.dynamic *= num_act_mats_hor_dir - - if is_fa or pure_cam: - ptr_array.power_htree_in_search = uca.bank.htree_in_search.power - ptr_array.power_htree_out_search = uca.bank.htree_out_search.power - ptr_array.power_searchline = uca.bank.mat.power_searchline - ptr_array.power_searchline.searchOp.dynamic *= num_mats - ptr_array.power_searchline_precharge = uca.bank.mat.power_searchline_precharge - ptr_array.power_searchline_precharge.searchOp.dynamic *= num_mats - ptr_array.power_matchlines = uca.bank.mat.power_matchline - ptr_array.power_matchlines.searchOp.dynamic *= num_mats - ptr_array.power_matchline_precharge = uca.bank.mat.power_matchline_precharge - ptr_array.power_matchline_precharge.searchOp.dynamic *= num_mats - ptr_array.power_matchline_to_wordline_drv = uca.bank.mat.power_ml_to_ram_wl_drv - - ptr_array.activate_energy = uca.activate_energy - ptr_array.read_energy = uca.read_energy - ptr_array.write_energy = uca.write_energy - ptr_array.precharge_energy = uca.precharge_energy - ptr_array.refresh_power = uca.refresh_power - ptr_array.leak_power_subbank_closed_page = uca.leak_power_subbank_closed_page - ptr_array.leak_power_subbank_open_page = uca.leak_power_subbank_open_page - ptr_array.leak_power_request_and_reply_networks = uca.leak_power_request_and_reply_networks - ptr_array.precharge_delay = uca.precharge_delay - - if g_ip.is_3d_mem: - ptr_array.t_RCD = uca.t_RCD - ptr_array.t_RAS = uca.t_RAS - ptr_array.t_RC = uca.t_RC - ptr_array.t_CAS = uca.t_CAS - ptr_array.t_RP = uca.t_RP - ptr_array.t_RRD = uca.t_RRD - ptr_array.activate_energy = uca.activate_energy - ptr_array.read_energy = uca.read_energy - ptr_array.write_energy = uca.write_energy - ptr_array.precharge_energy = uca.precharge_energy - ptr_array.activate_power = uca.activate_power - ptr_array.read_power = uca.read_power - ptr_array.write_power = uca.write_power - ptr_array.peak_read_power = uca.read_energy / ((g_ip.burst_depth) / (g_ip.sys_freq_MHz * 1e6) / 2) - ptr_array.num_row_subarray = dyn_p.num_r_subarray - ptr_array.num_col_subarray = dyn_p.num_c_subarray - ptr_array.delay_TSV_tot = uca.delay_TSV_tot - ptr_array.area_TSV_tot = uca.area_TSV_tot - ptr_array.dyn_pow_TSV_tot = uca.dyn_pow_TSV_tot - ptr_array.dyn_pow_TSV_per_access = uca.dyn_pow_TSV_per_access - ptr_array.num_TSV_tot = uca.num_TSV_tot - - if g_ip.power_gating: - ptr_array.sram_sleep_tx_width = uca.bank.mat.sram_sleep_tx.width - ptr_array.sram_sleep_tx_area = uca.bank.mat.array_sleep_tx_area - ptr_array.sram_sleep_wakeup_latency = uca.bank.mat.array_wakeup_t - ptr_array.sram_sleep_wakeup_energy = uca.bank.mat.array_wakeup_e.readOp.dynamic - ptr_array.wl_sleep_tx_width = uca.bank.mat.row_dec.sleeptx.width - ptr_array.wl_sleep_tx_area = uca.bank.mat.wl_sleep_tx_area - ptr_array.wl_sleep_wakeup_latency = uca.bank.mat.wl_wakeup_t - ptr_array.wl_sleep_wakeup_energy = uca.bank.mat.wl_wakeup_e.readOp.dynamic - ptr_array.bl_floating_wakeup_latency = uca.bank.mat.blfloating_wakeup_t - ptr_array.bl_floating_wakeup_energy = uca.bank.mat.blfloating_wakeup_e.readOp.dynamic - ptr_array.array_leakage = uca.bank.array_leakage - ptr_array.wl_leakage = uca.bank.wl_leakage - ptr_array.cl_leakage = uca.bank.cl_leakage - - ptr_array.num_active_mats = uca.bank.dp.num_act_mats_hor_dir - ptr_array.num_submarray_mats = uca.bank.mat.num_subarrays_per_mat - - return True - -def check_uca_org(u, minval): - if ((u.access_time - minval.min_delay) * 100 / minval.min_delay) > g_ip.delay_dev: - return False - if ((u.power.readOp.dynamic - minval.min_dyn) / minval.min_dyn) * 100 > g_ip.dynamic_power_dev: - return False - if ((u.power.readOp.leakage - minval.min_leakage) / minval.min_leakage) * 100 > g_ip.leakage_power_dev: - return False - if ((u.cycle_time - minval.min_cyc) / minval.min_cyc) * 100 > g_ip.cycle_time_dev: - return False - if ((u.area - minval.min_area) / minval.min_area) * 100 > g_ip.area_dev: - return False - return True - -def check_mem_org(u, minval): - if ((u.access_time - minval.min_delay) * 100 / minval.min_delay) > g_ip.delay_dev: - return False - if ((u.power.readOp.dynamic - minval.min_dyn) / minval.min_dyn) * 100 > g_ip.dynamic_power_dev: - return False - if ((u.power.readOp.leakage - minval.min_leakage) / minval.min_leakage) * 100 > g_ip.leakage_power_dev: - return False - if ((u.cycle_time - minval.min_cyc) / minval.min_cyc) * 100 > g_ip.cycle_time_dev: - return False - if ((u.area - minval.min_area) / minval.min_area) * 100 > g_ip.area_dev: - return False - return True - -def find_optimal_uca(res, minval, ulist): - cost = 0 - min_cost = BIGNUM - dp = g_ip.dynamic_power_wt - lp = g_ip.leakage_power_wt - a = g_ip.area_wt - d = g_ip.delay_wt - c = g_ip.cycle_time_wt - - if not ulist: - print("ERROR: no valid cache organizations found") - exit(0) - - for niter in ulist: - if g_ip.ed == 1: - cost = (niter.access_time / minval.min_delay) * (niter.power.readOp.dynamic / minval.min_dyn) - if min_cost > cost: - min_cost = cost - res.update(niter) - elif g_ip.ed == 2: - cost = ((niter.access_time / minval.min_delay) ** 2) * (niter.power.readOp.dynamic / minval.min_dyn) - if min_cost > cost: - min_cost = cost - res.update(niter) - else: - if check_uca_org(niter, minval): - cost = ( - d * (niter.access_time / minval.min_delay) + - c * (niter.cycle_time / minval.min_cyc) + - dp * (niter.power.readOp.dynamic / minval.min_dyn) + - lp * (niter.power.readOp.leakage / minval.min_leakage) + - a * (niter.area / minval.min_area) - ) - if min_cost > cost: - min_cost = cost - res.update(niter) - ulist.remove(niter) - else: - ulist.remove(niter) - - if min_cost == BIGNUM: - print("ERROR: no cache organizations met optimization criteria") - exit(0) - -def filter_tag_arr(min_val, mem_list): - cost = float('inf') - cur_cost = 0.0 - wt_delay = g_ip.delay_wt - wt_dyn = g_ip.dynamic_power_wt - wt_leakage = g_ip.leakage_power_wt - wt_cyc = g_ip.cycle_time_wt - wt_area = g_ip.area_wt - res = None - - if not mem_list: - print("ERROR: no valid tag organizations found") - exit(1) - - while mem_list: - print(len(mem_list)) - v = check_mem_org(mem_list[-1], min_val) - if v: - cur_cost = (wt_delay * (mem_list[-1].access_time / min_val.min_delay) + - wt_dyn * (mem_list[-1].power.readOp.dynamic / min_val.min_dyn) + - wt_leakage * (mem_list[-1].power.readOp.leakage / min_val.min_leakage) + - wt_area * (mem_list[-1].area / min_val.min_area) + - wt_cyc * (mem_list[-1].cycle_time / min_val.min_cyc)) - else: - cur_cost = float('inf') - - if cur_cost < cost: - if res is not None: - del res - cost = cur_cost - res = mem_list[-1] - else: - del mem_list[-1] - - if(len(mem_list) > 0): - mem_list.pop() - - if not res: - print("ERROR: no valid tag organizations found") - exit(0) - - mem_list.append(res) - -# def filter_data_arr(curr_list): -# if not curr_list: -# print("ERROR: no valid data array organizations found") -# exit(1) - -# iter_list = list(curr_list) - -# for m in iter_list: -# if m is None: -# exit(1) - -# if (((m.access_time - m.arr_min.min_delay) / m.arr_min.min_delay > 0.5) and -# ((m.power.readOp.dynamic - m.arr_min.min_dyn) / m.arr_min.min_dyn > 0.5)): -# del m -# curr_list.remove(m) - -def filter_data_arr(curr_list): - if not curr_list: - print("ERROR: no valid data array organizations found") - exit(1) - - iter_list = list(curr_list) - - for m in iter_list: - if m is None: - exit(1) - - if (math.isnan(m.access_time) or math.isnan(m.arr_min.min_delay) or - math.isnan(m.power.readOp.dynamic) or math.isnan(m.arr_min.min_dyn)): - continue # Skip this iteration if any relevant value is NaN - - if (((m.access_time - m.arr_min.min_delay) / m.arr_min.min_delay > 0.5) and - ((m.power.readOp.dynamic - m.arr_min.min_dyn) / m.arr_min.min_dyn > 0.5)): - curr_list.remove(m) - -import threading -from functools import cmp_to_key - -def solve(fin_res): - pure_ram = g_ip.pure_ram - pure_cam = g_ip.pure_cam - - g_tp.init(g_ip.F_sz_um, False) - g_ip.print_detail_debug = 0 - - tag_arr = [] - data_arr = [] - sol_list = [uca_org_t()] - - fin_res.tag_array.access_time = 0 - fin_res.tag_array.Ndwl = 0 - fin_res.tag_array.Ndbl = 0 - fin_res.tag_array.Nspd = 0 - fin_res.tag_array.deg_bl_muxing = 0 - fin_res.tag_array.Ndsam_lev_1 = 0 - fin_res.tag_array.Ndsam_lev_2 = 0 - - calc_array = [CalcTimeMtWrapperStruct() for _ in range(NTHREADS)] - threads = [None] * NTHREADS - - for t in range(NTHREADS): - calc_array[t].tid = t - calc_array[t].pure_ram = pure_ram - calc_array[t].pure_cam = pure_cam - calc_array[t].data_res = MinValuesT() - calc_array[t].tag_res = MinValuesT() - - if not (pure_ram or pure_cam or g_ip.fully_assoc): - is_tag = True - g_tp.init(g_ip.F_sz_um, is_tag) - - for t in range(NTHREADS): - calc_array[t].is_tag = is_tag - calc_array[t].is_main_mem = False - calc_array[t].Nspd_min = 0.125 - threads[t] = threading.Thread(target=calc_time_mt_wrapper, args=(calc_array[t],)) - threads[t].start() - - for t in range(NTHREADS): - threads[t].join() - - - print("HELLo?") - for t in range(NTHREADS): - calc_array[t].data_arr.sort(key=cmp_to_key(MemArray.lt)) - - # CHECKPOINT - print(f'WAT {calc_array[t].data_arr[0].access_time}') - - data_arr.extend(calc_array[t].data_arr) - calc_array[t].tag_arr.sort(key=cmp_to_key(MemArray.lt)) - tag_arr.extend(calc_array[t].tag_arr) - - is_tag = False - g_tp.init(g_ip.F_sz_um, is_tag) - - for t in range(NTHREADS): - calc_array[t].is_tag = is_tag - calc_array[t].is_main_mem = g_ip.is_main_mem - if not (pure_cam or g_ip.fully_assoc): - calc_array[t].Nspd_min = g_ip.out_w / (g_ip.block_sz * 8) - else: - calc_array[t].Nspd_min = 1 - - threads[t] = threading.Thread(target=calc_time_mt_wrapper, args=(calc_array[t],)) - threads[t].start() - - for t in range(NTHREADS): - threads[t].join() - - data_arr.clear() - for t in range(NTHREADS): - calc_array[t].data_arr.sort(key=cmp_to_key(MemArray.lt)) - data_arr.extend(calc_array[t].data_arr) - - d_min = MinValuesT() - t_min = MinValuesT() - cache_min = MinValuesT() - - for t in range(NTHREADS): - d_min.update_min_values(calc_array[t].data_res) - t_min.update_min_values(calc_array[t].tag_res) - - for m in data_arr: - m.arr_min = d_min - - filter_data_arr(data_arr) - if not (pure_ram or pure_cam or g_ip.fully_assoc): - filter_tag_arr(t_min, tag_arr) - - if pure_ram or pure_cam or g_ip.fully_assoc: - for m in data_arr: - curr_org = sol_list[-1] - curr_org.tag_array2 = None - curr_org.data_array2 = m - - curr_org.find_delay() - print(f'ACCESS TIME: {curr_org.access_time}') - curr_org.find_energy() - curr_org.find_area() - curr_org.find_cyc() - - cache_min.update_min_values_from_uca(curr_org) - - sol_list.append(uca_org_t()) - else: - while tag_arr: - arr_temp = tag_arr.pop() - for m in data_arr: - curr_org = sol_list[-1] - curr_org.tag_array2 = arr_temp - curr_org.data_array2 = m - - curr_org.find_delay() - print(f'ACCESS TIME: {curr_org.access_time}') - curr_org.find_energy() - curr_org.find_area() - curr_org.find_cyc() - - cache_min.update_min_values_from_uca(curr_org) - - sol_list.append(uca_org_t()) - - sol_list.pop() - - find_optimal_uca(fin_res, cache_min, sol_list) - - sol_list.clear() - - for m in data_arr: - if m != fin_res.data_array2: - del m - data_arr.clear() - - for t in range(NTHREADS): - del calc_array[t].data_res - del calc_array[t].tag_res - - del calc_array - del cache_min - del d_min - del t_min - -def update(fin_res): - if fin_res.tag_array2: - g_tp.init(g_ip.F_sz_um, True) - tag_arr_dyn_p = DynamicParameter( - True, g_ip.pure_ram, g_ip.pure_cam, - fin_res.tag_array2.Nspd, fin_res.tag_array2.Ndwl, - fin_res.tag_array2.Ndbl, fin_res.tag_array2.Ndcm, - fin_res.tag_array2.Ndsam_lev_1, fin_res.tag_array2.Ndsam_lev_2, - fin_res.data_array2.wt, g_ip.is_main_mem - ) - if tag_arr_dyn_p.is_valid: - tag_arr = UCA(tag_arr_dyn_p) - fin_res.tag_array2.power = tag_arr.power - else: - print("ERROR: Cannot retrieve array structure for leakage feedback") - exit(1) - - g_tp.init(g_ip.F_sz_um, False) - data_arr_dyn_p = DynamicParameter( - False, g_ip.pure_ram, g_ip.pure_cam, - fin_res.data_array2.Nspd, fin_res.data_array2.Ndwl, - fin_res.data_array2.Ndbl, fin_res.data_array2.Ndcm, - fin_res.data_array2.Ndsam_lev_1, fin_res.data_array2.Ndsam_lev_2, - fin_res.data_array2.wt, g_ip.is_main_mem - ) - if data_arr_dyn_p.is_valid: - data_arr = UCA(data_arr_dyn_p) - fin_res.data_array2.power = data_arr.power - else: - print("ERROR: Cannot retrieve array structure for leakage feedback") - exit(1) - - fin_res.find_energy() diff --git a/cacti-main/cacti_python/Ucache.py b/cacti-main/cacti_python/Ucache.py index 2a09f17..778e284 100644 --- a/cacti-main/cacti_python/Ucache.py +++ b/cacti-main/cacti_python/Ucache.py @@ -151,11 +151,7 @@ def calc_time_mt_wrapper(void_obj): wt_min = 'Global' wt_max = 'Low_swing' - print("CHECKPOINT Nspd_min") - print(Nspd_min) - # print(MAXDATASPD) - # print() - #TODO Npsd_min messed up + # Check Npsd_min for Nspd in range(int(Nspd_min), int(MAXDATASPD), int(math.ceil(Nspd_min*2))): # replace with proper enum if(wt_min == "Global"): @@ -204,11 +200,6 @@ def calc_time_mt_wrapper(void_obj): else: wt_max = 10 - print("CHECKPOINT") - print(wt_min) - print(wt_max) - print() - for wr in range(wt_min, wt_max+1): for iter in range(tid, niter, NTHREADS): Ndwl = 1 << (iter // (Ndbl_niter * Ndcm_niter)) @@ -322,7 +313,11 @@ def calculate_time( ptr_array.subarray_height = uca.bank.mat.subarray.area.h ptr_array.subarray_length = uca.bank.mat.subarray.area.w ptr_array.power = uca.power + + #RECENT CHANGE: MAX - ignore to reduce expression size ptr_array.delay_senseamp_mux_decoder = symbolic_convex_max(uca.delay_array_to_sa_mux_lev_1_decoder, uca.delay_array_to_sa_mux_lev_2_decoder) + # ptr_array.delay_senseamp_mux_decoder = uca.delay_array_to_sa_mux_lev_1_decoder + ptr_array.delay_before_subarray_output_driver = uca.delay_before_subarray_output_driver ptr_array.delay_from_subarray_output_driver_to_output = uca.delay_from_subarray_out_drv_to_out ptr_array.delay_route_to_bank = uca.htree_in_add.delay @@ -711,14 +706,8 @@ def solve(fin_res): for t in range(NTHREADS): threads[t].join() - - print("HELLo?") for t in range(NTHREADS): calc_array[t].data_arr.sort(key=cmp_to_key(MemArray.lt)) - - # CHECKPOINT - print(f'WAT {calc_array[t].data_arr[0].access_time}') - data_arr.extend(calc_array[t].data_arr) calc_array[t].tag_arr.sort(key=cmp_to_key(MemArray.lt)) tag_arr.extend(calc_array[t].tag_arr) @@ -767,7 +756,6 @@ def solve(fin_res): curr_org.data_array2 = m curr_org.find_delay() - print(f'ACCESS TIME: {curr_org.access_time}') curr_org.find_energy() curr_org.find_area() curr_org.find_cyc() @@ -784,7 +772,6 @@ def solve(fin_res): curr_org.data_array2 = m curr_org.find_delay() - print(f'ACCESS TIME: {curr_org.access_time}') curr_org.find_energy() curr_org.find_area() curr_org.find_cyc() @@ -907,8 +894,9 @@ def calculate_time_single( ptr_array.subarray_height = uca.bank.mat.subarray.area.h ptr_array.subarray_length = uca.bank.mat.subarray.area.w ptr_array.power = uca.power - # TODO check + ptr_array.delay_senseamp_mux_decoder = symbolic_convex_max(uca.delay_array_to_sa_mux_lev_1_decoder, uca.delay_array_to_sa_mux_lev_2_decoder) + ptr_array.delay_before_subarray_output_driver = uca.delay_before_subarray_output_driver ptr_array.delay_from_subarray_output_driver_to_output = uca.delay_from_subarray_out_drv_to_out ptr_array.delay_route_to_bank = uca.htree_in_add.delay @@ -1137,7 +1125,7 @@ def solve_single(): curr_org.data_array2 = data_arr curr_org.find_delay() - # curr_org.find_energy() + curr_org.find_energy() # curr_org.find_area() # curr_org.find_cyc() @@ -1147,7 +1135,7 @@ def solve_single(): curr_org.data_array2 = data_arr curr_org.find_delay() - # curr_org.find_energy() + curr_org.find_energy() # curr_org.find_area() # curr_org.find_cyc() diff --git a/cacti-main/cacti_python/basic_circuit.py b/cacti-main/cacti_python/basic_circuit.py deleted file mode 100644 index 9ed1d48..0000000 --- a/cacti-main/cacti_python/basic_circuit.py +++ /dev/null @@ -1,482 +0,0 @@ -import math -from const import * - -UNI_LEAK_STACK_FACTOR = 0.43 - -def powers(base, n): - p = 1 - for i in range(1, n + 1): - p *= base - return p - -def is_pow2(val): - if val <= 0: - return False - elif val == 1: - return True - else: - return (_log2(val) != _log2(val - 1)) - -def _log2(num): - if num == 0: - raise ValueError("log0?") - log2 = 0 - while num > 1: - num >>= 1 - log2 += 1 - return log2 - -def factorial(n, m=1): - fa = m - for i in range(m + 1, n + 1): - fa *= i - return fa - -def combination(n, m): - return factorial(n, m + 1) // factorial(n - m) - - -outside_mat = "outside_mat" -inside_mat = "inside_mat" -local_wires = "local_wires" - - -Add_htree = "Add_htree" -Data_in_htree = "Data_in_htree" -Data_out_htree = "Data_out_htree" -Search_in_htree = "Search_in_htree" -Search_out_htree = "Search_out_htree" - - -Row_add_path = "Row_add_path" -Col_add_path = "Col_add_path" -Data_path = "Data_path" - - -nmos = "nmos" -pmos = "pmos" -inv = "inv" -nand = "nand" -nor = "nor" -tri = "tri" -tg = "tg" - -parallel = "parallel" -series = "series" - -class WirePlacement: - outside_mat = "outside_mat" - inside_mat = "inside_mat" - local_wires = "local_wires" - -class HtreeType: - Add_htree = "Add_htree" - Data_in_htree = "Data_in_htree" - Data_out_htree = "Data_out_htree" - Search_in_htree = "Search_in_htree" - Search_out_htree = "Search_out_htree" - -class MemorybusType: - Row_add_path = "Row_add_path" - Col_add_path = "Col_add_path" - Data_path = "Data_path" - -class GateType: - nmos = "nmos" - pmos = "pmos" - inv = "inv" - nand = "nand" - nor = "nor" - tri = "tri" - tg = "tg" - -class HalfNetTopology: - parallel = "parallel" - series = "series" - -def logtwo(x): - assert x > 0 - return math.log(x) / math.log(2.0) - -def gate_C(width, wirelength, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): - if _is_dram and _is_sram: - dt = g_tp.dram_acc # DRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif not _is_dram and _is_sram: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global - return (dt.C_g_ideal + dt.C_overlap + 3 * dt.C_fringe) * width + dt.l_phy * Cpolywire - -def gate_C_pass(width, wirelength, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): - return gate_C(width, wirelength, _is_dram, _is_sram, _is_wl_tr, _is_sleep_tx) - -def drain_C_(width, nchannel, stack, next_arg_thresh_folding_width_or_height_cell, fold_dimension, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): - if _is_dram and _is_sram: - dt = g_tp.dram_acc # DRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif not _is_dram and _is_sram: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global - - c_junc_area = dt.C_junc - c_junc_sidewall = dt.C_junc_sidewall - c_fringe = 2 * dt.C_fringe - c_overlap = 2 * dt.C_overlap - drain_C_metal_connecting_folded_tr = 0 - - if next_arg_thresh_folding_width_or_height_cell == 0: - w_folded_tr = fold_dimension - else: - h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL - ratio_p_to_n = 2.0 / (2.0 + 1.0) - if nchannel: - w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) - else: - w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) - - num_folded_tr = int(sp.ceiling(width / w_folded_tr)) - if num_folded_tr < 2: - w_folded_tr = width - - total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + (stack - 1) * g_tp.spacing_poly_to_poly - drain_h_for_sidewall = w_folded_tr - total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1) - if num_folded_tr > 1: - total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly) - if num_folded_tr % 2 == 0: - drain_h_for_sidewall = 0 - total_drain_height_for_cap_wrt_gate *= num_folded_tr - drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w - - drain_C_area = c_junc_area * total_drain_w * w_folded_tr - drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w) - drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate - - return drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr - -def tr_R_on(width, nchannel, stack, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): - if _is_dram and _is_sram: - dt = g_tp.dram_acc # DRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif not _is_dram and _is_sram: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global - - restrans = dt.R_nch_on if nchannel else dt.R_pch_on - return stack * restrans / width - -def R_to_w(res, nchannel, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): - if _is_dram and _is_sram: - dt = g_tp.dram_acc # DRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif not _is_dram and _is_sram: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global - - restrans = dt.R_nch_on if nchannel else dt.R_pch_on - return restrans / res - -def pmos_to_nmos_sz_ratio(_is_dram=False, _is_wl_tr=False, _is_sleep_tx=False): - if _is_dram and _is_wl_tr: - return g_tp.dram_wl.n_to_p_eff_curr_drv_ratio - elif _is_sleep_tx: - return g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio - else: - return g_tp.peri_global.n_to_p_eff_curr_drv_ratio - -def horowitz(inputramptime, tf, vs1, vs2, rise): - if inputramptime == 0 and vs1 == vs2: - return tf * (-math.log(vs1) if vs1 < 1 else math.log(vs1)) - - a = inputramptime / tf - if rise == RISE: - b = 0.5 - td = tf * math.sqrt(math.log(vs1) ** 2 + 2 * a * b * (1.0 - vs1)) + tf * (math.log(vs1) - math.log(vs2)) - else: - b = 0.4 - td = tf * math.sqrt(math.log(1.0 - vs1) ** 2 + 2 * a * b * vs1) + tf * (math.log(1.0 - vs1) - math.log(1.0 - vs2)) - return td - -def cmos_Ileak(nWidth, pWidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return nWidth * dt.I_off_n + pWidth * dt.I_off_p - -def simplified_nmos_Isat(nwidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return nwidth * dt.I_on_n - -def simplified_pmos_Isat(pwidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return pwidth * dt.I_on_n / dt.n_to_p_eff_curr_drv_ratio - -def simplified_nmos_leakage(nwidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return nwidth * dt.I_off_n - -def simplified_pmos_leakage(pwidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return pwidth * dt.I_off_p - -def cmos_Ig_n(nWidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return nWidth * dt.I_g_on_n - -def cmos_Ig_p(pWidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return pWidth * dt.I_g_on_p - -def cmos_Isub_leakage(nWidth, pWidth, fanin, g_type, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False, topo=series): - assert fanin >= 1 - nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx) - pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx) - Isub = 0 - num_states = int(math.pow(2.0, fanin)) - - if g_type == nmos: - if fanin == 1: - Isub = nmos_leak / num_states - else: - if topo == parallel: - Isub = nmos_leak * fanin / num_states - else: - for num_off_tx in range(1, fanin + 1): - Isub += nmos_leak * math.pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * combination(fanin, num_off_tx) - Isub /= num_states - - elif g_type == pmos: - if fanin == 1: - Isub = pmos_leak / num_states - else: - if topo == parallel: - Isub = pmos_leak * fanin / num_states - else: - for num_off_tx in range(1, fanin + 1): - Isub += pmos_leak * math.pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * combination(fanin, num_off_tx) - Isub /= num_states - - elif g_type == inv: - Isub = (nmos_leak + pmos_leak) / 2 - - elif g_type == nand: - Isub += fanin * pmos_leak - for num_off_tx in range(1, fanin + 1): - Isub += nmos_leak * math.pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * combination(fanin, num_off_tx) - Isub /= num_states - - elif g_type == nor: - for num_off_tx in range(1, fanin + 1): - Isub += pmos_leak * math.pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * combination(fanin, num_off_tx) - Isub += fanin * nmos_leak - Isub /= num_states - - elif g_type == tri: - Isub += (nmos_leak + pmos_leak) / 2 - Isub += nmos_leak * UNI_LEAK_STACK_FACTOR - Isub /= 2 - - elif g_type == tg: - Isub = (nmos_leak + pmos_leak) / 2 - - else: - raise ValueError("Invalid gate type") - - return Isub - -def cmos_Ig_leakage(nWidth, pWidth, fanin, g_type, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False, topo=series): - assert fanin >= 1 - nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx) - pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx) - Ig_on = 0 - num_states = int(math.pow(2.0, fanin)) - - if g_type == nmos: - if fanin == 1: - Ig_on = nmos_leak / num_states - else: - if topo == parallel: - for num_on_tx in range(1, fanin + 1): - Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx - else: - Ig_on += nmos_leak * fanin - for num_on_tx in range(1, fanin): - Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2 - Ig_on /= num_states - - elif g_type == pmos: - if fanin == 1: - Ig_on = pmos_leak / num_states - else: - if topo == parallel: - for num_on_tx in range(1, fanin + 1): - Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx - else: - Ig_on += pmos_leak * fanin - for num_on_tx in range(1, fanin): - Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2 - Ig_on /= num_states - - elif g_type == inv: - Ig_on = (nmos_leak + pmos_leak) / 2 - - elif g_type == nand: - for num_on_tx in range(1, fanin + 1): - Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx - Ig_on += nmos_leak * fanin - for num_on_tx in range(1, fanin): - Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2 - Ig_on /= num_states - - elif g_type == nor: - Ig_on += pmos_leak * fanin - for num_on_tx in range(1, fanin): - Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2 - for num_on_tx in range(1, fanin + 1): - Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx - Ig_on /= num_states - - elif g_type == tri: - Ig_on += (2 * nmos_leak + 2 * pmos_leak) / 2 - Ig_on += (nmos_leak + pmos_leak) / 2 - Ig_on /= 2 - - elif g_type == tg: - Ig_on = (nmos_leak + pmos_leak) / 2 - - else: - raise ValueError("Invalid gate type") - - return Ig_on - -def shortcircuit_simple(vt, velocity_index, c_in, c_out, w_nmos, w_pmos, i_on_n, i_on_p, i_on_n_in, i_on_p_in, vdd): - fo_n = i_on_n / i_on_n_in - fo_p = i_on_p / i_on_p_in - fanout = c_out / c_in - beta_ratio = i_on_p / i_on_n - vt_to_vdd_ratio = vt / vdd - - p_short_circuit_discharge_low = (10 / 3) * (pow((vdd - vt) - vt_to_vdd_ratio, 3.0) / pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio * vt_to_vdd_ratio)) * c_in * vdd * vdd * fo_p * fo_p / fanout / beta_ratio - p_short_circuit_charge_low = (10 / 3) * (pow((vdd - vt) - vt_to_vdd_ratio, 3.0) / pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio * vt_to_vdd_ratio)) * c_in * vdd * vdd * fo_n * fo_n / fanout * beta_ratio - - p_short_circuit_discharge = p_short_circuit_discharge_low - p_short_circuit_charge = p_short_circuit_charge_low - p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge) / 2 - - return p_short_circuit - -def shortcircuit(vt, velocity_index, c_in, c_out, w_nmos, w_pmos, i_on_n, i_on_p, i_on_n_in, i_on_p_in, vdd): - fo_p = i_on_p / i_on_p_in - fanout = 1 - beta_ratio = i_on_p / i_on_n - e = 2.71828 - f_alpha = 1 / (velocity_index + 2) - velocity_index / (2 * (velocity_index + 3)) + velocity_index / (velocity_index + 4) * (velocity_index / 2 - 1) - k_v = 0.9 / 0.8 + (vdd - vt) / 0.8 * math.log(10 * (vdd - vt) / e) - g_v_alpha = (velocity_index + 1) * pow((1 - velocity_index), velocity_index) * pow((1 - velocity_index), velocity_index / 2) / f_alpha / pow((1 - velocity_index - velocity_index), (velocity_index / 2 + velocity_index + 2)) - h_v_alpha = pow(2, velocity_index) * (velocity_index + 1) * pow((1 - velocity_index), velocity_index) / pow((1 - velocity_index - velocity_index), (velocity_index + 1)) - - p_short_circuit_discharge = k_v * vdd * vdd * c_in * fo_p * fo_p / ((vdd - vt) * g_v_alpha * fanout * beta_ratio / 2 / k_v + h_v_alpha * fo_p) - return p_short_circuit_discharge - -def wire_resistance(resistivity, wire_width, wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter): - resistance = alpha_scatter * resistivity / ((wire_thickness - barrier_thickness - dishing_thickness) * (wire_width - 2 * barrier_thickness)) - return resistance - -def wire_capacitance(wire_width, wire_thickness, wire_spacing, ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, fringe_cap): - vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness - sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing - total_cap = vertical_cap + sidewall_cap + fringe_cap - return total_cap - -def tsv_resistance(resistivity, tsv_len, tsv_diam, tsv_contact_resistance): - resistance = resistivity * tsv_len / (math.pi * (tsv_diam / 2) ** 2) + tsv_contact_resistance - return resistance - -def tsv_capacitance(tsv_len, tsv_diam, tsv_pitch, dielec_thickness, liner_dielectric_constant, depletion_width): - e_si = PERMITTIVITY_FREE_SPACE * 11.9 - PI = math.pi - lateral_coupling_constant = 4.1 - diagonal_coupling_constant = 5.3 - - liner_cap = 2 * PI * PERMITTIVITY_FREE_SPACE * liner_dielectric_constant * tsv_len / math.log(1 + dielec_thickness / (tsv_diam / 2)) - depletion_cap = 2 * PI * e_si * tsv_len / math.log(1 + depletion_width / (dielec_thickness + tsv_diam / 2)) - self_cap = 1 / (1 / liner_cap + 1 / depletion_cap) - - lateral_coupling_cap = 0.4 * (0.225 * math.log(0.97 * tsv_len / tsv_diam) + 0.53) * e_si / (tsv_pitch - tsv_diam) * PI * tsv_diam * tsv_len - diagonal_coupling_cap = 0.4 * (0.225 * math.log(0.97 * tsv_len / tsv_diam) + 0.53) * e_si / (1.414 * tsv_pitch - tsv_diam) * PI * tsv_diam * tsv_len - - total_cap = self_cap + lateral_coupling_constant * lateral_coupling_cap + diagonal_coupling_constant * diagonal_coupling_cap - return total_cap - -def tsv_area(tsv_pitch): - return tsv_pitch ** 2 diff --git a/cacti-main/cacti_python/cacti_interface.py b/cacti-main/cacti_python/cacti_interface.py index 30ce2f0..cf24a4b 100644 --- a/cacti-main/cacti_python/cacti_interface.py +++ b/cacti-main/cacti_python/cacti_interface.py @@ -230,29 +230,56 @@ def __init__(self): def find_delay(self): data_arr = self.data_array2 tag_arr = self.tag_array2 - - print("uca_org_t find_delay 0") - if g_ip.pure_ram or g_ip.pure_cam or g_ip.fully_assoc: - print("pure ram") self.access_time = data_arr.access_time - elif g_ip.fast_access: - print("fast_access") - self.access_time = symbolic_convex_max(tag_arr.access_time, data_arr.access_time) - elif g_ip.is_seq_acc: - print("seq_acc") - self.access_time = tag_arr.access_time + data_arr.access_time + if(g_ip.pure_ram): + if (g_ip.is_main_mem): + self.access_time *= 10e6 / 2 + else: + self.access_time *= 10e6 / 4 + else: + self.access_time *= 2 else: - print("else") - self.access_time = symbolic_convex_max(tag_arr.access_time + data_arr.delay_senseamp_mux_decoder, - data_arr.delay_before_subarray_output_driver) + data_arr.delay_from_subarray_output_driver_to_output - print("uca_org_t find_delay 1") + if g_ip.fast_access: + self.access_time = symbolic_convex_max(tag_arr.access_time, data_arr.access_time) + elif g_ip.is_seq_acc: + self.access_time = tag_arr.access_time + data_arr.access_time + else: + self.access_time = symbolic_convex_max(tag_arr.access_time + data_arr.delay_senseamp_mux_decoder, + data_arr.delay_before_subarray_output_driver) + data_arr.delay_from_subarray_output_driver_to_output + + if (g_ip.is_main_mem): + self.access_time *= 10e6 / 2 + else: + self.access_time *= 10e6 / 4 + + def find_energy(self): if not (g_ip.pure_ram or g_ip.pure_cam or g_ip.fully_assoc): self.power = self.data_array2.power + self.tag_array2.power + # self.power.readOp.dynamic *= 3e-1 + # self.power.writeOp.dynamic *= 3e-1 + # self.power.readOp.leakage *= 1e-3 else: self.power = self.data_array2.power + if g_ip.pure_ram: + self.power.readOp.dynamic *= 5e-4 + self.power.writeOp.dynamic *= 5e-4 + self.power.readOp.leakage *= 5 + elif g_ip.fully_assoc: + self.power.readOp.dynamic *= 15e-5 + self.power.writeOp.dynamic *= 15e-5 + self.power.readOp.leakage *= 5e-3 + + if g_ip.is_main_mem: + self.power.readOp.dynamic *= 3 + self.power.writeOp.dynamic *= 3 + self.power.readOp.leakage /= 2 + + self.power.readOp.dynamic *= 1e9 + self.power.writeOp.dynamic *= 1e9 + self.power.readOp.leakage *= 1e3 def find_area(self): if g_ip.pure_ram or g_ip.pure_cam or g_ip.fully_assoc: diff --git a/cacti-main/cacti_python/component.py b/cacti-main/cacti_python/component.py index 894f62d..91c7380 100644 --- a/cacti-main/cacti_python/component.py +++ b/cacti-main/cacti_python/component.py @@ -23,16 +23,21 @@ def compute_diffusion_width(num_stacked_in, num_folded_tr): w_poly = g_ip.F_sz_um spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact - # TODO RECENTLY COMMENTED - total_diff_w = sp.Piecewise( - (2 * spacing_poly_to_poly + num_stacked_in * w_poly + (num_stacked_in - 1) * g_tp.spacing_poly_to_poly, num_folded_tr <= 1), - (2 * spacing_poly_to_poly + num_stacked_in * w_poly + (num_stacked_in - 1) * g_tp.spacing_poly_to_poly + - (num_folded_tr - 2) * 2 * spacing_poly_to_poly + - (num_folded_tr - 1) * num_stacked_in * w_poly + - (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly, num_folded_tr > 1) - ) + # Change: Relational - set to one option to reduce expression size + # total_diff_w = sp.Piecewise( + # (2 * spacing_poly_to_poly + num_stacked_in * w_poly + (num_stacked_in - 1) * g_tp.spacing_poly_to_poly, num_folded_tr <= 1), + # (2 * spacing_poly_to_poly + num_stacked_in * w_poly + (num_stacked_in - 1) * g_tp.spacing_poly_to_poly + + # (num_folded_tr - 2) * 2 * spacing_poly_to_poly + + # (num_folded_tr - 1) * num_stacked_in * w_poly + + # (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly, num_folded_tr > 1) + # ) + + total_diff_w = 2 * spacing_poly_to_poly + num_stacked_in * w_poly + (num_stacked_in - 1) * g_tp.spacing_poly_to_poly + \ + (num_folded_tr - 2) * 2 * spacing_poly_to_poly + \ + (num_folded_tr - 1) * num_stacked_in * w_poly + \ + (num_folded_tr - 1) * (num_stacked_in - 1) * g_tp.spacing_poly_to_poly + # total_diff_w = 2 * spacing_poly_to_poly + num_stacked_in * w_poly + (num_stacked_in - 1) * g_tp.spacing_poly_to_poly - # TODO Important can't do this symbolically # total_diff_w = (2 * spacing_poly_to_poly + # for both source and drain # num_stacked_in * w_poly + # (num_stacked_in - 1) * g_tp.spacing_poly_to_poly) @@ -44,75 +49,22 @@ def compute_diffusion_width(num_stacked_in, num_folded_tr): return total_diff_w def compute_gate_area(gate_type, num_inputs, w_pmos, w_nmos, h_gate): - #TODO IMPORTANT this can't be done synbolically - - # TODO inverstiage Why is w_pmos and w_nmos 0 - # Traceback (most recent call last): - # File "/Users/dw/Documents/codesign/cacti/diff/second/main.py", line 44, in - # mat = Mat(dyn_p) - # ^^^^^^^^^^ - # File "/Users/dw/Documents/codesign/cacti/diff/second/mat.py", line 164, in __init__ - # self.r_predec_blk1 = PredecBlk(num_dec_signals, self.row_dec, C_wire_predec_blk_out, R_wire_predec_blk_out, self.num_subarrays_per_mat, self.is_dram, True) - # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - # File "/Users/dw/Documents/codesign/cacti/diff/second/decoder.py", line 271, in __init__ - # self.compute_area() - # File "/Users/dw/Documents/codesign/cacti/diff/second/decoder.py", line 396, in compute_area - # tot_area_L1_nand2 = compute_gate_area(NAND, 2, self.w_L1_nand2_p[0], self.w_L1_nand2_n[0], g_tp.cell_h_def) - # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - # File "/Users/dw/Documents/codesign/cacti/diff/second/component.py", line 39, in compute_gate_area - # if w_pmos <= 0.0 or w_nmos <= 0.0: - # ^^^^^^^^^^^^^ - # File "/Users/dw/miniconda3/lib/python3.11/site-packages/sympy/core/relational.py", line 510, in __bool__ - # raise TypeError("cannot determine truth value of Relational") - - # if w_pmos <= 0.0 or w_nmos <= 0.0: - # return 0.0 - - if isinstance(w_pmos, (int, float)) and isinstance(w_nmos, (int, float)): - if w_pmos <= 0.0 or w_nmos <= 0.0: + # Relational + if w_pmos <= 0.0 or w_nmos <= 0.0: return 0.0 - - print("compute_gate_area CHECKPINT 0") - # TODO RELATIONAL - # simplify_w_pmos = sp.simplify(w_pmos) - # simplify_w_nmos = sp.simplify(w_nmos) - # if simplify_w_pmos.is_zero or simplify_w_pmos.is_negative or simplify_w_nmos.is_zero or simplify_w_nmos.is_negative: - # return 0.0 - - print("compute_gate_area CHECKPINT 1") - - # print(f"w_pmos {w_pmos} and w_nmos {w_nmos}") h_tr_region = h_gate - 2 * g_tp.HPOWERRAIL ratio_p_to_n = w_pmos / (w_pmos + w_nmos) - # TODO IMPORTANT this can't be done synbolically - # simplify_ratio_p_to_n = sp.simplify(ratio_p_to_n) + # Relational resolved with 'result' below # if ratio_p_to_n >= 1 or ratio_p_to_n <= 0: - # return 0.0 - # if sp.Or(ratio_p_to_n >= 1 or ratio_p_to_n <= 0): - # return 0.0 - # if simplify_ratio_p_to_n.is_integer and (simplify_ratio_p_to_n >= 1 or simplify_ratio_p_to_n <= 0): - # return 0.0 - if isinstance(ratio_p_to_n, (int, float)): - if ratio_p_to_n <= 0 or ratio_p_to_n >= 1: - return 0.0 - - - - print("compute_gate_area CHECKPINT 2") + # return 0.0 - w_folded_pmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * ratio_p_to_n w_folded_nmos = (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) * (1 - ratio_p_to_n) - # TODO IMPORTANT this can't be done synbolically + # Relational # assert w_folded_pmos > 0 - if(w_folded_pmos == 0): - return 0 - if isinstance(w_folded_pmos, (int, float)): - if w_folded_pmos <= 0: - return 0 num_folded_pmos = sp.ceiling(w_pmos / w_folded_pmos) num_folded_nmos = sp.ceiling(w_nmos / w_folded_nmos) @@ -131,21 +83,20 @@ def compute_gate_area(gate_type, num_inputs, w_pmos, w_nmos, h_gate): sys.exit(1) gate_w = symbolic_convex_max(total_ndiff_w, total_pdiff_w) - - # TODO Important can't do this symbolically - # if w_folded_nmos > w_nmos: - # gate_h = (w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL) - # else: - # gate_h = h_gate - - # TODO RECENTLY COMMENTED - gate_h = sp.Piecewise( - ((w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL), w_folded_nmos > w_nmos), - (h_gate, True) # TODO CHECK Else case + + # Change: Relational - set to one option to reduce expression size + # gate_h = sp.Piecewise( + # ((w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL), w_folded_nmos > w_nmos), + # (h_gate, True) + # ) + gate_h = (w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL) + + result = sp.Piecewise( + (0, sp.Or(ratio_p_to_n >= 1, ratio_p_to_n <= 0)), + (gate_w * gate_h, True ) ) - # gate_h = (w_nmos + w_pmos + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + 2 * g_tp.HPOWERRAIL) - return gate_w * gate_h # Assuming area is width * height + return result def compute_tr_width_after_folding(input_width, threshold_folding_width): if input_width <= 0: @@ -170,66 +121,49 @@ def height_sense_amplifier(pitch_sense_amp): return h_pmos_tr + h_nmos_tr + g_tp.MIN_GAP_BET_P_AND_N_DIFFS + def logical_effort(num_gates_min, g, F, w_n, w_p, C_load, p_to_n_sz_ratio, is_dram_, is_wl_tr_, max_w_nmos): - #TODO deleted int - # print(f'F is this {F}') - # print(f'F is this {fopt}') - num_gates = sp.log(F) / sp.log(fopt) - if(F == 0): - num_gates = 2 - # print(f'num_gates is this {num_gates}') - # print("End") - # print("") - - # TODO MOD Important - # num_gates += (num_gates % 2) - num_gates += (num_gates + 2) - num_gates = symbolic_convex_max(num_gates, num_gates_min) + # num_gates = sp.log(F) / sp.log(fopt) + # if(F == 0): + # num_gates = 4 + # else: + # num_gates = 4 + num_gates = 4 f = sp.Pow(F, 1.0 / num_gates) - num_gates = 4 # TODO IMPORTANT this can't be done synbolically i = num_gates - 1 - #TODO IMPORTANT this can't be done synbolically - # i = 2 - #print(f'OH NOES! {i}') + if (f == 0): + f = 1 C_in = C_load / f - # print(f'I is {i}') + w_n[i] = (1.0 / (1.0 + p_to_n_sz_ratio)) * C_in / gate_C(1, 0, is_dram_, False, is_wl_tr_) - # print(f'w_n[i] is this {w_n[i]}') - # print(f'min_w_nmos_ is this {g_tp.min_w_nmos_}') - # print("End") - # print("") - #TODO important nan shortcut - if not contains_any_symbol(w_n[i]) and math.isnan(w_n[i]): - w_n[i] = 0 + + # RECENT CHANGE: Max - ignore to reduce expression length w_n[i] = symbolic_convex_max(w_n[i], g_tp.min_w_nmos_) + w_p[i] = p_to_n_sz_ratio * w_n[i] - #TODO IMPORTANT SINCE RELATIONAL + # CHANGE: ARRAY LOGIC + # #TODO IMPORTANT SINCE RELATIONAL # if w_n[i] > max_w_nmos: - # print(f'OH NOES p_to_n_sz_ratio! {p_to_n_sz_ratio}') - # print(f'OH NOES max_w_nmos! {max_w_nmos}') # C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, False, is_wl_tr_) - # print(f'OH NOES C_ld! {C_ld}') # F = g * C_ld / gate_C(w_n[0] + w_p[0], 0, is_dram_, False, is_wl_tr_) - # print(f'OH NOES F! {F}') - # #TODO deleted int - # num_gates = sp.log(F) / sp.log(fopt) + 1 - # num_gates += (num_gates % 2) - # print(f'OH NOES num_gates! {num_gates}') - # num_gates = symbolic_convex_max(num_gates, num_gates_min) + + # num_gates += 2 # f = sp.Pow(F, 1.0 / (num_gates - 1)) # i = num_gates - 1 # w_n[i] = max_w_nmos # w_p[i] = p_to_n_sz_ratio * w_n[i] for i in range(num_gates - 2, 0, -1): - #TODO important zoo shortcut w_item = w_n[i + 1] / f if w_item == sp.zoo: w_item = 0 - - w_n[i] = symbolic_convex_max(w_item, g_tp.min_w_nmos_) + + # RECENT CHANGE: Max - ignore to reduce expression length + # w_n[i] = symbolic_convex_max(w_item, g_tp.min_w_nmos_) + w_n[i] = w_item + w_p[i] = p_to_n_sz_ratio * w_n[i] assert num_gates <= MAX_NUMBER_GATES_STAGE @@ -237,16 +171,13 @@ def logical_effort(num_gates_min, g, F, w_n, w_p, C_load, p_to_n_sz_ratio, is_dr def compute_tr_width_after_folding(input_width, threshold_folding_width): - # TODO can't do relational - # if input_width <= 0: - # return 0 + # CHANGE: RELATIONAL: this function either returns 0 or result if isinstance(input_width, (int, float)): if input_width <= 0: return 0 - # print(f"input_widht {input_width}") - # print(f"thresh {threshold_folding_width}") + # CHANGE: RELATIONAL # num_folded_tr = sp.ceiling(input_width / threshold_folding_width) # spacing_poly_to_poly = g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact # width_poly = g_ip.F_sz_um @@ -255,10 +186,10 @@ def compute_tr_width_after_folding(input_width, threshold_folding_width): # return total_diff_width result = sp.Piecewise( - (0, input_width <= 0), # Return 0 if input_width <= 0 + (0, input_width <= 0), (sp.ceiling(input_width / threshold_folding_width) * g_ip.F_sz_um + (sp.ceiling(input_width / threshold_folding_width) + 1) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact), - True) # TODO CHECKCalculate total_diff_width otherwise + True) ) return result \ No newline at end of file diff --git a/cacti-main/cacti_python/const.py b/cacti-main/cacti_python/const.py index b56e42b..86f6b6a 100644 --- a/cacti-main/cacti_python/const.py +++ b/cacti-main/cacti_python/const.py @@ -45,9 +45,9 @@ MINSUBARRAYCOLS = 2 MAXSUBARRAYCOLS = 262144 -INV = 0 -NOR = 1 -NAND = 2 +INV = "INV" +NOR = "NOR" +NAND = "NAND" NUMBER_TECH_FLAVORS = 4 NUMBER_INTERCONNECT_PROJECTION_TYPES = 2 diff --git a/cacti-main/cacti_python/decoder.py b/cacti-main/cacti_python/decoder.py index 190a4b8..d02148c 100644 --- a/cacti-main/cacti_python/decoder.py +++ b/cacti-main/cacti_python/decoder.py @@ -8,6 +8,7 @@ from .powergating import SleepTx from .parameter import g_tp from .parameter import g_ip +import time class Decoder(Component): def __init__(self, _num_dec_signals, flag_way_select, _C_ld_dec_out, _R_wire_dec_out, fully_assoc_, is_dram_, is_wl_tr_, cell_): @@ -29,10 +30,9 @@ def __init__(self, _num_dec_signals, flag_way_select, _C_ld_dec_out, _R_wire_dec self.w_dec_n = [0] * MAX_NUMBER_GATES_STAGE self.w_dec_p = [0] * MAX_NUMBER_GATES_STAGE - # TODO RELATIONAL - _num_dec_signals = _num_dec_signals - num_addr_bits_dec = sp.log(_num_dec_signals, 2) + num_addr_bits_dec = _log2(_num_dec_signals) + # Relational # if num_addr_bits_dec < 4: # if flag_way_select: # self.exist = True @@ -46,22 +46,13 @@ def __init__(self, _num_dec_signals, flag_way_select, _C_ld_dec_out, _R_wire_dec # else: # self.num_in_signals = 2 - # TODO increases length - self.exist = sp.Piecewise( - (True, num_addr_bits_dec < 4), # self.exist is True if num_addr_bits_dec < 4 - (self.exist, True) # self.exist is True otherwise - ) - self.num_in_signals = sp.Piecewise( - (2, sp.And(num_addr_bits_dec < 4, flag_way_select)), # self.num_in_signals = 2 if num_addr_bits_dec < 4 and flag_way_select - (0, sp.And(num_addr_bits_dec < 4, not flag_way_select)), # self.num_in_signals = 0 if num_addr_bits_dec < 4 and not flag_way_select - (3, sp.And(num_addr_bits_dec >= 4, flag_way_select)), # self.num_in_signals = 3 if num_addr_bits_dec >= 4 and flag_way_select - (2, num_addr_bits_dec >= 4, True) # self.num_in_signals = 2 if num_addr_bits_dec >= 4 and not flag_way_select + (2, sp.And(num_addr_bits_dec < 4, flag_way_select != 0)), # self.num_in_signals = 2 if num_addr_bits_dec < 4 and flag_way_select + (0, sp.And(num_addr_bits_dec < 4, flag_way_select == 0)), # self.num_in_signals = 0 if num_addr_bits_dec < 4 and not flag_way_select + (3, sp.And(num_addr_bits_dec >= 4, flag_way_select != 0)), # self.num_in_signals = 3 if num_addr_bits_dec >= 4 and flag_way_select + (2, True) # self.num_in_signals = 2 if num_addr_bits_dec >= 4 and not flag_way_select ) - # self.exist = True - # self.num_in_signals = 3 - # assert self.cell.h > 0 # assert self.cell.w > 0 self.area.h = g_tp.h_dec * self.cell.h @@ -86,7 +77,8 @@ def compute_widths(self): F *= self.C_ld_dec_out / (gate_C(self.w_dec_n[0], 0, self.is_dram, False, self.is_wl_tr) + gate_C(self.w_dec_p[0], 0, self.is_dram, False, self.is_wl_tr)) - #print(f'BEORE CALL logical effort {g_tp.max_w_nmos_dec}') + + print("Made it to Decoder Logical Effort") self.num_gates = logical_effort( self.num_gates_min, gnand2 if self.num_in_signals == 2 else gnand3, @@ -99,6 +91,7 @@ def compute_widths(self): self.is_wl_tr, g_tp.max_w_nmos_dec ) + print("Made it past Decoder Logical Effort") def compute_area(self): cumulative_area = 0 @@ -256,8 +249,7 @@ def __init__(self, num_dec_signals, dec_, C_wire_predec_blk_out, R_wire_predec_b self.w_L2_n = [0] * MAX_NUMBER_GATES_STAGE self.w_L2_p = [0] * MAX_NUMBER_GATES_STAGE - print("PRE CHECKPOINT 0") - # TODO CHECK RELATIONAL + # CHANGE: RELATIONAL: set to default values, otherwise, expression will be too long # if is_blk1: # if num_addr_bits_dec <= 0: # return @@ -275,6 +267,7 @@ def __init__(self, num_dec_signals, dec_, C_wire_predec_blk_out, R_wire_predec_b # self.C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out # else: # if num_addr_bits_dec >= 4: + self.exist = True self.number_input_addr_bits = blk2_num_input_addr_bits branch_effort_predec_out = sp.Pow(2, blk1_num_input_addr_bits) #(1 << blk1_num_input_addr_bits) @@ -282,49 +275,45 @@ def __init__(self, num_dec_signals, dec_, C_wire_predec_blk_out, R_wire_predec_b self.R_wire_predec_blk_out = R_wire_predec_blk_out_ self.C_ld_predec_blk_out = branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out - self.exist = sp.Piecewise( - (self.exist, sp.And(is_blk1, num_addr_bits_dec <= 0)), - (True, True) - ) + # self.exist = sp.Piecewise( + # (self.exist, sp.And(is_blk1, num_addr_bits_dec <= 0)), + # (True, True) + # ) - self.number_input_addr_bits = sp.Piecewise( - (num_addr_bits_dec, sp.And(is_blk1, num_addr_bits_dec < 4)), # self.number_input_addr_bits = num_addr_bits_dec if is_blk1 and 0 < num_addr_bits_dec < 4 - (blk1_num_input_addr_bits, sp.And(is_blk1, num_addr_bits_dec >= 4)), # self.number_input_addr_bits = blk1_num_input_addr_bits if is_blk1 and num_addr_bits_dec >= 4 - (blk2_num_input_addr_bits, True) # self.number_input_addr_bits = blk2_num_input_addr_bits if not is_blk1 - ) + # self.number_input_addr_bits = sp.Piecewise( + # (num_addr_bits_dec, sp.And(is_blk1, num_addr_bits_dec < 4)), # self.number_input_addr_bits = num_addr_bits_dec if is_blk1 and 0 < num_addr_bits_dec < 4 + # (blk1_num_input_addr_bits, sp.And(is_blk1, num_addr_bits_dec >= 4)), # self.number_input_addr_bits = blk1_num_input_addr_bits if is_blk1 and num_addr_bits_dec >= 4 + # (blk2_num_input_addr_bits, True) # self.number_input_addr_bits = blk2_num_input_addr_bits if not is_blk1 + # ) - branch_effort_predec_out = sp.Piecewise( - (sp.Pow(2, blk2_num_input_addr_bits), sp.And(is_blk1, num_addr_bits_dec >= 4)), # branch_effort_predec_out = 2^blk2_num_input_addr_bits if is_blk1 and num_addr_bits_dec >= 4 - (sp.Pow(2, blk1_num_input_addr_bits), True) - ) + # branch_effort_predec_out = sp.Piecewise( + # (sp.Pow(2, blk2_num_input_addr_bits), sp.And(is_blk1, num_addr_bits_dec >= 4)), # branch_effort_predec_out = 2^blk2_num_input_addr_bits if is_blk1 and num_addr_bits_dec >= 4 + # (sp.Pow(2, blk1_num_input_addr_bits), True) + # ) - C_ld_dec_gate = sp.Piecewise( - (num_dec_per_predec * gate_C(self.dec.w_dec_n[0] + self.dec.w_dec_p[0], 0, self.is_dram_, False, False), - num_addr_bits_dec >= 4), # C_ld_dec_gate calculation based on conditions - (1, True) # C_ld_dec_gate = 13/10 if num_addr_bits_dec <= 0 - ) + # C_ld_dec_gate = sp.Piecewise( + # (num_dec_per_predec * gate_C(self.dec.w_dec_n[0] + self.dec.w_dec_p[0], 0, self.is_dram_, False, False), + # num_addr_bits_dec >= 4), # C_ld_dec_gate calculation based on conditions + # (1, True) # C_ld_dec_gate = 13/10 if num_addr_bits_dec <= 0 + # ) - self.R_wire_predec_blk_out = sp.Piecewise( - (self.dec.R_wire_dec_out, num_addr_bits_dec < 4), # self.R_wire_predec_blk_out based on conditions - (R_wire_predec_blk_out_, True), # self.R_wire_predec_blk_out = 3/2 if num_addr_bits_dec <= 0 - ) + # self.R_wire_predec_blk_out = sp.Piecewise( + # (self.dec.R_wire_dec_out, num_addr_bits_dec < 4), # self.R_wire_predec_blk_out based on conditions + # (R_wire_predec_blk_out_, True), # self.R_wire_predec_blk_out = 3/2 if num_addr_bits_dec <= 0 + # ) - self.C_ld_predec_blk_out = sp.Piecewise( - (self.dec.C_ld_dec_out, num_addr_bits_dec < 4), # self.C_ld_predec_blk_out based on conditions - (branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out, True) # self.C_ld_predec_blk_out = 13/10 if num_addr_bits_dec <= 0 - ) + # self.C_ld_predec_blk_out = sp.Piecewise( + # (self.dec.C_ld_dec_out, num_addr_bits_dec < 4), # self.C_ld_predec_blk_out based on conditions + # (branch_effort_predec_out * C_ld_dec_gate + C_wire_predec_blk_out, True) # self.C_ld_predec_blk_out = 13/10 if num_addr_bits_dec <= 0 + # ) - print("PRE CHECKPOINT 1") self.compute_widths() - print("PRE CHECKPOINT 2") self.compute_area() - print("PRE CHECKPOINT 3") def compute_widths(self): - print("huh?") if not self.exist: return - print("huh1?") + p_to_n_sz_ratio = pmos_to_nmos_sz_ratio(self.is_dram_) gnand2 = (2 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio) gnand3 = (3 + p_to_n_sz_ratio) / (1 + p_to_n_sz_ratio) @@ -332,7 +321,6 @@ def compute_widths(self): flag_L2_gate = 0 number_inputs_L1_gate = 0 - print(f'number input addr_bits {self.number_input_addr_bits}') if self.number_input_addr_bits == 1: flag_two_unique_paths = False number_inputs_L1_gate = 2 @@ -376,7 +364,6 @@ def compute_widths(self): flag_L2_gate = 3 branch_effort_nand3_gate_output = 64 else: - # TODO this is being reached but why? flag_two_unique_paths = False number_inputs_L1_gate = 2 flag_L2_gate = 2 @@ -386,11 +373,7 @@ def compute_widths(self): self.number_inputs_L1_gate = number_inputs_L1_gate self.flag_L2_gate = flag_L2_gate - print(f'PREDEC Compute widths: end') - if flag_L2_gate: - print(f'PREDEC Compute widths: flagL2_gate') - print(f'PREDEC Compute widths: {g_tp.min_w_nmos_}, {p_to_n_sz_ratio}') if flag_L2_gate == 2: self.w_L2_n[0] = 2 * g_tp.min_w_nmos_ F = gnand2 @@ -398,10 +381,7 @@ def compute_widths(self): self.w_L2_n[0] = 3 * g_tp.min_w_nmos_ F = gnand3 self.w_L2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_ - print(f"F0 before call to logical_effort {F}") - print(f"self.C_ld_predec_blk_out before call to logical_effort {self.C_ld_predec_blk_out}") F *= self.C_ld_predec_blk_out / (gate_C(self.w_L2_n[0], 0, self.is_dram_) + gate_C(self.w_L2_p[0], 0, self.is_dram_)) - print(f"F1 before call to logical_effort {F}") self.number_gates_L2 = logical_effort( self.min_number_gates_L2, gnand2 if flag_L2_gate == 2 else gnand3, @@ -450,8 +430,6 @@ def compute_widths(self): g_tp.max_w_nmos_ ) else: - print(f'PREDEC Compute widths: else flagL2_gate') - print(f'PREDEC Compute widths: {g_tp.min_w_nmos_}, {p_to_n_sz_ratio}') if self.number_inputs_L1_gate == 2: self.w_L1_nand2_n[0] = 2 * g_tp.min_w_nmos_ self.w_L1_nand2_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_ @@ -467,7 +445,6 @@ def compute_widths(self): self.is_dram_, False, g_tp.max_w_nmos_ ) - print(f'PREDEC In Input L1 gate 2: {self.w_L1_nand2_n[0]}, {self.w_L1_nand2_p[0]}') elif self.number_inputs_L1_gate == 3: self.w_L1_nand3_n[0] = 3 * g_tp.min_w_nmos_ self.w_L1_nand3_p[0] = p_to_n_sz_ratio * g_tp.min_w_nmos_ @@ -483,7 +460,6 @@ def compute_widths(self): self.is_dram_, False, g_tp.max_w_nmos_ ) - print(f'PREDEC In Input L1 gate 3: {self.w_L1_nand2_n[0]}, {self.w_L1_nand2_p[0]}') def compute_area(self): if self.exist: @@ -494,14 +470,10 @@ def compute_area(self): leak_L1_nand3 = 0 gate_leak_L1_nand3 = 0 - print("compute_area CHECKPINT 0") - tot_area_L1_nand2 = compute_gate_area(NAND, 2, self.w_L1_nand2_p[0], self.w_L1_nand2_n[0], g_tp.cell_h_def) leak_L1_nand2 = cmos_Isub_leakage(self.w_L1_nand2_n[0], self.w_L1_nand2_p[0], 2, nand, self.is_dram_) gate_leak_L1_nand2 = cmos_Ig_leakage(self.w_L1_nand2_n[0], self.w_L1_nand2_p[0], 2, nand, self.is_dram_) - print("compute_area CHECKPINT 1") - if self.number_inputs_L1_gate != 3: tot_area_L1_nand3 = 0 leak_L1_nand3 = 0 @@ -560,19 +532,10 @@ def compute_area(self): self.num_L1_active_nand2_path = 0 self.num_L1_active_nand3_path = 3 - print("compute_area CHECKPINT 2") - - print(self.number_gates_L1_nand2_path) for i in range(1, self.number_gates_L1_nand2_path): - print("compute_area CHECKPINT 2.1") tot_area_L1_nand2 += compute_gate_area(INV, 1, self.w_L1_nand2_p[i], self.w_L1_nand2_n[i], g_tp.cell_h_def) - print("compute_area CHECKPINT 2.2") leak_L1_nand2 += cmos_Isub_leakage(self.w_L1_nand2_n[i], self.w_L1_nand2_p[i], 2, nand, self.is_dram_) - print("compute_area CHECKPINT 2.3") gate_leak_L1_nand2 += cmos_Ig_leakage(self.w_L1_nand2_n[i], self.w_L1_nand2_p[i], 2, nand, self.is_dram_) - print("compute_area CHECKPINT 2.4") - - print("compute_area CHECKPINT 3") tot_area_L1_nand2 *= num_L1_nand2 leak_L1_nand2 *= num_L1_nand2 @@ -587,8 +550,6 @@ def compute_area(self): leak_L1_nand3 *= num_L1_nand3 gate_leak_L1_nand3 *= num_L1_nand3 - print("compute_area CHECKPINT 4") - cumulative_area_L1 = tot_area_L1_nand2 + tot_area_L1_nand3 cumulative_area_L2 = 0.0 leakage_L2 = 0.0 @@ -603,15 +564,11 @@ def compute_area(self): leakage_L2 = cmos_Isub_leakage(self.w_L2_n[0], self.w_L2_p[0], 3, nand, self.is_dram_) gate_leakage_L2 = cmos_Ig_leakage(self.w_L2_n[0], self.w_L2_p[0], 3, nand, self.is_dram_) - print("compute_area CHECKPINT 5") - for i in range(1, self.number_gates_L2): cumulative_area_L2 += compute_gate_area(INV, 1, self.w_L2_p[i], self.w_L2_n[i], g_tp.cell_h_def) leakage_L2 += cmos_Isub_leakage(self.w_L2_n[i], self.w_L2_p[i], 2, inv, self.is_dram_) gate_leakage_L2 += cmos_Ig_leakage(self.w_L2_n[i], self.w_L2_p[i], 2, inv, self.is_dram_) - print("compute_area CHECKPINT 6") - cumulative_area_L2 *= num_L2 leakage_L2 *= num_L2 gate_leakage_L2 *= num_L2 @@ -625,26 +582,14 @@ def compute_area(self): self.power_L2.readOp.gate_leakage = gate_leakage_L2 * g_tp.peri_global.Vdd def compute_delays(self, inrisetime): - print(f'PredecBLK {inrisetime}') ret_val = (0, 0) if self.exist: - print("self.exist") Vdd = g_tp.peri_global.Vdd inrisetime_nand2_path = inrisetime[0] inrisetime_nand3_path = inrisetime[1] - - print("flag 0") - - # print(f'debug_num_addr_bits_dec {self.debug_num_addr_bits_dec}') - # print(f'blk1_num_input_addr_bits {self.debug_blk1_num_input_addr_bits}') - # print(f'blk2_num_input_addr_bits {self.debug_blk2_num_input_addr_bits}') - - # print(f'number input addr_bits {self.number_input_addr_bits}') - # print(f'numberinputs L1 gate: {self.number_inputs_L1_gate}') if self.flag_two_unique_paths or self.number_inputs_L1_gate == 2: - print("flag 1") rd = tr_R_on(self.w_L1_nand2_n[0], NCH, 2, self.is_dram_) c_load = gate_C(self.w_L1_nand2_n[1] + self.w_L1_nand2_p[1], 0.0, self.is_dram_) c_intrinsic = 2 * drain_C_(self.w_L1_nand2_p[0], PCH, 1, 1, g_tp.cell_h_def, self.is_dram_) + \ @@ -678,7 +623,6 @@ def compute_delays(self, inrisetime): self.power_nand2_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd if self.flag_two_unique_paths or self.number_inputs_L1_gate == 3: - print("flag 2") rd = tr_R_on(self.w_L1_nand3_n[0], NCH, 3, self.is_dram_) c_load = gate_C(self.w_L1_nand3_n[1] + self.w_L1_nand3_p[1], 0.0, self.is_dram_) c_intrinsic = 3 * drain_C_(self.w_L1_nand3_p[0], PCH, 1, 1, g_tp.cell_h_def, self.is_dram_) + \ @@ -710,7 +654,6 @@ def compute_delays(self, inrisetime): self.delay_nand3_path += this_delay ret_val = (ret_val[0], this_delay / (1.0 - 0.5)) self.power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * Vdd * Vdd - print("return Predeclk") return ret_val def leakage_feedback(self, temperature): @@ -979,8 +922,6 @@ def compute_delays(self, inrisetime_nand2_path, inrisetime_nand3_path): Vdd = g_tp.peri_global.Vdd if self.flag_driver_exists: - print(f"PRedecBLKDrv IN HERE") - # print(f"PREDECBLK DRV {self.number_gates_nand2_path}") for i in range(self.number_gates_nand2_path - 1): rd = tr_R_on(self.width_nand2_path_n[i], NCH, 1, self.is_dram) c_gate_load = gate_C(self.width_nand2_path_p[i + 1] + self.width_nand2_path_n[i + 1], 0.0, self.is_dram) @@ -992,7 +933,6 @@ def compute_delays(self, inrisetime_nand2_path, inrisetime_nand3_path): self.delay_nand2_path += this_delay inrisetime_nand2_path = this_delay / (1.0 - 0.5) self.power_nand2_path.readOp.dynamic += (c_gate_load + c_intrinsic) * 0.5 * Vdd * Vdd - # print(self.delay_nand2_path) if self.number_gates_nand2_path != 0: i = self.number_gates_nand2_path - 1 @@ -1028,9 +968,6 @@ def compute_delays(self, inrisetime_nand2_path, inrisetime_nand3_path): self.delay_nand3_path += this_delay ret_val = (ret_val[0], this_delay / (1.0 - 0.5)) self.power_nand3_path.readOp.dynamic += (c_intrinsic + c_load) * 0.5 * Vdd * Vdd - print("compute Predecblkdrv delay") - print(f"PREDECBLKDRV (ret_val[0], this_delay / (1.0 - 0.5) {ret_val}") - print("HELLO the ret val should be set to the above") return ret_val def get_rdOp_dynamic_E(self, num_act_mats_hor_dir): @@ -1114,18 +1051,10 @@ def __init__(self, drv1, drv2): self.power.readOp.gate_leakage = self.driver_power.readOp.gate_leakage + self.block_power.readOp.gate_leakage def compute_delays(self, inrisetime): - # print("PREDECBLK COMPUTE DELAYS") tmp_pair1 = self.drv1.compute_delays(inrisetime, inrisetime) - # print(f'tmp_pair1 before PredecBLK compute_delay {tmp_pair1}') tmp_pair1 = self.blk1.compute_delays(tmp_pair1) tmp_pair2 = self.drv2.compute_delays(inrisetime, inrisetime) tmp_pair2 = self.blk2.compute_delays(tmp_pair2) - # print(" JUST COMPTUED DELAYS!") - # print(f"tmp_pair before max delay {tmp_pair1[0]}") - # print(f"tmp_pair before max delay {tmp_pair1[1]}") - # print(f"tmp_pair before max delay {tmp_pair2[0]}") - # print(f"tmp_pair before max delay {tmp_pair2[1]}") - # print("END") tmp_pair1 = self.get_max_delay_before_decoder(tmp_pair1, tmp_pair2) @@ -1143,8 +1072,6 @@ def compute_delays(self, inrisetime): self.power.readOp.dynamic = self.driver_power.readOp.dynamic + self.block_power.readOp.dynamic - print("past predec compute_delays") - # print(f"tmp_pair {tmp_pair1[0]}") self.delay = tmp_pair1[0] return tmp_pair1[1] @@ -1180,6 +1107,9 @@ def leakage_feedback(self, temperature): def get_max_delay_before_decoder(self, input_pair1, input_pair2): ret_val = [0, 0] + + # CHANGE: MAX: set to one option, otherwise, expression will be too long + # delay = self.drv1.delay_nand2_path + self.blk1.delay_nand2_path # ret_val[0] = delay # ret_val[1] = input_pair1[0] @@ -1196,39 +1126,16 @@ def get_max_delay_before_decoder(self, input_pair1, input_pair2): # ret_val[0] = delay # ret_val[1] = input_pair2[1] - # TODO MAX CHECK - print("INSIDE MAX FUNCTION") delay1 = self.drv1.delay_nand2_path + self.blk1.delay_nand2_path delay2 = self.drv1.delay_nand3_path + self.blk1.delay_nand3_path + delay3 = self.drv2.delay_nand2_path + self.blk2.delay_nand2_path delay4 = self.drv2.delay_nand3_path + self.blk2.delay_nand3_path - - # print(f"delay1{delay1})") - # print(f"delay2 {delay2}") - # print(f"delay3 {delay3}") - # print(f"delay4 {delay4}") - # TODO MAX CHECK - # max_delay = sp.Max(delay1, delay2, delay3, delay4) - # max_delay = symbolic_convex_max(delay1, delay2) - # max_delay = symbolic_convex_max(max_delay, delay3) - # max_delay = symbolic_convex_max(max_delay, delay4) - max_delay = delay2 - print("past max_delay set") + max_delay = delay2 # picked an option to reduce expression size ret_val[0] = max_delay - # print(f"input_pair1 {input_pair1}") - # print(f"input_pair2 {input_pair2}") - # print(f"max_delay {ret_val[0]}") - # TODO Piecewise doesn't work - # ret_val[1] = sp.Piecewise( - # (input_pair1[0], max_delay == delay1), - # (input_pair1[1], max_delay == delay2), - # (input_pair2[0], max_delay == delay3), - # (input_pair2[1], max_delay == delay4) - # ) ret_val[1] = input_pair1[0] - #print(f"input_pair {ret_val[1]}") return ret_val @@ -1295,23 +1202,17 @@ def compute_delay(self, inrisetime): for i in range(self.number_gates - 1): rd = tr_R_on(self.width_n[i], NCH, 1, self.is_dram_) - print("CHECKPOINT 6.52") c_load = gate_C(self.width_n[i + 1] + self.width_p[i + 1], 0.0, self.is_dram_) - print("CHECKPOINT 6.53") c_intrinsic = drain_C_(self.width_p[i], PCH, 1, 1, g_tp.cell_h_def, self.is_dram_) + \ drain_C_(self.width_n[i], NCH, 1, 1, g_tp.cell_h_def, self.is_dram_) - print("CHECKPOINT 6.54") tf = rd * (c_intrinsic + c_load) this_delay = horowitz(inrisetime, tf, 0.5, 0.5, RISE) - print("CHECKPOINT 6.55") self.delay += this_delay inrisetime = this_delay / (1.0 - 0.5) self.power.readOp.dynamic += (c_intrinsic + c_load) * g_tp.peri_global.Vdd * g_tp.peri_global.Vdd self.power.readOp.leakage += cmos_Isub_leakage(self.width_n[i], self.width_p[i], 1, inv, self.is_dram_) * g_tp.peri_global.Vdd self.power.readOp.gate_leakage += cmos_Ig_leakage(self.width_n[i], self.width_p[i], 1, inv, self.is_dram_) * g_tp.peri_global.Vdd - print("CHECKPOINT 6.6") - i = self.number_gates - 1 c_load = self.c_gate_load + self.c_wire_load rd = tr_R_on(self.width_n[i], NCH, 1, self.is_dram_) @@ -1332,9 +1233,5 @@ def compute_delay(self, inrisetime): # Helper functions and constants (placeholders for actual implementations) NAND = 'nand' INV = 'inv' -NCH = 'nch' -PCH = 'pch' -RISE = 'rise' - diff --git a/cacti-main/cacti_python/htree.py b/cacti-main/cacti_python/htree.py index c333ed8..c9efcf8 100644 --- a/cacti-main/cacti_python/htree.py +++ b/cacti-main/cacti_python/htree.py @@ -1,8 +1,9 @@ import math import enum -from .parameter import g_tp +from .parameter import g_tp, _log2 from .component import * from .wire import Wire +import time class HtreeType(enum.Enum): Add_htree = 1 @@ -46,13 +47,11 @@ def __init__(self, wire_model, mat_w, mat_h, a_bits, d_inbits, search_data_in, d assert self.ndbl >= 2 and self.ndwl >= 2 - self.max_unpipelined_link_delay = 0 # TODO + self.max_unpipelined_link_delay = 0 self.min_w_nmos = g_tp.min_w_nmos_ self.min_w_pmos = self.deviceType.n_to_p_eff_curr_drv_ratio * self.min_w_nmos - # TODO have to fix the HtreeType self.wire_bw = self.init_wire_bw = 0 - print(f"SEE TREE TYPE! {self.tree_type}") if self.tree_type == "Add_htree": self.wire_bw = self.init_wire_bw = self.add_bits @@ -83,7 +82,9 @@ def input_nand(self, s1, s2, l_eff): w1 = Wire(self.wt, l_eff) pton_size = self.deviceType.n_to_p_eff_curr_drv_ratio nsize = s1 * (1 + pton_size) / (2 + pton_size) - nsize = symbolic_convex_max(1, nsize) + + # CHANGE: LENGTH max ignored, otherwise, expression will be too long + # nsize = symbolic_convex_max(1, nsize) tc = 2 * tr_R_on(nsize * self.min_w_nmos, NCH, 1) * ( drain_C_(nsize * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + @@ -109,20 +110,36 @@ def output_buffer(self, s1, s2, l_eff): w1 = Wire(self.wt, l_eff) pton_size = self.deviceType.n_to_p_eff_curr_drv_ratio size = s1 * (1 + pton_size) / (2 + pton_size + 1 + 2 * pton_size) + s_eff = (gate_C(s2 * (self.min_w_nmos + self.min_w_pmos), 0) + w1.wire_cap(l_eff * 1e-6, True)) / gate_C(s2 * (self.min_w_nmos + self.min_w_pmos), 0) - tr_size = gate_C(s1 * (self.min_w_nmos + self.min_w_pmos), 0) * 1 / 2 / (s_eff * gate_C(self.min_w_pmos, 0)) - size = symbolic_convex_max(1, size) + if s_eff == sp.zoo: + s_eff = 1 + + tr_size = gate_C(s1 * (self.min_w_nmos + self.min_w_pmos), 0) / (2 * s_eff * gate_C(self.min_w_pmos, 0)) + + # CHANGE: MAX - avoiding max to decrease expression size + # size = symbolic_convex_max(1, size) res_nor = 2 * tr_R_on(size * self.min_w_pmos, PCH, 1) res_ptrans = tr_R_on(tr_size * self.min_w_nmos, NCH, 1) - cap_nand_out = drain_C_(size * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + drain_C_(size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) * 2 + gate_C(tr_size * self.min_w_pmos, 0) - cap_ptrans_out = 2 * (drain_C_(tr_size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + drain_C_(tr_size * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + gate_C(s1 * (self.min_w_nmos + self.min_w_pmos), 0) + cap_nand_out = ( + drain_C_(size * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + + 2 * drain_C_(size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(tr_size * self.min_w_pmos, 0) + ) + cap_ptrans_out = ( + 2 * (drain_C_(tr_size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + drain_C_(tr_size * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + + gate_C(s1 * (self.min_w_nmos + self.min_w_pmos), 0) + ) tc = res_nor * cap_nand_out + (res_nor + res_ptrans) * cap_ptrans_out - self.delay += horowitz(w1.out_rise_time, tc, - self.deviceType.Vth / self.deviceType.Vdd, self.deviceType.Vth / self.deviceType.Vdd, RISE) + self.delay += horowitz( + w1.out_rise_time, tc, + self.deviceType.Vth / self.deviceType.Vdd, self.deviceType.Vth / self.deviceType.Vdd, RISE + ) + # NAND self.power.readOp.dynamic += 0.5 * ( 2 * drain_C_(size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + drain_C_(size * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + @@ -135,6 +152,7 @@ def output_buffer(self, s1, s2, l_eff): gate_C(tr_size * self.min_w_pmos, 0) ) * self.deviceType.Vdd * self.deviceType.Vdd * self.init_wire_bw + # NOT self.power.readOp.dynamic += 0.5 * ( drain_C_(size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + drain_C_(size * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + @@ -147,6 +165,7 @@ def output_buffer(self, s1, s2, l_eff): gate_C(size * (self.min_w_nmos + self.min_w_pmos), 0) ) * self.deviceType.Vdd * self.deviceType.Vdd * self.init_wire_bw + # NOR self.power.readOp.dynamic += 0.5 * ( drain_C_(size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + 2 * drain_C_(size * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) + @@ -159,15 +178,14 @@ def output_buffer(self, s1, s2, l_eff): gate_C(tr_size * (self.min_w_nmos + self.min_w_pmos), 0) ) * self.deviceType.Vdd * self.deviceType.Vdd * self.init_wire_bw + # Output transistor self.power.readOp.dynamic += 0.5 * ( - drain_C_(tr_size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(tr_size * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + + 2 * (drain_C_(tr_size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + drain_C_(tr_size * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + gate_C(s1 * (self.min_w_nmos + self.min_w_pmos), 0) ) * self.deviceType.Vdd * self.deviceType.Vdd self.power.searchOp.dynamic += 0.5 * ( - drain_C_(tr_size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(tr_size * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def) * 2 + + 2 * (drain_C_(tr_size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + drain_C_(tr_size * self.min_w_nmos, NCH, 1, 1, g_tp.cell_h_def)) + gate_C(s1 * (self.min_w_nmos + self.min_w_pmos), 0) ) * self.deviceType.Vdd * self.deviceType.Vdd * self.init_wire_bw @@ -190,49 +208,64 @@ def output_buffer(self, s1, s2, l_eff): def in_htree(self): # temp var - s1, s2, s3 = 0, 0, 0 + s1 = 0 + s2 = 0 + s3 = 0 l_eff = 0 - wtemp1, wtemp2, wtemp3 = None, None, None - len_temp, ht_temp = 0, 0 + wtemp1 = None + wtemp2 = None + wtemp3 = None + len_temp = 0 + ht_temp = 0 option = 0 - #TODO deleted ints - h = math.log2(self.ndwl / 2) # horizontal nodes - v = math.log2(self.ndbl / 2) # vertical nodes + # RECENT CHANGE + h = max(int(_log2(self.ndwl / 2)), 1) # horizontal nodes + v = max(int(_log2(self.ndbl / 2)), 1) # vertical nodes + if self.uca_tree: - # this computation does not consider the wires that route from edge to middle ht_temp = (self.mat_height * self.ndbl / 2 + - ((self.add_bits + self.data_in_bits + self.data_out_bits + (self.search_data_in_bits + self.search_data_out_bits)) * g_tp.wire_outside_mat.pitch * + ((self.add_bits + self.data_in_bits + self.data_out_bits + + (self.search_data_in_bits + self.search_data_out_bits)) * g_tp.wire_outside_mat.pitch * 2 * (1 - pow(0.5, h)))) / 2 len_temp = (self.mat_width * self.ndwl / 2 + - ((self.add_bits + self.data_in_bits + self.data_out_bits + (self.search_data_in_bits + self.search_data_out_bits)) * g_tp.wire_outside_mat.pitch * + ((self.add_bits + self.data_in_bits + self.data_out_bits + + (self.search_data_in_bits + self.search_data_out_bits)) * g_tp.wire_outside_mat.pitch * 2 * (1 - pow(0.5, v)))) / 2 else: if self.ndwl == self.ndbl: ht_temp = ((self.mat_height * self.ndbl / 2) + - ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * (self.ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * + (self.ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) + ((self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * h)) / 2 len_temp = (self.mat_width * self.ndwl / 2 + - ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * (self.ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * + (self.ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + ((self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * v)) / 2 + elif self.ndwl > self.ndbl: - excess_part = (math.log2(self.ndwl / 2) - math.log2(self.ndbl / 2)) + excess_part = (_log2(self.ndwl / 2) - _log2(self.ndbl / 2)) ht_temp = ((self.mat_height * self.ndbl / 2) + - ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * ((self.ndbl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + + ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * + ((self.ndbl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + (self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * (2 * (1 - pow(0.5, h - v)) + pow(0.5, v - h) * v)) / 2 len_temp = (self.mat_width * self.ndwl / 2 + - ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * (self.ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + + ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * + (self.ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + ((self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * v)) / 2 else: - excess_part = (math.log2(self.ndbl / 2) - math.log2(self.ndwl / 2)) + excess_part = (_log2(self.ndbl / 2) - _log2(self.ndwl / 2)) ht_temp = ((self.mat_height * self.ndbl / 2) + - ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * ((self.ndwl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + + ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * + ((self.ndwl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + ((self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * h)) / 2 len_temp = (self.mat_width * self.ndwl / 2 + - ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * ((self.ndwl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + - (self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2 * (1 - pow(0.5, v - h)))) / 2 + ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * + ((self.ndwl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + + (self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * + (h + 2 * (1 - pow(0.5, v - h)))) / 2 self.area.h = ht_temp * 2 self.area.w = len_temp * 2 @@ -240,7 +273,7 @@ def in_htree(self): self.power.readOp.dynamic = 0 self.power.readOp.leakage = 0 self.power.searchOp.dynamic = 0 - len = len_temp + len_ = len_temp ht = ht_temp / 2 while v > 0 or h > 0: @@ -252,28 +285,25 @@ def in_htree(self): del wtemp3 if h > v: - # the iteration considers only one horizontal link - wtemp1 = Wire(self.wt, len) # hor - wtemp2 = Wire(self.wt, len / 2) # ver - len_temp = len - len /= 2 + wtemp1 = Wire(self.wt, len_) # hor + wtemp2 = Wire(self.wt, len_ / 2) # ver + len_temp = len_ + len_ /= 2 wtemp3 = None h -= 1 option = 0 elif v > 0 and h > 0: - # considers one horizontal link and one vertical link - wtemp1 = Wire(self.wt, len) # hor + wtemp1 = Wire(self.wt, len_) # hor wtemp2 = Wire(self.wt, ht) # ver - wtemp3 = Wire(self.wt, len / 2) # next hor - len_temp = len + wtemp3 = Wire(self.wt, len_ / 2) # next hor + len_temp = len_ ht_temp = ht - len /= 2 + len_ /= 2 ht /= 2 v -= 1 h -= 1 option = 1 else: - # considers only one vertical link assert h == 0 wtemp1 = Wire(self.wt, ht) # ver wtemp2 = Wire(self.wt, ht / 2) # hor @@ -285,14 +315,16 @@ def in_htree(self): self.delay += wtemp1.delay self.power.readOp.dynamic += wtemp1.power.readOp.dynamic + self.power.searchOp.dynamic += wtemp1.power.readOp.dynamic * self.wire_bw self.power.readOp.leakage += wtemp1.power.readOp.leakage * self.wire_bw self.power.readOp.gate_leakage += wtemp1.power.readOp.gate_leakage * self.wire_bw + if not self.uca_tree and option == 2 or self.search_tree: self.wire_bw *= 2 # wire bandwidth doubles only for vertical branches if not self.uca_tree: - # TODO important relational cannot handle + # Change: Relational set to one value, otherwise, expression will be too long # if len_temp > wtemp1.repeater_spacing: # s1 = wtemp1.repeater_size # l_eff = wtemp1.repeater_spacing @@ -300,65 +332,72 @@ def in_htree(self): # s1 = (len_temp / wtemp1.repeater_spacing) * wtemp1.repeater_size # l_eff = len_temp - s1 = sp.Piecewise( - (wtemp1.repeater_size, len_temp > wtemp1.repeater_spacing), - ((len_temp / wtemp1.repeater_spacing) * wtemp1.repeater_size, True) - ) + # print(f"lentemp: {len_temp}") + # s1 = sp.Piecewise( + # (wtemp1.repeater_size, len_temp > wtemp1.repeater_spacing), + # ((len_temp / wtemp1.repeater_spacing) * wtemp1.repeater_size, True) + # ) - l_eff = sp.Piecewise( - (wtemp1.repeater_spacing, len_temp > wtemp1.repeater_spacing), - (len_temp, True) - ) + s1 = wtemp1.repeater_size - # TODO important relational cannot handle + # l_eff = sp.Piecewise( + # (wtemp1.repeater_spacing, len_temp > wtemp1.repeater_spacing), + # (len_temp, True) + # ) + + l_eff = wtemp1.repeater_spacing + + # Change: Relational set to one value, otherwise, expression will be too long # if ht_temp > wtemp2.repeater_spacing: # s2 = wtemp2.repeater_size # else: # s2 = (len_temp / wtemp2.repeater_spacing) * wtemp2.repeater_size - s2 = sp.Piecewise( - (wtemp2.repeater_size, ht_temp > wtemp2.repeater_spacing), - ((len_temp / wtemp2.repeater_spacing) * wtemp2.repeater_size, True) - ) + # s2 = sp.Piecewise( + # (wtemp2.repeater_size, ht_temp > wtemp2.repeater_spacing), + # ((len_temp / wtemp2.repeater_spacing) * wtemp2.repeater_size, True) + # ) + s2 = wtemp2.repeater_size - # first level self.input_nand(s1, s2, l_eff) if option != 1: continue - # second level self.delay += wtemp2.delay self.power.readOp.dynamic += wtemp2.power.readOp.dynamic + self.power.searchOp.dynamic += wtemp2.power.readOp.dynamic * self.wire_bw self.power.readOp.leakage += wtemp2.power.readOp.leakage * self.wire_bw self.power.readOp.gate_leakage += wtemp2.power.readOp.gate_leakage * self.wire_bw if self.uca_tree: - self.power.readOp.leakage += (wtemp2.power.readOp.leakage * self.wire_bw) + self.power.readOp.leakage += wtemp2.power.readOp.leakage * self.wire_bw self.power.readOp.gate_leakage += wtemp2.power.readOp.gate_leakage * self.wire_bw else: - self.power.readOp.leakage += (wtemp2.power.readOp.leakage * self.wire_bw) + self.power.readOp.leakage += wtemp2.power.readOp.leakage * self.wire_bw self.power.readOp.gate_leakage += wtemp2.power.readOp.gate_leakage * self.wire_bw self.wire_bw *= 2 - # TODO RELATIONAL + # Change: Relational set to one value, otherwise, expression will be too long # if ht_temp > wtemp3.repeater_spacing: - # s3 = wtemp3.repeater_size - # l_eff = wtemp3.repeater_spacing + # s3 = wtemp3.repeater_size + # l_eff = wtemp3.repeater_spacing # else: # s3 = (len_temp / wtemp3.repeater_spacing) * wtemp3.repeater_size # l_eff = ht_temp - s3 = sp.Piecewise( - (wtemp3.repeater_size, ht_temp > wtemp3.repeater_spacing), - ((len_temp / wtemp3.repeater_spacing) * wtemp3.repeater_size, True) - ) + # s3 = sp.Piecewise( + # (wtemp3.repeater_size, ht_temp > wtemp3.repeater_spacing), + # ((len_temp / wtemp3.repeater_spacing) * wtemp3.repeater_size, True) + # ) + # l_eff = sp.Piecewise( + # (wtemp3.repeater_spacing, ht_temp > wtemp3.repeater_spacing), + # (ht_temp, True) + # ) - l_eff = sp.Piecewise( - (wtemp3.repeater_spacing, ht_temp > wtemp3.repeater_spacing), - (ht_temp, True) - ) + s3 = wtemp3.repeater_size + l_eff = wtemp3.repeater_spacing self.input_nand(s2, s3, l_eff) @@ -369,17 +408,21 @@ def in_htree(self): if wtemp3: del wtemp3 + def out_htree(self): # temp var s1, s2, s3 = 0, 0, 0 l_eff = 0 wtemp1, wtemp2, wtemp3 = None, None, None - len_temp, ht_temp = 0, 0 + len = 0 + ht = 0 option = 0 - #TODO deleted int - h = math.log2(self.ndwl / 2) - v = math.log2(self.ndbl / 2) + # RECENT Change: Round up h and v from 0 + h = max(int(_log2(self.ndwl / 2)), 1) + v = max(int(_log2(self.ndbl / 2)), 1) + len_temp = 0 + ht_temp = 0 if self.uca_tree: ht_temp = (self.mat_height * self.ndbl / 2 + @@ -392,12 +435,13 @@ def out_htree(self): if self.ndwl == self.ndbl: ht_temp = ((self.mat_height * self.ndbl / 2) + ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * (self.ndbl / 2 - 1) * g_tp.wire_outside_mat.pitch) + - ((self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * h)) / 2 + ((self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * h) + ) / 2 len_temp = (self.mat_width * self.ndwl / 2 + ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * (self.ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + ((self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * v)) / 2 elif self.ndwl > self.ndbl: - excess_part = (math.log2(self.ndwl / 2) - math.log2(self.ndbl / 2)) + excess_part = (_log2(self.ndwl / 2) - _log2(self.ndbl / 2)) ht_temp = ((self.mat_height * self.ndbl / 2) + ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * ((self.ndbl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + (self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * @@ -406,10 +450,11 @@ def out_htree(self): ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * (self.ndwl / 2 - 1) * g_tp.wire_outside_mat.pitch) + ((self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * v)) / 2 else: - excess_part = (math.log2(self.ndbl / 2) - math.log2(self.ndwl / 2)) + excess_part = (_log2(self.ndbl / 2) - _log2(self.ndwl / 2)) ht_temp = ((self.mat_height * self.ndbl / 2) + ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * ((self.ndwl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + - ((self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * h)) / 2 + ((self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * h) + ) / 2 len_temp = (self.mat_width * self.ndwl / 2 + ((self.add_bits + (self.search_data_in_bits + self.search_data_out_bits)) * ((self.ndwl / 2 - 1) + excess_part) * g_tp.wire_outside_mat.pitch) + (self.data_in_bits + self.data_out_bits) * g_tp.wire_outside_mat.pitch * (h + 2 * (1 - pow(0.5, v - h)))) / 2 @@ -469,11 +514,11 @@ def out_htree(self): self.power.readOp.leakage += wtemp1.power.readOp.leakage * self.wire_bw self.power.readOp.gate_leakage += wtemp1.power.readOp.gate_leakage * self.wire_bw - if not self.uca_tree and option == 2 or self.search_tree: + if (self.uca_tree == False and option == 2) or self.search_tree: self.wire_bw *= 2 if not self.uca_tree: - # TODO relational + # Change: Relational set to one value, otherwise, expression will be too long # if len_temp > wtemp1.repeater_spacing: # s1 = wtemp1.repeater_size # l_eff = wtemp1.repeater_spacing @@ -481,34 +526,36 @@ def out_htree(self): # s1 = (len_temp / wtemp1.repeater_spacing) * wtemp1.repeater_size # l_eff = len_temp - # # TODO relational + # s1 = sp.Piecewise( + # (wtemp1.repeater_size, len_temp > wtemp1.repeater_spacing), + # ((len_temp / wtemp1.repeater_spacing) * wtemp1.repeater_size, True) + # ) + + # l_eff = sp.Piecewise( + # (wtemp1.repeater_spacing, len_temp > wtemp1.repeater_spacing), + # (len_temp, True) + # ) + + s1 = wtemp1.repeater_size + l_eff = wtemp1.repeater_spacing + + # Change: Relational set to one value, otherwise, expression will be too long # if ht_temp > wtemp2.repeater_spacing: # s2 = wtemp2.repeater_size # else: # s2 = (len_temp / wtemp2.repeater_spacing) * wtemp2.repeater_size + # s2 = sp.Piecewise( + # (wtemp2.repeater_size, ht_temp > wtemp2.repeater_spacing), + # ((len_temp / wtemp2.repeater_spacing) * wtemp2.repeater_size, True) + # ) - s1 = sp.Piecewise( - (wtemp1.repeater_size, len_temp > wtemp1.repeater_spacing), - ((len_temp / wtemp1.repeater_spacing) * wtemp1.repeater_size, True) - ) + s2 = wtemp2.repeater_size - l_eff = sp.Piecewise( - (wtemp1.repeater_spacing, len_temp > wtemp1.repeater_spacing), - (len_temp, True) - ) - - s2 = sp.Piecewise( - (wtemp2.repeater_size, ht_temp > wtemp2.repeater_spacing), - ((len_temp / wtemp2.repeater_spacing) * wtemp2.repeater_size, True) - ) - - # first level self.output_buffer(s1, s2, l_eff) if option != 1: continue - # second level self.delay += wtemp2.delay self.power.readOp.dynamic += wtemp2.power.readOp.dynamic self.power.searchOp.dynamic += wtemp2.power.readOp.dynamic * self.init_wire_bw @@ -516,14 +563,14 @@ def out_htree(self): self.power.readOp.gate_leakage += wtemp2.power.readOp.gate_leakage * self.wire_bw if self.uca_tree: - self.power.readOp.leakage += (wtemp2.power.readOp.leakage * self.wire_bw) + self.power.readOp.leakage += wtemp2.power.readOp.leakage * self.wire_bw self.power.readOp.gate_leakage += wtemp2.power.readOp.gate_leakage * self.wire_bw else: - self.power.readOp.leakage += (wtemp2.power.readOp.leakage * self.wire_bw) + self.power.readOp.leakage += wtemp2.power.readOp.leakage * self.wire_bw self.power.readOp.gate_leakage += wtemp2.power.readOp.gate_leakage * self.wire_bw self.wire_bw *= 2 - # TODO RELATONAL + # Change: Relational set to one value, otherwise, expression will be too long # if ht_temp > wtemp3.repeater_spacing: # s3 = wtemp3.repeater_size # l_eff = wtemp3.repeater_spacing @@ -531,15 +578,8 @@ def out_htree(self): # s3 = (len_temp / wtemp3.repeater_spacing) * wtemp3.repeater_size # l_eff = ht_temp - s3 = sp.Piecewise( - (wtemp3.repeater_size, ht_temp > wtemp3.repeater_spacing), - ((len_temp / wtemp3.repeater_spacing) * wtemp3.repeater_size, True) - ) - - l_eff = sp.Piecewise( - (wtemp3.repeater_spacing, ht_temp > wtemp3.repeater_spacing), - (ht_temp, True) - ) + s3 = wtemp3.repeater_size + l_eff = wtemp3.repeater_spacing self.output_buffer(s2, s3, l_eff) diff --git a/cacti-main/cacti_python/mat.py b/cacti-main/cacti_python/mat.py index deed031..3dd68cc 100644 --- a/cacti-main/cacti_python/mat.py +++ b/cacti-main/cacti_python/mat.py @@ -65,7 +65,6 @@ def __init__(self, dyn_p): self.power_comparator = PowerDef() self.num_do_b_mat = dyn_p.num_do_b_mat self.num_so_b_mat = dyn_p.num_so_b_mat - #print(self.dp.num_subarrays, self.dp.num_mats) self.num_subarrays_per_mat = self.dp.num_subarrays / self.dp.num_mats self.num_subarrays_per_row = self.dp.Ndwl / self.dp.num_mats_h_dir self.array_leakage = 0 @@ -87,10 +86,10 @@ def __init__(self, dyn_p): self.cl_wakeup_t = 0 self.cl_sleep_tx_area = 0 - print("CHECKPOINT 0") + # Change: Assert: ignored due to relational + # assert self.num_subarrays_per_mat <= 4 + # assert self.num_subarrays_per_row <= 2 - assert self.num_subarrays_per_mat <= 4 - assert self.num_subarrays_per_row <= 2 self.is_fa = self.dp.fully_assoc self.camFlag = self.is_fa or self.pure_cam @@ -146,14 +145,11 @@ def __init__(self, dyn_p): R_wire_bit_mux_dec_out /= 2.0 R_wire_sa_mux_dec_out /= 2.0 - print("CHECKPOINT 1") - self.row_dec = Decoder(num_dec_signals, False, self.subarray.C_wl, R_wire_wl_drv_out, False, self.is_dram, True, self.cam_cell if self.camFlag else self.cell) self.row_dec.nodes_DSTN = self.subarray.num_rows - print("CHECKPOINT 2") - self.bit_mux_dec = Decoder(self.deg_bl_muxing, False, C_ld_bit_mux_dec_out, R_wire_bit_mux_dec_out, False, self.is_dram, False, self.cam_cell if self.camFlag else self.cell) + self.sa_mux_lev_1_dec = Decoder(self.dp.deg_senseamp_muxing_non_associativity, self.dp.number_way_select_signals_mat, C_ld_sa_mux_lev_1_dec_out, R_wire_sa_mux_dec_out, False, self.is_dram, False, self.cam_cell if self.camFlag else self.cell) self.sa_mux_lev_2_dec = Decoder(self.dp.Ndsam_lev_2, False, C_ld_sa_mux_lev_2_dec_out, R_wire_sa_mux_dec_out, False, self.is_dram, False, self.cam_cell if self.camFlag else self.cell) @@ -167,24 +163,17 @@ def __init__(self, dyn_p): if self.is_fa or self.pure_cam: num_dec_signals += int(math.log2(self.num_subarrays_per_mat)) - print("CHECKPOINT 3") - print(f'num_dec_signals {num_dec_signals}') - print(f'deg_bl_muxing {self.deg_bl_muxing}') self.r_predec_blk1 = PredecBlk(num_dec_signals, self.row_dec, C_wire_predec_blk_out, R_wire_predec_blk_out, self.num_subarrays_per_mat, self.is_dram, True) - print("CHECKPOINT 3.1") self.r_predec_blk2 = PredecBlk(num_dec_signals, self.row_dec, C_wire_predec_blk_out, R_wire_predec_blk_out, self.num_subarrays_per_mat, self.is_dram, False) self.b_mux_predec_blk1 = PredecBlk(self.deg_bl_muxing, self.bit_mux_dec, 0, 0, 1, self.is_dram, True) self.b_mux_predec_blk2 = PredecBlk(self.deg_bl_muxing, self.bit_mux_dec, 0, 0, 1, self.is_dram, False) - print("CHECKPOINT 3.2") self.sa_mux_lev_1_predec_blk1 = PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, self.sa_mux_lev_1_dec, 0, 0, 1, self.is_dram, True) self.sa_mux_lev_1_predec_blk2 = PredecBlk(dyn_p.deg_senseamp_muxing_non_associativity, self.sa_mux_lev_1_dec, 0, 0, 1, self.is_dram, False) - print("CHECKPOINT 3.3") self.sa_mux_lev_2_predec_blk1 = PredecBlk(self.dp.Ndsam_lev_2, self.sa_mux_lev_2_dec, 0, 0, 1, self.is_dram, True) self.sa_mux_lev_2_predec_blk2 = PredecBlk(self.dp.Ndsam_lev_2, self.sa_mux_lev_2_dec, 0, 0, 1, self.is_dram, False) self.dummy_way_sel_predec_blk1 = PredecBlk(1, self.sa_mux_lev_1_dec, 0, 0, 0, self.is_dram, True) self.dummy_way_sel_predec_blk2 = PredecBlk(1, self.sa_mux_lev_1_dec, 0, 0, 0, self.is_dram, False) - print("CHECKPOINT 4") self.r_predec_blk_drv1 = PredecBlkDrv(0, self.r_predec_blk1, self.is_dram) self.r_predec_blk_drv2 = PredecBlkDrv(0, self.r_predec_blk2, self.is_dram) self.b_mux_predec_blk_drv1 = PredecBlkDrv(0, self.b_mux_predec_blk1, self.is_dram) @@ -196,18 +185,15 @@ def __init__(self, dyn_p): self.way_sel_drv1 = PredecBlkDrv(dyn_p.number_way_select_signals_mat, self.dummy_way_sel_predec_blk1, self.is_dram) self.dummy_way_sel_predec_blk_drv2 = PredecBlkDrv(1, self.dummy_way_sel_predec_blk2, self.is_dram) - print("CHECKPOINT 5") self.r_predec = Predec(self.r_predec_blk_drv1, self.r_predec_blk_drv2) self.b_mux_predec = Predec(self.b_mux_predec_blk_drv1, self.b_mux_predec_blk_drv2) self.sa_mux_lev_1_predec = Predec(self.sa_mux_lev_1_predec_blk_drv1, self.sa_mux_lev_1_predec_blk_drv2) self.sa_mux_lev_2_predec = Predec(self.sa_mux_lev_2_predec_blk_drv1, self.sa_mux_lev_2_predec_blk_drv2) - print("CHECKPOINT 6") self.subarray_out_wire = Wire(self.dp.wtype, self.subarray.area.w if g_ip.cl_vertical else self.subarray.area.h) # def __init__(self, wire_model, length, nsense=1, width_scaling=1, spacing_scaling=1, wire_placement=outside_mat, resistivity=CU_RESISTIVITY, dt=g_tp.peri_global): - print("CHECKPOINT 7") if self.is_fa or self.pure_cam: driver_c_gate_load = self.subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, self.is_dram, False, False) driver_c_wire_load = self.subarray.num_cols_fa_cam * self.cam_cell.w * g_tp.wire_outside_mat.C_per_um @@ -302,7 +288,6 @@ def __init__(self, dyn_p): # assert self.area.w > 0 def compute_delays(self, inrisetime): - print("CHECKPOINT 0") if self.is_fa or self.pure_cam: outrisetime_search = self.compute_cam_delay(inrisetime) @@ -327,29 +312,23 @@ def compute_delays(self, inrisetime): outrisetime_search = self.compute_bitline_delay(outrisetime_search) outrisetime_search = self.compute_sa_delay(outrisetime_search) - print("CHECKPOINT 1") outrisetime_search = self.compute_subarray_out_drv(outrisetime_search) self.subarray_out_wire.set_in_rise_time(outrisetime_search) outrisetime_search = self.subarray_out_wire.signal_rise_time() self.delay_subarray_out_drv_htree = self.delay_subarray_out_drv + self.subarray_out_wire.delay - print("CHECKPOINT 2") outrisetime = self.r_predec.compute_delays(inrisetime) row_dec_outrisetime = self.row_dec.compute_delays(outrisetime) - print("CHECKPOINT 3") outrisetime = self.b_mux_predec.compute_delays(inrisetime) self.bit_mux_dec.compute_delays(outrisetime) - print("CHECKPOINT 4") outrisetime = self.sa_mux_lev_1_predec.compute_delays(inrisetime) self.sa_mux_lev_1_dec.compute_delays(outrisetime) - print("CHECKPOINT 5") outrisetime = self.sa_mux_lev_2_predec.compute_delays(inrisetime) self.sa_mux_lev_2_dec.compute_delays(outrisetime) - print("Computed all predec delays") if self.pure_cam: outrisetime = self.compute_bitline_delay(row_dec_outrisetime) @@ -357,9 +336,7 @@ def compute_delays(self, inrisetime): return outrisetime_search else: - print("compute delays CHECKPOINT 6 ?") self.bl_precharge_eq_drv.compute_delay(0) - print("compute delays CHECKPOINT 6.5 ?") if self.row_dec.exist: k = self.row_dec.num_gates - 1 rd = tr_R_on(self.row_dec.w_dec_n[k], NCH, 1, self.is_dram, False, True) @@ -381,52 +358,35 @@ def compute_delays(self, inrisetime): sp.log((g_tp.sram.Vbitpre - 0.1 * self.dp.V_b_sense) / (g_tp.sram.Vbitpre - self.dp.V_b_sense)) * \ (R_bl_precharge * C_bl + R_bl * C_bl / 2) - print("CHECKPOINT 7") outrisetime = self.r_predec.compute_delays(inrisetime) row_dec_outrisetime = self.row_dec.compute_delays(outrisetime) - print(outrisetime) - print(row_dec_outrisetime) - - print("CHECKPOINT 8") outrisetime = self.b_mux_predec.compute_delays(inrisetime) self.bit_mux_dec.compute_delays(outrisetime) - print(outrisetime) - print(self.b_mux_predec.delay) - print(self.bit_mux_dec.delay) - - print("CHECKPOINT 9") outrisetime = self.sa_mux_lev_1_predec.compute_delays(inrisetime) self.sa_mux_lev_1_dec.compute_delays(outrisetime) - print("CHECKPOINT 10") outrisetime = self.sa_mux_lev_2_predec.compute_delays(inrisetime) self.sa_mux_lev_2_dec.compute_delays(outrisetime) if g_ip.is_3d_mem: row_dec_outrisetime = inrisetime - print("CHECKPOINT 11") outrisetime = self.compute_bitline_delay(row_dec_outrisetime) outrisetime = self.compute_sa_delay(outrisetime) outrisetime = self.compute_subarray_out_drv(outrisetime) self.subarray_out_wire.set_in_rise_time(outrisetime) outrisetime = self.subarray_out_wire.signal_rise_time() - print("CHECKPOINT 12") self.delay_subarray_out_drv_htree = self.delay_subarray_out_drv + self.subarray_out_wire.delay if self.dp.is_tag and not self.dp.fully_assoc: self.compute_comparator_delay(0) - print("CHECKPOINT 13") - if not self.row_dec.exist: self.delay_wl_reset = symbolic_convex_max(self.r_predec.blk1.delay, self.r_predec.blk2.delay) - print("CHECKPOINT 14") - return outrisetime def compute_bit_mux_sa_precharge_sa_mux_wr_drv_wr_mux_h(self): @@ -467,16 +427,16 @@ def compute_cam_delay(self, inrisetime): driver_c_wire_load = 0 driver_r_wire_load = 0 - leak_power_cc_inverters_sram_cell = 0 - leak_power_acc_tr_RW_or_WR_port_sram_cell = 0 - leak_power_RD_port_sram_cell = 0 - leak_power_SCHP_port_sram_cell = 0 - leak_comparator_cam_cell = 0 + self.leak_power_cc_inverters_sram_cell = 0 + self.leak_power_acc_tr_RW_or_WR_port_sram_cell = 0 + self.leak_power_RD_port_sram_cell = 0 + self.leak_power_SCHP_port_sram_cell = 0 + self.leak_comparator_cam_cell = 0 - gate_leak_comparator_cam_cell = 0 - gate_leak_power_cc_inverters_sram_cell = 0 - gate_leak_power_RD_port_sram_cell = 0 - gate_leak_power_SCHP_port_sram_cell = 0 + self.gate_leak_comparator_cam_cell = 0 + self.gate_leak_power_cc_inverters_sram_cell = 0 + self.gate_leak_power_RD_port_sram_cell = 0 + self.gate_leak_power_SCHP_port_sram_cell = 0 c_matchline_metal = self.cam_cell.get_w() * g_tp.wire_local.C_per_um c_searchline_metal = self.cam_cell.get_h() * g_tp.wire_local.C_per_um @@ -511,8 +471,7 @@ def compute_cam_delay(self, inrisetime): W_hit_miss_n = Wdummyn W_hit_miss_p = g_tp.min_w_nmos_ * p_to_n_sizing_r - #TODO deleted int - Htagbits = sp.ceiling(self.subarray.num_cols_fa_cam / 2.0) + Htagbits = int(sp.ceiling(self.subarray.num_cols_fa_cam / 2.0)) driver_c_gate_load = self.subarray.num_cols_fa_cam * gate_C(2 * g_tp.w_pmos_bl_precharge + g_tp.w_pmos_bl_eq, 0, self.is_dram, False, False) driver_c_wire_load = self.subarray.num_cols_fa_cam * self.cam_cell.w * g_tp.wire_outside_mat.C_per_um @@ -777,8 +736,7 @@ def compute_bitline_delay(self, inrisetime): m = V_wl / inrisetime - print(f'tstep: {tstep}') - # TODO Relational + # Change: Relational set to one value, otherwise, expression will be too long # if tstep <= (0.5 * (V_wl - v_th_mem_cell) / m): self.delay_bitline = sp.sqrt(2 * tstep * (V_wl - v_th_mem_cell) / m) # else: @@ -791,7 +749,6 @@ def compute_bitline_delay(self, inrisetime): # self.delay_bitline = sp.Piecewise((delay_bitline_if_true, condition), # (delay_bitline_if_false, not condition)) - # TODO VISIT is_fa = bool(self.dp.fully_assoc) @@ -952,13 +909,14 @@ def compute_comparator_delay(self, inrisetime): tstep = (r2 * c2 + (r1 + r2) * c1) * sp.log(1.0 / VTHMUXNAND) m = g_tp.peri_global.Vdd / nextinputtime - # TODO RELATIONAL + # Change: Relational set to one value, otherwise, expression will be too long # if tstep <= (0.5 * (g_tp.peri_global.Vdd - g_tp.peri_global.Vth) / m): # a = m # b = 2 * ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) # c = -2 * tstep * (g_tp.peri_global.Vdd - g_tp.peri_global.Vth) + 1 / m * ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) * ((g_tp.peri_global.Vdd * VTHEVALINV) - g_tp.peri_global.Vth) # Tcomparatorni = (-b + sp.sqrt(b * b - 4 * a * c)) / (2 * a) # else: + # Tcomparatorni = tstep + (g_tp.peri_global.Vdd + g_tp.peri_global.Vth) / (2 * m) - (g_tp.peri_global.Vdd * VTHEVALINV) / m Tcomparatorni = tstep + (g_tp.peri_global.Vdd + g_tp.peri_global.Vth) / (2 * m) - (g_tp.peri_global.Vdd * VTHEVALINV) / m self.delay_comparator = Tcomparatorni + st1del + st2del + st3del diff --git a/cacti-main/cacti_python/nuca.py b/cacti-main/cacti_python/nuca.py index 14ded29..2035126 100644 --- a/cacti-main/cacti_python/nuca.py +++ b/cacti-main/cacti_python/nuca.py @@ -39,9 +39,7 @@ def __init__(self, dt=None): def init_cont(self): cont_stats = [[[[[0 for _ in range(8)] for _ in range(7)] for _ in range(ROUTER_TYPES)] for _ in range(5)] for _ in range(2)] try: - print("HUH") with open("contention.dat", "r") as cont: - print("BUH") for i in range(2): for j in range(2, 5): for k in range(ROUTER_TYPES): @@ -49,10 +47,9 @@ def init_cont(self): line = cont.readline().strip() parts = line.split(":")[1].strip().split() for m in range(8): - #TODO deleted int + # Change: deleted int since symbolic cont_stats[i][j][k][l][m] = parts[m] except FileNotFoundError: - print("BRUH") print("contention.dat file is missing!") exit(0) self.cont_stats = cont_stats @@ -78,7 +75,7 @@ def calc_cycles(self, lat, oper_freq): cycle_time = 1.0 / (oper_freq * 1e9) cycle_time -= LATCH_DELAY cycle_time -= FIXED_OVERHEAD - #TODO deleted int + # Change: deleted int since symbolic return sp.ceiling(lat / cycle_time) # Constants and placeholder classes/functions for the sake of completeness diff --git a/cacti-main/cacti_python/parameter.py b/cacti-main/cacti_python/parameter.py index 7e2f16f..7f5893c 100644 --- a/cacti-main/cacti_python/parameter.py +++ b/cacti-main/cacti_python/parameter.py @@ -5,72 +5,12 @@ from .const import * from .area import Area import sympy as sp +import time sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) from hw_symbols import symbol_table as sympy_var -# sympy_var = { -# 'C_g_ideal': sp.symbols('C_g_ideal'), -# 'C_fringe': sp.symbols('C_fringe'), -# 'C_junc': sp.symbols('C_junc'), -# 'C_junc_sw': sp.symbols('C_junc_sw'), -# 'l_phy': sp.symbols('l_phy'), -# 'l_elec': sp.symbols('l_elec'), -# 'nmos_effective_resistance_multiplier': sp.symbols('nmos_effective_resistance_multiplier'), -# 'Vdd': sp.symbols('Vdd'), -# 'Vth': sp.symbols('Vth'), -# 'Vdsat': sp.symbols('Vdsat'), -# 'I_on_n': sp.symbols('I_on_n'), -# 'I_on_p': sp.symbols('I_on_p'), -# 'I_off_n': sp.symbols('I_off_n'), -# 'I_g_on_n': sp.symbols('I_g_on_n'), -# 'C_ox': sp.symbols('C_ox'), -# 't_ox': sp.symbols('t_ox'), -# 'n2p_drv_rt': sp.symbols('n2p_drv_rt'), -# 'lch_lk_rdc': sp.symbols('lch_lk_rdc'), -# 'Mobility_n': sp.symbols('Mobility_n'), -# 'gmp_to_gmn_multiplier': sp.symbols('gmp_to_gmn_multiplier'), -# 'vpp': sp.symbols('vpp'), -# 'Wmemcella': sp.symbols('Wmemcella'), -# 'Wmemcellpmos': sp.symbols('Wmemcellpmos'), -# 'Wmemcellnmos': sp.symbols('Wmemcellnmos'), -# 'area_cell': sp.symbols('area_cell'), -# 'asp_ratio_cell': sp.symbols('asp_ratio_cell'), -# 'vdd_cell': sp.symbols('vdd_cell'), -# 'dram_cell_I_on': sp.symbols('dram_cell_I_on'), -# 'dram_cell_Vdd': sp.symbols('dram_cell_Vdd'), -# 'dram_cell_C': sp.symbols('dram_cell_C'), -# 'dram_cell_I_off_worst_case_len_temp': sp.symbols('dram_cell_I_off_worst_case_len_temp'), -# 'logic_scaling_co_eff': sp.symbols('logic_scaling_co_eff'), -# 'core_tx_density': sp.symbols('core_tx_density'), -# 'sckt_co_eff': sp.symbols('sckt_co_eff'), -# 'chip_layout_overhead': sp.symbols('chip_layout_overhead'), -# 'macro_layout_overhead': sp.symbols('macro_layout_overhead'), -# 'sense_delay': sp.symbols('sense_delay'), -# 'sense_dy_power': sp.symbols('sense_dy_power'), -# 'wire_pitch': sp.symbols('wire_pitch'), -# 'barrier_thickness': sp.symbols('barrier_thickness'), -# 'dishing_thickness': sp.symbols('dishing_thickness'), -# 'alpha_scatter': sp.symbols('alpha_scatter'), -# 'aspect_ratio': sp.symbols('aspect_ratio'), -# 'miller_value': sp.symbols('miller_value'), -# 'horiz_dielectric_constant': sp.symbols('horiz_dielectric_constant'), -# 'vert_dielectric_constant': sp.symbols('vert_dielectric_constant'), -# 'ild_thickness': sp.symbols('ild_thickness'), -# 'fringe_cap': sp.symbols('fringe_cap'), -# 'resistivity': sp.symbols('resistivity'), -# 'wire_r_per_micron': sp.symbols('wire_r_per_micron'), -# 'wire_c_per_micron': sp.symbols('wire_c_per_micron'), -# 'tsv_pitch': sp.symbols('tsv_pitch'), -# 'tsv_diameter': sp.symbols('tsv_diameter'), -# 'tsv_length': sp.symbols('tsv_length'), -# 'tsv_dielec_thickness': sp.symbols('tsv_dielec_thickness'), -# 'tsv_contact_resistance': sp.symbols('tsv_contact_resistance'), -# 'tsv_depletion_width': sp.symbols('tsv_depletion_width'), -# 'tsv_liner_dielectric_cons': sp.symbols('tsv_liner_dielectric_cons') -# } - def contains_any_symbol(expr): # Extract all the symbols from the dictionary symbols = sympy_var.values() @@ -228,6 +168,10 @@ def __init__(self): self.total_power = False self.verbose = False + self.repeater_spacing = 0.0 + self.repeater_size = 0.0 + + def parse_cfg(self, in_file): try: with open(in_file, "r") as fp: @@ -486,7 +430,6 @@ def parse_cfg(self, in_file): elif line.startswith("-Power Gating Performance Loss"): val = line.split()[-1] cleaned_value = val.strip('"').strip() - print(cleaned_value) self.perfloss = float(cleaned_value) elif line.startswith("-Print input parameters"): print_input = line.split("\"")[1] @@ -506,7 +449,7 @@ def parse_cfg(self, in_file): self.ndsam2 = int(line.split()[-1]) elif line.startswith("-Ndcm"): self.ndcm = int(line.split()[-1]) - elif line.startswith("-dram type"): + elif line.startswith("-dram_type"): dram_type = line.split("\"")[1] if "DDR3" in dram_type: self.io_type = "DDR3" @@ -671,8 +614,7 @@ def parse_cfg(self, in_file): print(f"{in_file} is missing!") exit(-1) - def error_checking(self): - print("IN ERROR CHECKING") + def error_checking(self): A = 0 seq_access = False fast_access = True @@ -714,7 +656,6 @@ def error_checking(self): NSER = self.num_se_rd_ports SCHP = self.num_search_ports - print("HAD SET B-1") if (RWP + ERP + EWP) < 1: print("Must have at least one port") return False @@ -737,7 +678,7 @@ def error_checking(self): print("Pure CAM must have associativity as 0") return False - if self.assoc == 0 and (not self.pure_cam and not self.is_cache): + if self.assoc == 0 and not self.pure_cam and not self.is_cache: print("Only CAM or Fully associative cache can have associativity as 0") return False @@ -763,7 +704,6 @@ def error_checking(self): if RWP == 0 and ERP == 0 and SCHP > 0 and (self.fully_assoc or self.pure_cam): ERP = SCHP - print("HAD SET B0") if self.assoc == 0: A = C / B else: @@ -782,8 +722,6 @@ def error_checking(self): return False self.block_sz = B - # TODO REMOVE - print("HAD SET B") if seq_access: self.tag_assoc = A @@ -816,10 +754,11 @@ def error_checking(self): return False self.power_gating = (self.array_power_gated or self.bitline_floating or self.wl_power_gated or - self.cl_power_gated or self.interconect_power_gated) + self.cl_power_gated or self.interconect_power_gated) return True + def display_ip(self): print(f"Cache size : {self.cache_sz}") print(f"Block size : {self.line_sz}") @@ -999,11 +938,30 @@ def reset(self): self.dram = MemoryType() self.cam = MemoryType() + self.dram_cell_I_on = sympy_var['dram_cell_I_on'] + self.dram_cell_Vdd = sympy_var['dram_cell_Vdd'] + self.dram_cell_C = sympy_var['dram_cell_C'] + self.dram_cell_I_off_worst_case_len_temp = sympy_var['dram_cell_I_off_worst_case_len_temp'] + self.vpp = sympy_var['vpp'] + self.sckt_co_eff = sympy_var['sckt_co_eff'] + self.chip_layout_overhead = sympy_var['chip_layout_overhead'] + self.macro_layout_overhead = sympy_var['macro_layout_overhead'] + + self.sense_delay = sympy_var['sense_delay'] + self.sense_dy_power = sympy_var['sense_dy_power'] + self.sckt_co_eff = sympy_var['sckt_co_eff'] + self.chip_layout_overhead = sympy_var['chip_layout_overhead'] + self.macro_layout_overhead = sympy_var['macro_layout_overhead'] + self.dram_cell_I_on = sympy_var['dram_cell_I_on'] + self.dram_cell_Vdd = sympy_var['dram_cell_Vdd'] + self.dram_cell_C = sympy_var['dram_cell_C'] + self.dram_cell_I_off_worst_case_len_temp = sympy_var['dram_cell_I_off_worst_case_len_temp'] + self.vpp = sympy_var['vpp'] + def init_symbolic(): return def find_upper_and_lower_tech(self, technology, tech_lo, in_file_lo, tech_hi, in_file_hi): - print(technology) if 179 < technology < 181: tech_lo = 180 in_file_lo = "tech_params/180nm.dat" @@ -1124,10 +1082,6 @@ def init(self, technology, is_tag): exit(0) alpha = 1 if tech_lo == tech_hi else (technology - tech_hi) / (tech_lo - tech_hi) - print(in_file_lo) - # TODO FILE CHECK - # in_file_lo = "cacti/" + in_file_lo - # print(in_file_lo) with open(in_file_lo, "r") as fp: lines = fp.readlines() @@ -1170,18 +1124,12 @@ def init(self, technology, is_tag): peri_global_lo = DeviceType() peri_global_hi = DeviceType() peri_global_lo.assign(in_file_lo, peri_global_tech_type, g_ip.temp) - print("peri lo") - peri_global_lo.display() - print() + # peri_global_lo.display() peri_global_hi.assign(in_file_hi, peri_global_tech_type, g_ip.temp) - print("peri hi") - peri_global_hi.display() - print() + # peri_global_hi.display() self.peri_global.interpolate(alpha, peri_global_lo, peri_global_hi) - print("peri_global") - self.peri_global.display() - print() + # self.peri_global.display() sleep_tx_lo = DeviceType() sleep_tx_hi = DeviceType() @@ -1341,8 +1289,6 @@ def init(self, technology, is_tag): self.max_w_nmos_dec = self.max_w_nmos_ self.h_dec = 4 # in the unit of memory cell height - #TODO CHECK 388 for 180nm - print(self.peri_global.l_elec) gmn_sense_amp_latch = (self.peri_global.Mobility_n / 2) * self.peri_global.C_ox * (self.w_sense_n / self.peri_global.l_elec) * self.peri_global.Vdsat gmp_sense_amp_latch = self.peri_global.gmp_to_gmn_multiplier * gmn_sense_amp_latch self.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch @@ -1595,10 +1541,37 @@ def __init__(self): self.long_channel_leakage_reduction = 0 self.Mobility_n = 0 + # self.n_to_p_eff_curr_drv_ratio = sympy_var['n2p_drv_rt'] + # auxiliary parameters self.Vdsat = 0 self.gmp_to_gmn_multiplier = 0 + self.C_g_ideal = sympy_var['C_g_ideal'] + self.C_fringe = sympy_var['C_fringe'] + self.C_junc_sidewall = sympy_var['C_junc_sw'] + self.C_junc = sympy_var['C_junc'] + self.l_phy = sympy_var['l_phy'] + self.l_elec = sympy_var['l_elec'] + self.nmos_effective_resistance_multiplier = sympy_var['nmos_effective_resistance_multiplier'] + self.Vdd = sympy_var['Vdd'] + self.Vth = sympy_var['Vth'] + self.Vdsat = sympy_var['Vdsat'] + self.I_on_n = sympy_var['I_on_n'] + self.I_on_p = sympy_var['I_on_p'] + self.I_off_n = sympy_var['I_off_n'] + self.I_g_on_n = sympy_var['I_g_on_n'] + self.C_ox = sympy_var['C_ox'] + self.t_ox = sympy_var['t_ox'] + self.n_to_p_eff_curr_drv_ratio = sympy_var['n2p_drv_rt'] + self.long_channel_leakage_reduction = sympy_var['lch_lk_rdc'] + self.Mobility_n = sympy_var['Mobility_n'] + self.gmp_to_gmn_multiplier = sympy_var['gmp_to_gmn_multiplier'] + + self.C_overlap = 0.2 * self.C_g_ideal + self.I_off_p = self.I_off_n + self.I_g_on_p = self.I_g_on_n + def reset(self): self.C_g_ideal = 0 self.C_fringe = 0 @@ -1628,6 +1601,31 @@ def reset(self): self.Vdsat = 0 self.gmp_to_gmn_multiplier = 0 + self.C_g_ideal = sympy_var['C_g_ideal'] + self.C_fringe = sympy_var['C_fringe'] + self.C_junc_sidewall = sympy_var['C_junc_sw'] + self.C_junc = sympy_var['C_junc'] + self.l_phy = sympy_var['l_phy'] + self.l_elec = sympy_var['l_elec'] + self.nmos_effective_resistance_multiplier = sympy_var['nmos_effective_resistance_multiplier'] + self.Vdd = sympy_var['Vdd'] + self.Vth = sympy_var['Vth'] + self.Vdsat = sympy_var['Vdsat'] + self.I_on_n = sympy_var['I_on_n'] + self.I_on_p = sympy_var['I_on_p'] + self.I_off_n = sympy_var['I_off_n'] + self.I_g_on_n = sympy_var['I_g_on_n'] + self.C_ox = sympy_var['C_ox'] + self.t_ox = sympy_var['t_ox'] + self.n_to_p_eff_curr_drv_ratio = sympy_var['n2p_drv_rt'] + self.long_channel_leakage_reduction = sympy_var['lch_lk_rdc'] + self.Mobility_n = sympy_var['Mobility_n'] + self.gmp_to_gmn_multiplier = sympy_var['gmp_to_gmn_multiplier'] + + self.C_overlap = 0.2 * self.C_g_ideal + self.I_off_p = self.I_off_n + self.I_g_on_p = self.I_g_on_n + def display(self, indent=0): indent_str = ' ' * indent print(f"{indent_str}C_g_ideal = {self.C_g_ideal} F/um") @@ -1768,11 +1766,9 @@ def assign(self, in_file, tech_flavor, temperature): self.C_overlap = 0.2 * self.C_g_ideal if tech_flavor >= 3: - if(self.I_on_n): # TODO Check values of I_on_n - self.R_nch_on = self.nmos_effective_resistance_multiplier * g_tp.vpp / self.I_on_n + self.R_nch_on = self.nmos_effective_resistance_multiplier * g_tp.vpp / self.I_on_n else: - if(self.I_on_n): # TODO Check values of I_on_n - self.R_nch_on = self.nmos_effective_resistance_multiplier * self.Vdd / self.I_on_n + self.R_nch_on = self.nmos_effective_resistance_multiplier * self.Vdd / self.I_on_n # CHECKPOINT _pch_on issue # print(f"nmos_effective_resistance_multiplier {nmos_effective_resistance_multiplier}") # print(f"tech_flavor {tech_flavor}") @@ -1789,7 +1785,6 @@ def assign(self, in_file, tech_flavor, temperature): def interpolate(self, alpha, dev1, dev2): result = DeviceType() self.C_g_ideal = alpha * dev1.C_g_ideal + (1 - alpha) * dev2.C_g_ideal - print(f'GLOBAL result {self.C_g_ideal}') self.C_fringe = alpha * dev1.C_fringe + (1 - alpha) * dev2.C_fringe self.C_overlap = alpha * dev1.C_overlap + (1 - alpha) * dev2.C_overlap self.C_junc = alpha * dev1.C_junc + (1 - alpha) * dev2.C_junc @@ -1835,6 +1830,24 @@ def __init__(self): self.alpha_scatter = 0 self.fringe_cap = 0 + self.pitch = sympy_var['wire_pitch'] + self.barrier_thickness = sympy_var['barrier_thickness'] + self.dishing_thickness = sympy_var['dishing_thickness'] + self.alpha_scatter = sympy_var['alpha_scatter'] + self.aspect_ratio = sympy_var['aspect_ratio'] + self.miller_value = sympy_var['miller_value'] + self.horiz_dielectric_constant = sympy_var['horiz_dielectric_constant'] + self.vert_dielectric_constant = sympy_var['vert_dielectric_constant'] + self.ild_thickness = sympy_var['ild_thickness'] + self.fringe_cap = sympy_var['fringe_cap'] + self.R_per_um = sympy_var['wire_r_per_micron'] + self.C_per_um = sympy_var['wire_c_per_micron'] + self.resistivity = sympy_var['resistivity'] + self.pitch *= g_ip.F_sz_um + self.wire_width = self.pitch / 2 # micron + self.wire_thickness = self.aspect_ratio * self.wire_width # micron + self.wire_spacing = self.pitch - self.wire_width # micron + self.reset() def reset(self): @@ -1856,6 +1869,24 @@ def reset(self): self.alpha_scatter = 0 self.fringe_cap = 0 + self.pitch = sympy_var['wire_pitch'] + self.barrier_thickness = sympy_var['barrier_thickness'] + self.dishing_thickness = sympy_var['dishing_thickness'] + self.alpha_scatter = sympy_var['alpha_scatter'] + self.aspect_ratio = sympy_var['aspect_ratio'] + self.miller_value = sympy_var['miller_value'] + self.horiz_dielectric_constant = sympy_var['horiz_dielectric_constant'] + self.vert_dielectric_constant = sympy_var['vert_dielectric_constant'] + self.ild_thickness = sympy_var['ild_thickness'] + self.fringe_cap = sympy_var['fringe_cap'] + self.R_per_um = sympy_var['wire_r_per_micron'] + self.C_per_um = sympy_var['wire_c_per_micron'] + self.resistivity = sympy_var['resistivity'] + self.pitch *= g_ip.F_sz_um + self.wire_width = self.pitch / 2 # micron + self.wire_thickness = self.aspect_ratio * self.wire_width # micron + self.wire_spacing = self.pitch - self.wire_width # micron + def is_equal(self, inter): if not is_equal(self.pitch, inter.pitch): return False if not is_equal(self.R_per_um, inter.R_per_um): return False @@ -1978,8 +2009,23 @@ def __init__(self): self.Vbitfloating = 0 self.area_cell = 0 self.asp_ratio_cell = 0 + + self.cell_a_w = sympy_var['Wmemcella'] + self.cell_pmos_w = sympy_var['Wmemcellpmos'] + self.cell_nmos_w = sympy_var['Wmemcellnmos'] + self.area_cell = sympy_var['area_cell'] + self.asp_ratio_cell = sympy_var['asp_ratio_cell'] + self.reset() + self.cell_a_w = sympy_var['Wmemcella'] + self.cell_pmos_w = sympy_var['Wmemcellpmos'] + self.cell_nmos_w = sympy_var['Wmemcellnmos'] + self.area_cell = sympy_var['area_cell'] + self.asp_ratio_cell = sympy_var['asp_ratio_cell'] + self.cell_pmos_w *= g_ip.F_sz_um + self.cell_nmos_w *= g_ip.F_sz_um + def reset(self): self.b_w = 0 self.b_h = 0 @@ -1991,8 +2037,15 @@ def reset(self): self.area_cell = 0 self.asp_ratio_cell = 0 + self.cell_a_w = sympy_var['Wmemcella'] + self.cell_pmos_w = sympy_var['Wmemcellpmos'] + self.cell_nmos_w = sympy_var['Wmemcellnmos'] + self.area_cell = sympy_var['area_cell'] + self.asp_ratio_cell = sympy_var['asp_ratio_cell'] + self.cell_pmos_w *= g_ip.F_sz_um + self.cell_nmos_w *= g_ip.F_sz_um + def assign(self, in_file, tech_flavor, cell_type): - print("ASSIGN MEMORY") try: with open(in_file, "r") as fp: lines = fp.readlines() @@ -2003,8 +2056,6 @@ def assign(self, in_file, tech_flavor, cell_type): vdd_cell = 0 vdd = 0 - print(f'tech_flavor {tech_flavor}') - vdd = sympy_var['Vdd'] vdd_cell = sympy_var['vdd_cell'] self.cell_a_w = sympy_var['Wmemcella'] @@ -2045,16 +2096,12 @@ def assign(self, in_file, tech_flavor, cell_type): # print(g_ip.F_sz_um) # print(self.cell_pmos_w) if cell_type != 2: - print(self.cell_a_w) self.cell_a_w *= g_ip.F_sz_um self.cell_pmos_w *= g_ip.F_sz_um self.cell_nmos_w *= g_ip.F_sz_um if cell_type != 2: self.area_cell *= (g_ip.F_sz_um * g_ip.F_sz_um) - print(f"DEBUG: {self.cell_a_w}") - - #TODO 1028-1030 self.b_w = sp.sqrt(self.area_cell / self.asp_ratio_cell) self.b_h = self.asp_ratio_cell * self.b_w if cell_type == 2: @@ -2073,41 +2120,24 @@ def interpolate(self, alpha, mem1, mem2): self.Vbitpre = mem2.Vbitpre self.Vbitfloating = self.Vbitpre * 0.7 - #TODO 1028-1030 self.b_w = sp.sqrt(self.area_cell / self.asp_ratio_cell) self.b_h = self.asp_ratio_cell * self.b_w def isEqual(self, mem): - if not self.is_equal(self.b_w, mem.b_w): return False - if not self.is_equal(self.b_h, mem.b_h): return False - if not self.is_equal(self.cell_a_w, mem.cell_a_w): return False - if not self.is_equal(self.cell_pmos_w, mem.cell_pmos_w): return False - if not self.is_equal(self.cell_nmos_w, mem.cell_nmos_w): return False - if not self.is_equal(self.Vbitpre, mem.Vbitpre): return False + if not is_equal(self.b_w, mem.b_w): return False + if not is_equal(self.b_h, mem.b_h): return False + if not is_equal(self.cell_a_w, mem.cell_a_w): return False + if not is_equal(self.cell_pmos_w, mem.cell_pmos_w): return False + if not is_equal(self.cell_nmos_w, mem.cell_nmos_w): return False + if not is_equal(self.Vbitpre, mem.Vbitpre): return False return True - def is_equal(self, first, second): - if (first == 0) and (second == 0): - return True - if (second == 0) or (second != second): - return True - if (first != first) or (second != second): # both are NaNs - return True - if first == 0: - if abs(first - second) < (second * 0.000001): - return True - else: - if abs(first - second) < (first * 0.000001): - return True - return False - def scan_five_input_double(self, line, name, unit_name, flavor, print_flag): temp = [0] * 5 unit = '' pattern = re.compile(rf"{name}\s+(\S+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)") match = pattern.search(line) - print(f'{line}, {match}') if match: unit = match.group(1) @@ -2131,6 +2161,9 @@ def reset(self): self.core_tx_density = 0 self.long_channel_leakage_reduction = 0 + self.logic_scaling_co_eff = sympy_var['logic_scaling_co_eff'] + self.core_tx_density = sympy_var['core_tx_density'] + def assign(self, in_file): try: with open(in_file, "r") as fp: @@ -2234,70 +2267,37 @@ def __init__(self, is_tag_=False, pure_ram_=0, pure_cam_=0, Nspd_=1.0, Ndwl_=1, self.cell = Area() self.cam_cell = Area() self.is_valid = False - print("SETING UP DYNAMIC PARAM") self.init_parameters() def init_parameters(self): - if self.is_tag: - self.ram_cell_tech_type = g_ip.tag_arr_ram_cell_tech_type - else: - self.ram_cell_tech_type = g_ip.data_arr_ram_cell_tech_type - + self.ram_cell_tech_type = g_ip.tag_arr_ram_cell_tech_type if self.is_tag else g_ip.data_arr_ram_cell_tech_type self.is_dram = (self.ram_cell_tech_type == lp_dram or self.ram_cell_tech_type == comm_dram) self.fully_assoc = bool(g_ip.fully_assoc) capacity_per_die = g_ip.cache_sz / NUMBER_STACKED_DIE_LAYERS wire_local = g_tp.wire_local - - print("HELLO!") + if self.pure_cam: self.init_CAM() - print("init_CAM") return if self.fully_assoc: self.init_FA() - print("init_FA") return if not self.calc_subarr_rc(capacity_per_die): - print("init rc") return - - print("HELLO4!") if self.is_tag: self.cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + g_ip.num_rd_ports + g_ip.num_wr_ports) self.cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + g_ip.num_wr_ports + (g_ip.num_rd_ports - g_ip.num_se_rd_ports)) + wire_local.pitch * g_ip.num_se_rd_ports - print("cell_tag!") else: if self.is_dram: self.cell.h = g_tp.dram.b_h self.cell.w = g_tp.dram.b_w - print(f"is_dram {self.cell.h}") - print(f"is_dram {self.cell.w}") else: self.cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip.num_wr_ports + g_ip.num_rw_ports - 1 + g_ip.num_rd_ports) self.cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + (g_ip.num_rd_ports - g_ip.num_se_rd_ports) + g_ip.num_wr_ports) + g_tp.wire_local.pitch * g_ip.num_se_rd_ports - - print(f"g_tp.sram.b_h: {g_tp.sram.b_h}") - print(f"wire_local.pitch: {wire_local.pitch}") - print(f"g_ip.num_wr_ports: {g_ip.num_wr_ports}") - print(f"g_ip.num_rw_ports: {g_ip.num_rw_ports}") - print(f"g_ip.num_rd_ports: {g_ip.num_rd_ports}") - print() - - print(f"g_tp.sram.b_w: {g_tp.sram.b_w}") - print(f"wire_local.pitch: {wire_local.pitch}") - print(f"g_ip.num_rw_ports: {g_ip.num_rw_ports}") - print(f"g_ip.num_rd_ports: {g_ip.num_rd_ports}") - print(f"g_ip.num_se_rd_ports: {g_ip.num_se_rd_ports}") - print(f"g_ip.num_wr_ports: {g_ip.num_wr_ports}") - print() - - print(f"not is_dram {self.cell.h}") - print(f"not is_dram {self.cell.h}") - c_b_metal = self.cell.h * wire_local.C_per_um if self.is_dram: @@ -2324,13 +2324,13 @@ def init_parameters(self): C_bl = self.num_r_subarray * (Cbitrow_drain_cap + c_b_metal) self.dram_refresh_period = 0 - self.num_mats_h_dir = symbolic_convex_max(self.Ndwl // 2, 1) - self.num_mats_v_dir = symbolic_convex_max(self.Ndbl // 2, 1) + # RECENT CHANGE + self.num_mats_h_dir = max(self.Ndwl // 2, 1) + self.num_mats_v_dir = max(self.Ndbl // 2, 1) + self.num_mats = self.num_mats_h_dir * self.num_mats_v_dir - print(f'NUM_MATS {self.num_mats}') self.num_do_b_mat = symbolic_convex_max((self.num_subarrays / self.num_mats) * self.num_c_subarray / (self.deg_bl_muxing * self.Ndsam_lev_1 * self.Ndsam_lev_2), 1) - # TODO BREAK relational if not (self.fully_assoc or self.pure_cam) and self.num_do_b_mat < (self.num_subarrays / self.num_mats): return @@ -2347,10 +2347,6 @@ def init_parameters(self): else: self.num_do_b_subbank = g_ip.out_w deg_sa_mux_l1_non_assoc = self.Ndsam_lev_1 / g_ip.data_assoc - # TODO relational - simplify_deg_sa_mux_l1_non_assoc = sp.simplify(deg_sa_mux_l1_non_assoc) - if simplify_deg_sa_mux_l1_non_assoc.is_zero or simplify_deg_sa_mux_l1_non_assoc.is_negative: - return if deg_sa_mux_l1_non_assoc < 1: return else: @@ -2378,7 +2374,6 @@ def init_parameters(self): if (not self.is_tag) and (g_ip.is_main_mem) and (self.num_act_mats_hor_dir * self.num_do_b_mat * self.Ndsam_lev_1 * self.Ndsam_lev_2 < int(g_ip.out_w * g_ip.burst_len * g_ip.data_assoc)): return - #TODO BREAK relational if self.num_act_mats_hor_dir > self.num_mats_h_dir: return @@ -2393,11 +2388,11 @@ def init_parameters(self): self.num_di_b_subbank = self.num_di_b_mat * self.num_act_mats_hor_dir self.num_si_b_subbank = self.num_si_b_mat - num_addr_b_row_dec = sp.log(self.num_r_subarray, 2) + num_addr_b_row_dec = _log2(self.num_r_subarray) if self.fully_assoc or self.pure_cam: num_addr_b_row_dec += _log2(self.num_subarrays // self.num_mats) number_subbanks = self.num_mats // self.num_act_mats_hor_dir - self.number_subbanks_decode = sp.log(number_subbanks, 2) + self.number_subbanks_decode = _log2(number_subbanks) self.num_rw_ports = g_ip.num_rw_ports self.num_rd_ports = g_ip.num_rd_ports @@ -2405,20 +2400,14 @@ def init_parameters(self): self.num_se_rd_ports = g_ip.num_se_rd_ports self.num_search_ports = g_ip.num_search_ports - print("BruHHSKJnhd") - print(deg_sa_mux_l1_non_assoc) - - # TODO had to make int - deg_sa_mux_l1_non_assoc = deg_sa_mux_l1_non_assoc - if self.is_dram and self.is_main_mem: - self.number_addr_bits_mat = max(num_addr_b_row_dec, _log2(self.deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(self.Ndsam_lev_2)) + self.number_addr_bits_mat = symbolic_convex_max(num_addr_b_row_dec, _log2(self.deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(self.Ndsam_lev_2)) if g_ip.print_detail_debug: print(f"parameter.cc: number_addr_bits_mat = {num_addr_b_row_dec}") print(f"parameter.cc: num_addr_b_row_dec = {num_addr_b_row_dec}") print(f"parameter.cc: num_addr_b_mux_sel = {_log2(self.deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(self.Ndsam_lev_2)}") else: - self.number_addr_bits_mat = num_addr_b_row_dec + _log2(self.deg_bl_muxing) + sp.log(deg_sa_mux_l1_non_assoc, 2) + _log2(self.Ndsam_lev_2) + self.number_addr_bits_mat = num_addr_b_row_dec + _log2(self.deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(self.Ndsam_lev_2) if self.is_tag: self.num_di_b_bank_per_port = self.tagbits @@ -2443,23 +2432,17 @@ def init_CAM(self): return if g_ip.specific_tag: - self.tagbits = sp.ceiling(g_ip.tag_w / 8.0) * 8 + self.tagbits = math.ceil(g_ip.tag_w / 8.0) * 8 else: - self.tagbits = sp.ceiling((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8 + self.tagbits = math.ceil((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8 - self.tag_num_r_subarray = sp.ceiling(capacity_per_die / (g_ip.nbanks * self.tagbits / 8.0 * self.Ndbl)) + self.tag_num_r_subarray = math.ceil(capacity_per_die / (g_ip.nbanks * self.tagbits / 8.0 * self.Ndbl)) self.tag_num_c_subarray = self.tagbits - if self.tag_num_r_subarray == 0: - return - if self.tag_num_r_subarray > MAXSUBARRAYROWS: - return - if self.tag_num_c_subarray < MINSUBARRAYCOLS: - return - if self.tag_num_c_subarray > MAXSUBARRAYCOLS: + if self.tag_num_r_subarray == 0 or self.tag_num_r_subarray > MAXSUBARRAYROWS or self.tag_num_c_subarray < MINSUBARRAYCOLS or self.tag_num_c_subarray > MAXSUBARRAYCOLS: return - self.num_r_subarray = self.tag_num_r_subarray + self.num_r_subarray = self.tag_num_r_subarray self.num_subarrays = self.Ndwl * self.Ndbl self.cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + g_ip.num_rd_ports + g_ip.num_wr_ports) + 2 * wire_local.pitch * (g_ip.num_search_ports - 1) + wire_local.pitch * g_ip.num_se_rd_ports @@ -2486,17 +2469,17 @@ def init_CAM(self): self.num_mats_h_dir = 1 self.num_mats_v_dir = 1 else: - self.num_mats_h_dir = sp.floor(sp.sqrt(self.Ndbl / 4.0)) + self.num_mats_h_dir = math.floor(sp.sqrt(self.Ndbl / 4.0)) self.num_mats_v_dir = int(self.Ndbl / 4.0 / self.num_mats_h_dir) self.num_mats = self.num_mats_h_dir * self.num_mats_v_dir - self.num_so_b_mat = sp.ceiling(_log2(self.num_r_subarray)) + sp.ceiling(_log2(self.num_subarrays)) + self.num_so_b_mat = math.ceil(_log2(self.num_r_subarray)) + math.ceil(_log2(self.num_subarrays)) self.num_do_b_mat = self.tagbits deg_sa_mux_l1_non_assoc = 1 - self.num_so_b_subbank = sp.ceiling(_log2(self.num_r_subarray)) + sp.ceiling(_log2(self.num_subarrays)) + self.num_so_b_subbank = math.ceil(_log2(self.num_r_subarray)) + math.ceil(_log2(self.num_subarrays)) self.num_do_b_subbank = self.tag_num_c_subarray self.deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc @@ -2529,7 +2512,7 @@ def init_CAM(self): self.num_di_b_bank_per_port = self.tagbits self.num_si_b_bank_per_port = self.tagbits self.num_do_b_bank_per_port = self.tagbits - self.num_so_b_bank_per_port = sp.ceiling(_log2(self.num_r_subarray)) + sp.ceiling(_log2(self.num_subarrays)) + self.num_so_b_bank_per_port = math.ceil(_log2(self.num_r_subarray)) + math.ceil(_log2(self.num_subarrays)) if not self.is_tag and g_ip.data_assoc > 1 and not g_ip.fast_access: self.number_way_select_signals_mat = g_ip.data_assoc @@ -2544,22 +2527,17 @@ def init_FA(self): assert NUMBER_STACKED_DIE_LAYERS == 1 capacity_per_die = g_ip.cache_sz - # TODO CHECK if self.Ndwl != 1 or self.Ndcm != 1 or self.Nspd < 1 or self.Nspd > 1 or self.Ndsam_lev_1 != 1 or self.Ndsam_lev_2 != 1 or self.Ndbl < 2: return - - print("Got past init_FA check") if g_ip.specific_tag: self.tagbits = g_ip.tag_w else: - print(f'blksz {g_ip.block_sz}') self.tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip.block_sz) self.tagbits = (((self.tagbits + 3) >> 2) << 2) - # TODO check ceiling self.tag_num_r_subarray = math.ceil(capacity_per_die / (g_ip.nbanks * g_ip.block_sz * self.Ndbl)) - self.tag_num_c_subarray = sp.ceiling((self.tagbits * self.Nspd / self.Ndwl)) + self.tag_num_c_subarray = math.ceil((self.tagbits * self.Nspd / self.Ndwl)) if self.tag_num_r_subarray == 0: return @@ -2590,7 +2568,6 @@ def init_FA(self): self.cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip.num_wr_ports + g_ip.num_rw_ports - 1 + g_ip.num_rd_ports) + 2 * wire_local.pitch * (g_ip.num_search_ports - 1) self.cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + (g_ip.num_rd_ports - g_ip.num_se_rd_ports) + g_ip.num_wr_ports) + g_tp.wire_local.pitch * g_ip.num_se_rd_ports + 2 * wire_local.pitch * (g_ip.num_search_ports - 1) - c_b_metal = self.cell.h * wire_local.C_per_um c_b_metal = self.cam_cell.h * wire_local.C_per_um self.V_b_sense = symbolic_convex_max(0.05 * g_tp.sram_cell.Vdd, VBITSENSEMIN) self.deg_bl_muxing = 1 @@ -2608,7 +2585,7 @@ def init_FA(self): self.num_mats_h_dir = 1 self.num_mats_v_dir = 1 else: - self.num_mats_h_dir = sp.floor(sp.sqrt(self.Ndbl / 4.0)) + self.num_mats_h_dir = math.floor(sp.sqrt(self.Ndbl / 4.0)) self.num_mats_v_dir = int(self.Ndbl / 4.0 / self.num_mats_h_dir) self.num_mats = self.num_mats_h_dir * self.num_mats_v_dir @@ -2633,8 +2610,8 @@ def init_FA(self): self.num_di_b_subbank = self.num_di_b_mat * self.num_act_mats_hor_dir self.num_si_b_subbank = self.num_si_b_mat - num_addr_b_row_dec = sp.log(self.num_r_subarray, 2) - num_addr_b_row_dec += sp.log(self.num_subarrays / self.num_mats, 2) + num_addr_b_row_dec = _log2(self.num_r_subarray) + num_addr_b_row_dec += _log2(self.num_subarrays / self.num_mats) number_subbanks = self.num_mats / self.num_act_mats_hor_dir self.number_subbanks_decode = _log2(number_subbanks) @@ -2659,24 +2636,26 @@ def init_FA(self): self.is_valid = True + def ECC_adjustment(self): - self.num_do_b_mat += sp.ceiling(self.num_do_b_mat / num_bits_per_ecc_b_) - self.num_di_b_mat += sp.ceiling(self.num_di_b_mat / num_bits_per_ecc_b_) - self.num_di_b_subbank += sp.ceiling(self.num_di_b_subbank / num_bits_per_ecc_b_) - self.num_do_b_subbank += sp.ceiling(self.num_do_b_subbank / num_bits_per_ecc_b_) - self.num_di_b_bank_per_port += sp.ceiling(self.num_di_b_bank_per_port / num_bits_per_ecc_b_) - self.num_do_b_bank_per_port += sp.ceiling(self.num_do_b_bank_per_port / num_bits_per_ecc_b_) - - self.num_so_b_mat += sp.ceiling(self.num_so_b_mat / num_bits_per_ecc_b_) - self.num_si_b_mat += sp.ceiling(self.num_si_b_mat / num_bits_per_ecc_b_) - self.num_si_b_subbank += sp.ceiling(self.num_si_b_subbank / num_bits_per_ecc_b_) - self.num_so_b_subbank += sp.ceiling(self.num_so_b_subbank / num_bits_per_ecc_b_) - self.num_si_b_bank_per_port += sp.ceiling(self.num_si_b_bank_per_port / num_bits_per_ecc_b_) - self.num_so_b_bank_per_port += sp.ceiling(self.num_so_b_bank_per_port / num_bits_per_ecc_b_) + self.num_do_b_mat += int(math.ceil(self.num_do_b_mat / num_bits_per_ecc_b_)) + self.num_di_b_mat += int(math.ceil(self.num_di_b_mat / num_bits_per_ecc_b_)) + self.num_di_b_subbank += int(math.ceil(self.num_di_b_subbank / num_bits_per_ecc_b_)) + self.num_do_b_subbank += int(math.ceil(self.num_do_b_subbank / num_bits_per_ecc_b_)) + self.num_di_b_bank_per_port += int(math.ceil(self.num_di_b_bank_per_port / num_bits_per_ecc_b_)) + self.num_do_b_bank_per_port += int(math.ceil(self.num_do_b_bank_per_port / num_bits_per_ecc_b_)) + + self.num_so_b_mat += int(math.ceil(self.num_so_b_mat / num_bits_per_ecc_b_)) + self.num_si_b_mat += int(math.ceil(self.num_si_b_mat / num_bits_per_ecc_b_)) + self.num_si_b_subbank += int(math.ceil(self.num_si_b_subbank / num_bits_per_ecc_b_)) + self.num_so_b_subbank += int(math.ceil(self.num_so_b_subbank / num_bits_per_ecc_b_)) + self.num_si_b_bank_per_port += int(math.ceil(self.num_si_b_bank_per_port / num_bits_per_ecc_b_)) + self.num_so_b_bank_per_port += int(math.ceil(self.num_so_b_bank_per_port / num_bits_per_ecc_b_)) + def calc_subarr_rc(self, capacity_per_die): if self.Ndwl < 2 or self.Ndbl < 2: - print("Ndwl and Ndbl set less than 2 paramter.py") + print("Ndwl and Ndbl set less than 2 parameter.py") return False if self.is_dram and not self.is_tag and self.Ndcm > 1: @@ -2686,32 +2665,27 @@ def calc_subarr_rc(self, capacity_per_die): if g_ip.specific_tag: self.tagbits = g_ip.tag_w else: - self.tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - sp.log(capacity_per_die, 2) + _log2(g_ip.tag_assoc * 2 - 1) - - print(f'{g_ip.nbanks} {g_ip.block_sz} {g_ip.tag_assoc} {self.Ndbl} {self.Nspd}') - self.num_r_subarray = sp.ceiling(capacity_per_die / (g_ip.nbanks * g_ip.block_sz * g_ip.tag_assoc * self.Ndbl * self.Nspd)) - self.num_c_subarray = sp.ceiling((self.tagbits * g_ip.tag_assoc * self.Nspd / self.Ndwl)) + self.tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - math.log2(capacity_per_die) + _log2(g_ip.tag_assoc * 2 - 1) + + self.num_r_subarray = math.ceil(capacity_per_die / (g_ip.nbanks * g_ip.block_sz * g_ip.tag_assoc * self.Ndbl * self.Nspd)) + self.num_c_subarray = math.ceil((self.tagbits * g_ip.tag_assoc * self.Nspd / self.Ndwl)) else: - self.num_r_subarray = sp.ceiling(capacity_per_die / (g_ip.nbanks * g_ip.block_sz * g_ip.data_assoc * self.Ndbl * self.Nspd)) - self.num_c_subarray = sp.ceiling((8 * g_ip.block_sz * g_ip.data_assoc * self.Nspd / self.Ndwl)) + self.num_r_subarray = math.ceil(capacity_per_die / (g_ip.nbanks * g_ip.block_sz * g_ip.data_assoc * self.Ndbl * self.Nspd)) + self.num_c_subarray = math.ceil((8 * g_ip.block_sz * g_ip.data_assoc * self.Nspd / self.Ndwl)) if g_ip.is_3d_mem: capacity_per_die_double = float(g_ip.cache_sz) / g_ip.num_die_3d self.num_c_subarray = g_ip.page_sz_bits / self.Ndwl - # TODO check used to be 1 << - self.num_r_subarray = sp.Pow(2, sp.floor(_log2(float(g_ip.cache_sz) / g_ip.num_die_3d / self.num_c_subarray / g_ip.nbanks / self.Ndbl / self.Ndwl * 1024 * 1024 * 1024) + 0.5)) + self.num_r_subarray = 1 << int(math.floor(math.log2(float(g_ip.cache_sz) / g_ip.num_die_3d / self.num_c_subarray / g_ip.nbanks / self.Ndbl / self.Ndwl * 1024 * 1024 * 1024) + 0.5)) if g_ip.print_detail_debug: print(f"parameter.cc: capacity_per_die_double = {capacity_per_die_double} Gbit") print(f"parameter.cc: g_ip.nbanks * Ndbl * Ndwl = {g_ip.nbanks * self.Ndbl * self.Ndwl}") print(f"parameter.cc: num_r_subarray = {self.num_r_subarray}") print(f"parameter.cc: num_c_subarray = {self.num_c_subarray}") - print(self.num_r_subarray) - print(self.num_c_subarray) - # TODO RELATIONAL - # if self.num_r_subarray < MINSUBARRAYROWS or self.num_r_subarray == 0 or self.num_r_subarray > MAXSUBARRAYROWS: - # return False - # if self.num_c_subarray < MINSUBARRAYCOLS or self.num_c_subarray > MAXSUBARRAYCOLS: - # return False + if self.num_r_subarray < MINSUBARRAYROWS or self.num_r_subarray == 0 or self.num_r_subarray > MAXSUBARRAYROWS: + return False + if self.num_c_subarray < MINSUBARRAYCOLS or self.num_c_subarray > MAXSUBARRAYCOLS: + return False self.num_subarrays = self.Ndwl * self.Ndbl return True @@ -2832,7 +2806,7 @@ def scan_input_double_tsv_type(line, name, unit_name, proj_type, tsv_type, print - +### basic_circuit.py #### TO AVOID CIRCULAR DEPENDNCY UNI_LEAK_STACK_FACTOR = 0.43 @@ -2851,8 +2825,9 @@ def is_pow2(val): return (_log2(val) != _log2(val - 1)) def _log2(num): + num = int(num) if num == 0: - raise ValueError("log0?") + num = 1 log2 = 0 while num > 1: num >>= 1 @@ -2942,6 +2917,7 @@ def gate_C(width, wirelength, _is_dram=False, _is_sram=False, _is_wl_tr=False, _ dt = g_tp.sleep_tx # Sleep transistor else: dt = g_tp.peri_global + return (dt.C_g_ideal + dt.C_overlap + 3 * dt.C_fringe) * width + dt.l_phy * Cpolywire def gate_C_pass(width, wirelength, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): @@ -2963,41 +2939,38 @@ def drain_C_(width, nchannel, stack, next_arg_thresh_folding_width_or_height_cel c_junc_sidewall = dt.C_junc_sidewall c_fringe = 2 * dt.C_fringe c_overlap = 2 * dt.C_overlap - drain_C_metal_connecting_folded_tr = 0 if next_arg_thresh_folding_width_or_height_cell == 0: w_folded_tr = fold_dimension - # print(f"fold_dimension {w_folded_tr}") else: - # print("else fold_dimension") h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL ratio_p_to_n = 2.0 / (2.0 + 1.0) if nchannel: w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) else: w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) - - # TODO RELATIONAL + num_folded_tr = sp.ceiling(width / w_folded_tr) - # print(f'num_folded_tr {num_folded_tr}') - # # print(width/w_folded_tr) - # w_folded_tr = sp.Piecewise( - # (width, num_folded_tr < 2), # Set w_folded_tr to width if num_folded_tr < 2 - # (w_folded_tr, True) # Keep w_folded_tr unchanged otherwise - # ) - if (not contains_any_symbol(num_folded_tr)) and num_folded_tr < 2: - w_folded_tr = width - # TODO VISIT + w_folded_tr = sp.Piecewise((width, num_folded_tr < 2), (w_folded_tr, True)) total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + (stack - 1) * g_tp.spacing_poly_to_poly - drain_h_for_sidewall = w_folded_tr + total_drain_w = sp.Piecewise( + (total_drain_w, num_folded_tr <= 1), + (total_drain_w + (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly), True) + ) + + drain_h_for_sidewall = sp.Piecewise((w_folded_tr, num_folded_tr <= 1), (0, num_folded_tr > 1)) + total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1) - if (not contains_any_symbol(num_folded_tr)) and num_folded_tr > 1: - total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly) - if num_folded_tr % 2 == 0: - drain_h_for_sidewall = 0 - total_drain_height_for_cap_wrt_gate *= num_folded_tr - drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w + total_drain_height_for_cap_wrt_gate = sp.Piecewise( + (total_drain_height_for_cap_wrt_gate, num_folded_tr <= 1), + (total_drain_height_for_cap_wrt_gate * num_folded_tr, True) + ) + + drain_C_metal_connecting_folded_tr = sp.Piecewise( + (0, num_folded_tr <= 1), + (g_tp.wire_local.C_per_um * total_drain_w, True) + ) drain_C_area = c_junc_area * total_drain_w * w_folded_tr drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w) @@ -3005,6 +2978,8 @@ def drain_C_(width, nchannel, stack, next_arg_thresh_folding_width_or_height_cel return drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr + + def tr_R_on(width, nchannel, stack, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): if _is_dram and _is_sram: dt = g_tp.dram_acc # DRAM cell access transistor @@ -3018,9 +2993,6 @@ def tr_R_on(width, nchannel, stack, _is_dram=False, _is_sram=False, _is_wl_tr=Fa dt = g_tp.peri_global restrans = dt.R_nch_on if nchannel else dt.R_pch_on - # print("tr_R_on") - # print(stack, restrans, width) - # print("end tr_R_on") return stack * restrans / width def R_to_w(res, nchannel, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): @@ -3047,19 +3019,8 @@ def pmos_to_nmos_sz_ratio(_is_dram=False, _is_wl_tr=False, _is_sleep_tx=False): return g_tp.peri_global.n_to_p_eff_curr_drv_ratio def horowitz(inputramptime, tf, vs1, vs2, rise): - #print(tf) if inputramptime == 0 and vs1 == vs2: - # #TODO important relational cannot handle - # print("HERE????") - # print(vs1) - # print(-sp.log(vs1)) - # print(f"tf: {tf}") - # print(f"tf result: {tf * (-sp.log(vs1) if vs1 < 1 else sp.log(vs1))}") - # print("ENDHEREE") - # TODO RELATIONAL - return sp.Piecewise((-sp.log(vs1), vs1 < 1), (sp.log(vs1), vs1 >= 1)) - # print("here???") - # return tf * -sp.log(vs1) + return tf * sp.Piecewise((-sp.log(vs1), vs1 < 1), (sp.log(vs1), vs1 >= 1)) a = inputramptime / tf if rise == RISE: @@ -3070,6 +3031,7 @@ def horowitz(inputramptime, tf, vs1, vs2, rise): td = tf * sp.sqrt(sp.log(1.0 - vs1) ** 2 + 2 * a * b * vs1) + tf * (sp.log(1.0 - vs1) - sp.log(1.0 - vs2)) return td + def cmos_Ileak(nWidth, pWidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): if not _is_dram and _is_cell: dt = g_tp.sram_cell # SRAM cell access transistor @@ -3159,9 +3121,7 @@ def cmos_Isub_leakage(nWidth, pWidth, fanin, g_type, _is_dram=False, _is_cell=Fa nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx) pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx) Isub = 0 - #TODO Check - #num_states = int(sp.Pow(2.0, fanin)) - num_states = sp.Pow(2.0, fanin) + num_states = int(sp.Pow(2.0, fanin)) if g_type == nmos: if fanin == 1: @@ -3219,9 +3179,7 @@ def cmos_Ig_leakage(nWidth, pWidth, fanin, g_type, _is_dram=False, _is_cell=Fals pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx) Ig_on = 0 - #TODO Check - #num_states = int(sp.Pow(2.0, fanin)) - num_states = sp.Pow(2.0, fanin) + num_states = int(sp.Pow(2.0, fanin)) if g_type == nmos: if fanin == 1: diff --git a/cacti-main/cacti_python/parameter_org.py b/cacti-main/cacti_python/parameter_org.py deleted file mode 100644 index fb7d48b..0000000 --- a/cacti-main/cacti_python/parameter_org.py +++ /dev/null @@ -1,2386 +0,0 @@ -import math -import re -from const import * -from cacti_interface import InputParameter -import sympy as sp - -sympy_var = { - 'C_g_ideal': sp.symbols('C_g_ideal'), - 'C_fringe': sp.symbols('C_fringe'), - 'C_junc': sp.symbols('C_junc'), - 'C_junc_sw': sp.symbols('C_junc_sw'), - 'l_phy': sp.symbols('l_phy'), - 'l_elec': sp.symbols('l_elec'), - 'nmos_effective_resistance_multiplier': sp.symbols('nmos_effective_resistance_multiplier'), - 'Vdd': sp.symbols('Vdd'), - 'Vth': sp.symbols('Vth'), - 'Vdsat': sp.symbols('Vdsat'), - 'I_on_n': sp.symbols('I_on_n'), - 'I_on_p': sp.symbols('I_on_p'), - 'I_off_n': sp.symbols('I_off_n'), - 'I_g_on_n': sp.symbols('I_g_on_n'), - 'C_ox': sp.symbols('C_ox'), - 't_ox': sp.symbols('t_ox'), - 'n2p_drv_rt': sp.symbols('n2p_drv_rt'), - 'lch_lk_rdc': sp.symbols('lch_lk_rdc'), - 'Mobility_n': sp.symbols('Mobility_n'), - 'gmp_to_gmn_multiplier': sp.symbols('gmp_to_gmn_multiplier'), - 'vpp': sp.symbols('vpp'), - 'Wmemcella': sp.symbols('Wmemcella'), - 'Wmemcellpmos': sp.symbols('Wmemcellpmos'), - 'Wmemcellnmos': sp.symbols('Wmemcellnmos'), - 'area_cell': sp.symbols('area_cell'), - 'asp_ratio_cell': sp.symbols('asp_ratio_cell'), - 'vdd_cell': sp.symbols('vdd_cell'), - 'dram_cell_I_on': sp.symbols('dram_cell_I_on'), - 'dram_cell_Vdd': sp.symbols('dram_cell_Vdd'), - 'dram_cell_C': sp.symbols('dram_cell_C'), - 'dram_cell_I_off_worst_case_len_temp': sp.symbols('dram_cell_I_off_worst_case_len_temp'), - 'logic_scaling_co_eff': sp.symbols('logic_scaling_co_eff'), - 'core_tx_density': sp.symbols('core_tx_density'), - 'sckt_co_eff': sp.symbols('sckt_co_eff'), - 'chip_layout_overhead': sp.symbols('chip_layout_overhead'), - 'macro_layout_overhead': sp.symbols('macro_layout_overhead'), - 'sense_delay': sp.symbols('sense_delay'), - 'sense_dy_power': sp.symbols('sense_dy_power'), - 'wire_pitch': sp.symbols('wire_pitch'), - 'barrier_thickness': sp.symbols('barrier_thickness'), - 'dishing_thickness': sp.symbols('dishing_thickness'), - 'alpha_scatter': sp.symbols('alpha_scatter'), - 'aspect_ratio': sp.symbols('aspect_ratio'), - 'miller_value': sp.symbols('miller_value'), - 'horiz_dielectric_constant': sp.symbols('horiz_dielectric_constant'), - 'vert_dielectric_constant': sp.symbols('vert_dielectric_constant'), - 'ild_thickness': sp.symbols('ild_thickness'), - 'fringe_cap': sp.symbols('fringe_cap'), - 'resistivity': sp.symbols('resistivity'), - 'wire_r_per_micron': sp.symbols('wire_r_per_micron'), - 'wire_c_per_micron': sp.symbols('wire_c_per_micron'), - 'tsv_pitch': sp.symbols('tsv_pitch'), - 'tsv_diameter': sp.symbols('tsv_diameter'), - 'tsv_length': sp.symbols('tsv_length'), - 'tsv_dielec_thickness': sp.symbols('tsv_dielec_thickness'), - 'tsv_contact_resistance': sp.symbols('tsv_contact_resistance'), - 'tsv_depletion_width': sp.symbols('tsv_depletion_width'), - 'tsv_liner_dielectric_cons': sp.symbols('tsv_liner_dielectric_cons') -} - -class TechnologyParameter: - def __init__(self): - self.reset() - - def reset(self): - self.ram_wl_stitching_overhead_ = 0 - self.min_w_nmos_ = 0 - self.max_w_nmos_ = 0 - self.max_w_nmos_dec = 0 - self.unit_len_wire_del = 0 - self.FO4 = 0 - self.kinv = 0 - self.vpp = 0 - self.w_sense_en = 0 - self.w_sense_n = 0 - self.w_sense_p = 0 - self.sense_delay = 0 - self.sense_dy_power = 0 - self.w_iso = 0 - self.w_poly_contact = 0 - self.spacing_poly_to_poly = 0 - self.spacing_poly_to_contact = 0 - self.tsv_pitch = 0 - self.tsv_diameter = 0 - self.tsv_length = 0 - self.tsv_dielec_thickness = 0 - self.tsv_contact_resistance = 0 - self.tsv_depletion_width = 0 - self.tsv_liner_dielectric_constant = 0 - self.tsv_parasitic_capacitance_fine = 0 - self.tsv_parasitic_resistance_fine = 0 - self.tsv_minimum_area_fine = 0 - self.tsv_parasitic_capacitance_coarse = 0 - self.tsv_parasitic_resistance_coarse = 0 - self.tsv_minimum_area_coarse = 0 - self.w_comp_inv_p1 = 0 - self.w_comp_inv_p2 = 0 - self.w_comp_inv_p3 = 0 - self.w_comp_inv_n1 = 0 - self.w_comp_inv_n2 = 0 - self.w_comp_inv_n3 = 0 - self.w_eval_inv_p = 0 - self.w_eval_inv_n = 0 - self.w_comp_n = 0 - self.w_comp_p = 0 - self.dram_cell_I_on = 0 - self.dram_cell_Vdd = 0 - self.dram_cell_I_off_worst_case_len_temp = 0 - self.dram_cell_C = 0 - self.gm_sense_amp_latch = 0 - self.w_nmos_b_mux = 0 - self.w_nmos_sa_mux = 0 - self.w_pmos_bl_precharge = 0 - self.w_pmos_bl_eq = 0 - self.MIN_GAP_BET_P_AND_N_DIFFS = 0 - self.MIN_GAP_BET_SAME_TYPE_DIFFS = 0 - self.HPOWERRAIL = 0 - self.cell_h_def = 0 - self.chip_layout_overhead = 0 - self.macro_layout_overhead = 0 - self.sckt_co_eff = 0 - self.fringe_cap = 0 - self.h_dec = 0 - self.sram_cell = DeviceType() - self.dram_acc = DeviceType() - self.dram_wl = DeviceType() - self.peri_global = DeviceType() - self.cam_cell = DeviceType() - self.sleep_tx = DeviceType() - self.wire_local = InterconnectType() - self.wire_inside_mat = InterconnectType() - self.wire_outside_mat = InterconnectType() - self.scaling_factor = ScalingFactor() - self.sram = MemoryType() - self.dram = MemoryType() - self.cam = MemoryType() - - def init_symbolic(): - return - - def find_upper_and_lower_tech(self, technology, tech_lo, in_file_lo, tech_hi, in_file_hi): - print(technology) - if 179 < technology < 181: - tech_lo = 180 - in_file_lo = "tech_params/180nm.dat" - tech_hi = 180 - in_file_hi = "tech_params/180nm.dat" - elif 89 < technology < 91: - tech_lo = 90 - in_file_lo = "tech_params/90nm.dat" - tech_hi = 90 - in_file_hi = "tech_params/90nm.dat" - elif 64 < technology < 66: - tech_lo = 65 - in_file_lo = "tech_params/65nm.dat" - tech_hi = 65 - in_file_hi = "tech_params/65nm.dat" - elif 44 < technology < 46: - tech_lo = 45 - in_file_lo = "tech_params/45nm.dat" - tech_hi = 45 - in_file_hi = "tech_params/45nm.dat" - elif 31 < technology < 33: - tech_lo = 32 - in_file_lo = "tech_params/32nm.dat" - tech_hi = 32 - in_file_hi = "tech_params/32nm.dat" - elif 21 < technology < 23: - tech_lo = 22 - in_file_lo = "tech_params/22nm.dat" - tech_hi = 22 - in_file_hi = "tech_params/22nm.dat" - elif 90 < technology < 180: - tech_lo = 180 - in_file_lo = "tech_params/180nm.dat" - tech_hi = 90 - in_file_hi = "tech_params/90nm.dat" - elif 65 < technology < 90: - tech_lo = 90 - in_file_lo = "tech_params/90nm.dat" - tech_hi = 65 - in_file_hi = "tech_params/65nm.dat" - elif 45 < technology < 65: - tech_lo = 65 - in_file_lo = "tech_params/65nm.dat" - tech_hi = 45 - in_file_hi = "tech_params/45nm.dat" - elif 32 < technology < 45: - tech_lo = 45 - in_file_lo = "tech_params/45nm.dat" - tech_hi = 32 - in_file_hi = "tech_params/32nm.dat" - elif 22 < technology < 32: - tech_lo = 32 - in_file_lo = "tech_params/32nm.dat" - tech_hi = 22 - in_file_hi = "tech_params/22nm.dat" - else: - print("Invalid technology nodes") - exit(0) - - return tech_lo, in_file_lo, tech_hi, in_file_hi - - def assign_tsv(self, in_file): - for iter in range(2): # 0:fine 1:coarse - tsv_type = g_ip.tsv_is_subarray_type if iter == 0 else g_ip.tsv_os_bank_type - with open(in_file, "r") as fp: - lines = fp.readlines() - - self.tsv_pitch = sympy_var['tsv_pitch'] - self.tsv_diameter = sympy_var['tsv_diameter'] - self.tsv_length = sympy_var['tsv_length'] - self.tsv_dielec_thickness = sympy_var['tsv_dielec_thickness'] - self.tsv_contact_resistance = sympy_var['tsv_contact_resistance'] - self.tsv_depletion_width = sympy_var['tsv_depletion_width'] - self.tsv_liner_dielectric_constant = sympy_var['tsv_liner_dielectric_cons'] - - # for line in lines: - # if line.startswith("-tsv_pitch"): - # self.tsv_pitch = scan_input_double_tsv_type(line, "-tsv_pitch", "F/um", g_ip.ic_proj_type, tsv_type, g_ip.print_detail_debug) - # elif line.startswith("-tsv_diameter"): - # self.tsv_diameter = scan_input_double_tsv_type(line, "-tsv_diameter", "F/um", g_ip.ic_proj_type, tsv_type, g_ip.print_detail_debug) - # elif line.startswith("-tsv_length"): - # self.tsv_length = scan_input_double_tsv_type(line, "-tsv_length", "F/um", g_ip.ic_proj_type, tsv_type, g_ip.print_detail_debug) - # elif line.startswith("-tsv_dielec_thickness"): - # self.tsv_dielec_thickness = scan_input_double_tsv_type(line, "-tsv_dielec_thickness", "F/um", g_ip.ic_proj_type, tsv_type, g_ip.print_detail_debug) - # elif line.startswith("-tsv_contact_resistance"): - # self.tsv_contact_resistance = scan_input_double_tsv_type(line, "-tsv_contact_resistance", "F/um", g_ip.ic_proj_type, tsv_type, g_ip.print_detail_debug) - # elif line.startswith("-tsv_depletion_width"): - # self.tsv_depletion_width = scan_input_double_tsv_type(line, "-tsv_depletion_width", "F/um", g_ip.ic_proj_type, tsv_type, g_ip.print_detail_debug) - # elif line.startswith("-tsv_liner_dielectric_cons"): - # self.tsv_liner_dielectric_constant = scan_input_double_tsv_type(line, "-tsv_liner_dielectric_cons", "F/um", g_ip.ic_proj_type, tsv_type, g_ip.print_detail_debug) - - self.tsv_length *= g_ip.num_die_3d - if iter == 0: - self.tsv_parasitic_resistance_fine = tsv_resistance(BULK_CU_RESISTIVITY, self.tsv_length, self.tsv_diameter, self.tsv_contact_resistance) - self.tsv_parasitic_capacitance_fine = tsv_capacitance(self.tsv_length, self.tsv_diameter, self.tsv_pitch, self.tsv_dielec_thickness, self.tsv_liner_dielectric_constant, self.tsv_depletion_width) - self.tsv_minimum_area_fine = tsv_area(self.tsv_pitch) - else: - self.tsv_parasitic_resistance_coarse = tsv_resistance(BULK_CU_RESISTIVITY, self.tsv_length, self.tsv_diameter, self.tsv_contact_resistance) - self.tsv_parasitic_capacitance_coarse = tsv_capacitance(self.tsv_length, self.tsv_diameter, self.tsv_pitch, self.tsv_dielec_thickness, self.tsv_liner_dielectric_constant, self.tsv_depletion_width) - self.tsv_minimum_area_coarse = tsv_area(self.tsv_pitch) - - def init(self, technology, is_tag): - self.reset() - ram_cell_tech_type = g_ip.tag_arr_ram_cell_tech_type if is_tag else g_ip.data_arr_ram_cell_tech_type - peri_global_tech_type = g_ip.tag_arr_peri_global_tech_type if is_tag else g_ip.data_arr_peri_global_tech_type - tech_lo, tech_hi = 0, 0 - in_file_lo, in_file_hi = "", "" - - technology *= 1000.0 # in the unit of nm - - tech_lo, in_file_lo, tech_hi, in_file_hi = self.find_upper_and_lower_tech(technology, tech_lo, in_file_lo, tech_hi, in_file_hi) - - if (tech_lo == 22) and (tech_hi == 22): - if ram_cell_tech_type == 3: - print("current version does not support eDRAM technologies at 22nm") - exit(0) - - alpha = 1 if tech_lo == tech_hi else (technology - tech_hi) / (tech_lo - tech_hi) - print(in_file_lo) - with open(in_file_lo, "r") as fp: - lines = fp.readlines() - - self.dram_cell_I_on = 0 - self.dram_cell_Vdd = 0 - self.dram_cell_C = 0 - self.dram_cell_I_off_worst_case_len_temp = 0 - self.vpp = 0 - self.macro_layout_overhead = 0 - self.chip_layout_overhead = 0 - self.sckt_co_eff = 0 - - self.dram_cell_I_on = sympy_var['dram_cell_I_on'] - self.dram_cell_Vdd = sympy_var['dram_cell_Vdd'] - self.dram_cell_C = sympy_var['dram_cell_C'] - self.dram_cell_I_off_worst_case_len_temp = sympy_var['dram_cell_I_off_worst_case_len_temp'] - self.vpp = sympy_var['vpp'] - self.sckt_co_eff = sympy_var['sckt_co_eff'] - self.chip_layout_overhead = sympy_var['chip_layout_overhead'] - self.macro_layout_overhead = sympy_var['macro_layout_overhead'] - - # for line in lines: - # if line.startswith("-dram_cell_I_on"): - # self.dram_cell_I_on += alpha * scan_five_input_double(line, "-dram_cell_I_on", "F/um", ram_cell_tech_type, g_ip.print_detail_debug) - # elif line.startswith("-dram_cell_Vdd"): - # self.dram_cell_Vdd += alpha * scan_five_input_double(line, "-dram_cell_Vdd", "F/um", ram_cell_tech_type, g_ip.print_detail_debug) - # elif line.startswith("-dram_cell_C"): - # self.dram_cell_C += alpha * scan_five_input_double(line, "-dram_cell_C", "F/um", ram_cell_tech_type, g_ip.print_detail_debug) - # elif line.startswith("-dram_cell_I_off_worst_case_len_temp"): - # self.dram_cell_I_off_worst_case_len_temp += alpha * scan_five_input_double(line, "-dram_cell_I_off_worst_case_len_temp", "F/um", ram_cell_tech_type, g_ip.print_detail_debug) - # elif line.startswith("-vpp"): - # self.vpp += alpha * scan_five_input_double(line, "-vpp", "F/um", ram_cell_tech_type, g_ip.print_detail_debug) - # elif line.startswith("-sckt_co_eff"): - # self.sckt_co_eff += alpha * scan_single_input_double(line, "-sckt_co_eff", "F/um", g_ip.print_detail_debug) - # elif line.startswith("-chip_layout_overhead"): - # self.chip_layout_overhead += alpha * scan_single_input_double(line, "-chip_layout_overhead", "F/um", g_ip.print_detail_debug) - # elif line.startswith("-macro_layout_overhead"): - # self.macro_layout_overhead += alpha * scan_single_input_double(line, "-macro_layout_overhead", "F/um", g_ip.print_detail_debug) - - peri_global_lo = DeviceType() - peri_global_hi = DeviceType() - peri_global_lo.assign(in_file_lo, peri_global_tech_type, g_ip.temp) - print("peri lo") - peri_global_lo.display() - print() - peri_global_hi.assign(in_file_hi, peri_global_tech_type, g_ip.temp) - print("peri hi") - peri_global_hi.display() - print() - - self.peri_global.interpolate(alpha, peri_global_lo, peri_global_hi) - print("peri_global") - self.peri_global.display() - print() - - sleep_tx_lo = DeviceType() - sleep_tx_hi = DeviceType() - sleep_tx_lo.assign(in_file_lo, 1, g_ip.temp) - sleep_tx_hi.assign(in_file_hi, 1, g_ip.temp) - self.sleep_tx.interpolate(alpha, sleep_tx_lo, sleep_tx_hi) - - sram_cell_lo = DeviceType() - sram_cell_hi = DeviceType() - sram_cell_lo.assign(in_file_lo, ram_cell_tech_type, g_ip.temp) - sram_cell_hi.assign(in_file_hi, ram_cell_tech_type, g_ip.temp) - self.sram_cell.interpolate(alpha, sram_cell_lo, sram_cell_hi) - - dram_acc_lo = DeviceType() - dram_acc_hi = DeviceType() - dram_acc_lo.assign(in_file_lo, ram_cell_tech_type if ram_cell_tech_type == comm_dram else dram_cell_tech_flavor, g_ip.temp) - dram_acc_hi.assign(in_file_hi, ram_cell_tech_type if ram_cell_tech_type == comm_dram else dram_cell_tech_flavor, g_ip.temp) - self.dram_acc.interpolate(alpha, dram_acc_lo, dram_acc_hi) - if tech_lo <= 22: - pass - elif tech_lo <= 32: - self.dram_acc.Vth = 0.44129 if ram_cell_tech_type == lp_dram else 1.0 - elif tech_lo <= 45: - self.dram_acc.Vth = 0.44559 if ram_cell_tech_type == lp_dram else 1.0 - elif tech_lo <= 65: - self.dram_acc.Vth = 0.43806 if ram_cell_tech_type == lp_dram else 1.0 - elif tech_lo <= 90: - self.dram_acc.Vth = 0.4545 if ram_cell_tech_type == lp_dram else 1.0 - - self.dram_acc.Vdd = 0.0 - self.dram_acc.I_on_p = 0.0 - self.dram_acc.I_off_n = 0.0 - self.dram_acc.I_off_p = 0.0 - self.dram_acc.C_ox = 0.0 - self.dram_acc.t_ox = 0.0 - self.dram_acc.n_to_p_eff_curr_drv_ratio = 0.0 - - dram_wl_lo = DeviceType() - dram_wl_hi = DeviceType() - dram_wl_lo.assign(in_file_lo, ram_cell_tech_type if ram_cell_tech_type == comm_dram else dram_cell_tech_flavor, g_ip.temp) - dram_wl_hi.assign(in_file_hi, ram_cell_tech_type if ram_cell_tech_type == comm_dram else dram_cell_tech_flavor, g_ip.temp) - self.dram_wl.interpolate(alpha, dram_wl_lo, dram_wl_hi) - - self.dram_wl.Vdd = 0.0 - self.dram_wl.Vth = 0.0 - self.dram_wl.I_on_p = 0.0 - self.dram_wl.C_ox = 0.0 - self.dram_wl.t_ox = 0.0 - - if ram_cell_tech_type < 3: - self.dram_acc.reset() - self.dram_wl.reset() - - cam_cell_lo = DeviceType() - cam_cell_hi = DeviceType() - cam_cell_lo.assign(in_file_lo, ram_cell_tech_type, g_ip.temp) - cam_cell_hi.assign(in_file_hi, ram_cell_tech_type, g_ip.temp) - self.cam_cell.interpolate(alpha, cam_cell_lo, cam_cell_hi) - - dram_lo = MemoryType() - dram_hi = MemoryType() - dram_lo.assign(in_file_lo, ram_cell_tech_type, 2) # cell_type = dram(2) - dram_hi.assign(in_file_hi, ram_cell_tech_type, 2) - self.dram.interpolate(alpha, dram_lo, dram_hi) - - sram_lo = MemoryType() - sram_hi = MemoryType() - sram_lo.assign(in_file_lo, ram_cell_tech_type, 0) # cell_type = sram(0) - sram_hi.assign(in_file_hi, ram_cell_tech_type, 0) - self.sram.interpolate(alpha, sram_lo, sram_hi) - - cam_lo = MemoryType() - cam_hi = MemoryType() - cam_lo.assign(in_file_lo, ram_cell_tech_type, 1) # cell_type = sram(0) - cam_hi.assign(in_file_hi, ram_cell_tech_type, 1) - self.cam.interpolate(alpha, cam_lo, cam_hi) - - scaling_factor_lo = ScalingFactor() - scaling_factor_hi = ScalingFactor() - scaling_factor_lo.assign(in_file_lo) - scaling_factor_hi.assign(in_file_hi) - self.scaling_factor.interpolate(alpha, scaling_factor_lo, scaling_factor_hi) - - self.peri_global.Vcc_min += (alpha * peri_global_lo.Vdd + (1 - alpha) * peri_global_hi.Vdd) * 0.35 - self.sleep_tx.Vcc_min += alpha * sleep_tx_lo.Vdd + (1 - alpha) * sleep_tx_hi.Vdd - self.sram_cell.Vcc_min += (alpha * sram_cell_lo.Vdd + (1 - alpha) * sram_cell_hi.Vdd) * 0.65 - - with open(in_file_hi, "r") as fp: - lines = fp.readlines() - - self.sense_delay = sympy_var['sense_delay'] - self.sense_dy_power = sympy_var['sense_dy_power'] - self.sckt_co_eff = sympy_var['sckt_co_eff'] - self.chip_layout_overhead = sympy_var['chip_layout_overhead'] - self.macro_layout_overhead = sympy_var['macro_layout_overhead'] - self.dram_cell_I_on = sympy_var['dram_cell_I_on'] - self.dram_cell_Vdd = sympy_var['dram_cell_Vdd'] - self.dram_cell_C = sympy_var['dram_cell_C'] - self.dram_cell_I_off_worst_case_len_temp = sympy_var['dram_cell_I_off_worst_case_len_temp'] - self.vpp = sympy_var['vpp'] - - # for line in lines: - # if line.startswith("-sense_delay"): - # self.sense_delay = scan_single_input_double(line, "-sense_delay", "F/um", g_ip.print_detail_debug) - # elif line.startswith("-sense_dy_power"): - # self.sense_dy_power = scan_single_input_double(line, "-sense_dy_power", "F/um", g_ip.print_detail_debug) - # elif line.startswith("-sckt_co_eff"): - # self.sckt_co_eff += (1 - alpha) * scan_single_input_double(line, "-sckt_co_eff", "F/um", g_ip.print_detail_debug) - # elif line.startswith("-chip_layout_overhead"): - # self.chip_layout_overhead += (1 - alpha) * scan_single_input_double(line, "-chip_layout_overhead", "F/um", g_ip.print_detail_debug) - # elif line.startswith("-macro_layout_overhead"): - # self.macro_layout_overhead += (1 - alpha) * scan_single_input_double(line, "-macro_layout_overhead", "F/um", g_ip.print_detail_debug) - # elif line.startswith("-dram_cell_I_on"): - # self.dram_cell_I_on += (1 - alpha) * scan_five_input_double(line, "-dram_cell_I_on", "F/um", ram_cell_tech_type, g_ip.print_detail_debug) - # elif line.startswith("-dram_cell_Vdd"): - # self.dram_cell_Vdd += (1 - alpha) * scan_five_input_double(line, "-dram_cell_Vdd", "F/um", ram_cell_tech_type, g_ip.print_detail_debug) - # elif line.startswith("-dram_cell_C"): - # self.dram_cell_C += (1 - alpha) * scan_five_input_double(line, "-dram_cell_C", "F/um", ram_cell_tech_type, g_ip.print_detail_debug) - # elif line.startswith("-dram_cell_I_off_worst_case_len_temp"): - # self.dram_cell_I_off_worst_case_len_temp += (1 - alpha) * scan_five_input_double(line, "-dram_cell_I_off_worst_case_len_temp", "F/um", ram_cell_tech_type, g_ip.print_detail_debug) - # elif line.startswith("-vpp"): - # self.vpp += (1 - alpha) * scan_five_input_double(line, "-vpp", "F/um", ram_cell_tech_type, g_ip.print_detail_debug) - - self.w_comp_inv_p1 = 12.5 * g_ip.F_sz_um - self.w_comp_inv_n1 = 7.5 * g_ip.F_sz_um - self.w_comp_inv_p2 = 25 * g_ip.F_sz_um - self.w_comp_inv_n2 = 15 * g_ip.F_sz_um - self.w_comp_inv_p3 = 50 * g_ip.F_sz_um - self.w_comp_inv_n3 = 30 * g_ip.F_sz_um - self.w_eval_inv_p = 100 * g_ip.F_sz_um - self.w_eval_inv_n = 50 * g_ip.F_sz_um - self.w_comp_n = 12.5 * g_ip.F_sz_um - self.w_comp_p = 37.5 * g_ip.F_sz_um - self.MIN_GAP_BET_P_AND_N_DIFFS = 5 * g_ip.F_sz_um - self.MIN_GAP_BET_SAME_TYPE_DIFFS = 1.5 * g_ip.F_sz_um - self.HPOWERRAIL = 2 * g_ip.F_sz_um - self.cell_h_def = 50 * g_ip.F_sz_um - self.w_poly_contact = g_ip.F_sz_um - self.spacing_poly_to_contact = g_ip.F_sz_um - self.spacing_poly_to_poly = 1.5 * g_ip.F_sz_um - self.ram_wl_stitching_overhead_ = 7.5 * g_ip.F_sz_um - self.min_w_nmos_ = 3 * g_ip.F_sz_um / 2 - self.max_w_nmos_ = 100 * g_ip.F_sz_um - self.w_iso = 12.5 * g_ip.F_sz_um - self.w_sense_n = 3.75 * g_ip.F_sz_um - self.w_sense_p = 7.5 * g_ip.F_sz_um - self.w_sense_en = 5 * g_ip.F_sz_um - self.w_nmos_b_mux = 6 * self.min_w_nmos_ - self.w_nmos_sa_mux = 6 * self.min_w_nmos_ - self.w_pmos_bl_precharge = 6 * pmos_to_nmos_sz_ratio() * self.min_w_nmos_ - self.w_pmos_bl_eq = pmos_to_nmos_sz_ratio() * self.min_w_nmos_ - - if ram_cell_tech_type == comm_dram: - self.max_w_nmos_dec = 8 * g_ip.F_sz_um - self.h_dec = 8 # in the unit of memory cell height - else: - self.max_w_nmos_dec = self.max_w_nmos_ - self.h_dec = 4 # in the unit of memory cell height - - #TODO CHECK 388 for 180nm - print(self.peri_global.l_elec) - gmn_sense_amp_latch = (self.peri_global.Mobility_n / 2) * self.peri_global.C_ox * (self.w_sense_n / self.peri_global.l_elec) * self.peri_global.Vdsat - gmp_sense_amp_latch = self.peri_global.gmp_to_gmn_multiplier * gmn_sense_amp_latch - self.gm_sense_amp_latch = gmn_sense_amp_latch + gmp_sense_amp_latch - - wire_local_lo = InterconnectType() - wire_local_hi = InterconnectType() - wire_local_lo.assign(in_file_lo, g_ip.ic_proj_type, 3 if ram_cell_tech_type == comm_dram else 0) - wire_local_hi.assign(in_file_hi, g_ip.ic_proj_type, 3 if ram_cell_tech_type == comm_dram else 0) - self.wire_local.interpolate(alpha, wire_local_lo, wire_local_hi) - - wire_inside_mat_lo = InterconnectType() - wire_inside_mat_hi = InterconnectType() - wire_inside_mat_lo.assign(in_file_lo, g_ip.ic_proj_type, g_ip.wire_is_mat_type) - wire_inside_mat_hi.assign(in_file_hi, g_ip.ic_proj_type, g_ip.wire_is_mat_type) - self.wire_inside_mat.interpolate(alpha, wire_inside_mat_lo, wire_inside_mat_hi) - - wire_outside_mat_lo = InterconnectType() - wire_outside_mat_hi = InterconnectType() - wire_outside_mat_lo.assign(in_file_lo, g_ip.ic_proj_type, g_ip.wire_os_mat_type) - wire_outside_mat_hi.assign(in_file_hi, g_ip.ic_proj_type, g_ip.wire_os_mat_type) - self.wire_outside_mat.interpolate(alpha, wire_outside_mat_lo, wire_outside_mat_hi) - - self.unit_len_wire_del = self.wire_inside_mat.R_per_um * self.wire_inside_mat.C_per_um / 2 - - self.assign_tsv(in_file_hi) - - self.fringe_cap = wire_local_hi.fringe_cap - - rd = tr_R_on(self.min_w_nmos_, NCH, 1) - p_to_n_sizing_r = pmos_to_nmos_sz_ratio() - c_load = gate_C(self.min_w_nmos_ * (1 + p_to_n_sizing_r), 0.0) - tf = rd * c_load - self.kinv = horowitz(0, tf, 0.5, 0.5, RISE) - KLOAD = 1 - c_load = KLOAD * (drain_C_(self.min_w_nmos_, NCH, 1, 1, self.cell_h_def) + - drain_C_(self.min_w_nmos_ * p_to_n_sizing_r, PCH, 1, 1, self.cell_h_def) + - gate_C(self.min_w_nmos_ * 4 * (1 + p_to_n_sizing_r), 0.0)) - tf = rd * c_load - self.FO4 = horowitz(0, tf, 0.5, 0.5, RISE) - - def isEqual(self, tech): - if not is_equal(self.ram_wl_stitching_overhead_, tech.ram_wl_stitching_overhead_): - assert False # fs - if not is_equal(self.min_w_nmos_, tech.min_w_nmos_): - assert False # fs - if not is_equal(self.max_w_nmos_, tech.max_w_nmos_): - assert False # fs - if not is_equal(self.max_w_nmos_dec, tech.max_w_nmos_dec): - assert False # fs + ram_cell_tech_type - if not is_equal(self.unit_len_wire_del, tech.unit_len_wire_del): - assert False # wire_inside_mat - if not is_equal(self.FO4, tech.FO4): - assert False # fs - if not is_equal(self.kinv, tech.kinv): - assert False # fs - if not is_equal(self.vpp, tech.vpp): - assert False # input - if not is_equal(self.w_sense_en, tech.w_sense_en): - assert False # fs - if not is_equal(self.w_sense_n, tech.w_sense_n): - assert False # fs - if not is_equal(self.w_sense_p, tech.w_sense_p): - assert False # fs - if not is_equal(self.sense_delay, tech.sense_delay): - PRINT("sense_delay", self.sense_delay, tech) - assert False # input - if not is_equal(self.sense_dy_power, tech.sense_dy_power): - assert False # input - if not is_equal(self.w_iso, tech.w_iso): - assert False # fs - if not is_equal(self.w_poly_contact, tech.w_poly_contact): - assert False # fs - if not is_equal(self.spacing_poly_to_poly, tech.spacing_poly_to_poly): - assert False # fs - if not is_equal(self.spacing_poly_to_contact, tech.spacing_poly_to_contact): - assert False # fs - - # CACTI3D auxiliary variables - # if not is_equal(self.tsv_pitch, tech.tsv_pitch): - # assert False - # if not is_equal(self.tsv_diameter, tech.tsv_diameter): - # assert False - # if not is_equal(self.tsv_length, tech.tsv_length): - # assert False - # if not is_equal(self.tsv_dielec_thickness, tech.tsv_dielec_thickness): - # assert False - # if not is_equal(self.tsv_contact_resistance, tech.tsv_contact_resistance): - # assert False - # if not is_equal(self.tsv_depletion_width, tech.tsv_depletion_width): - # assert False - # if not is_equal(self.tsv_liner_dielectric_constant, tech.tsv_liner_dielectric_constant): - # assert False - - # CACTI3DD TSV params - if not is_equal(self.tsv_parasitic_capacitance_fine, tech.tsv_parasitic_capacitance_fine): - PRINT("tsv_parasitic_capacitance_fine", self.tsv_parasitic_capacitance_fine, tech) - assert False - if not is_equal(self.tsv_parasitic_resistance_fine, tech.tsv_parasitic_resistance_fine): - assert False - if not is_equal(self.tsv_minimum_area_fine, tech.tsv_minimum_area_fine): - assert False - - if not is_equal(self.tsv_parasitic_capacitance_coarse, tech.tsv_parasitic_capacitance_coarse): - assert False - if not is_equal(self.tsv_parasitic_resistance_coarse, tech.tsv_parasitic_resistance_coarse): - assert False - if not is_equal(self.tsv_minimum_area_coarse, tech.tsv_minimum_area_coarse): - assert False - - # fs - if not is_equal(self.w_comp_inv_p1, tech.w_comp_inv_p1): - assert False - if not is_equal(self.w_comp_inv_p2, tech.w_comp_inv_p2): - assert False - if not is_equal(self.w_comp_inv_p3, tech.w_comp_inv_p3): - assert False - if not is_equal(self.w_comp_inv_n1, tech.w_comp_inv_n1): - assert False - if not is_equal(self.w_comp_inv_n2, tech.w_comp_inv_n2): - assert False - if not is_equal(self.w_comp_inv_n3, tech.w_comp_inv_n3): - assert False - if not is_equal(self.w_eval_inv_p, tech.w_eval_inv_p): - assert False - if not is_equal(self.w_eval_inv_n, tech.w_eval_inv_n): - assert False - if not is_equal(self.w_comp_n, tech.w_comp_n): - assert False - if not is_equal(self.w_comp_p, tech.w_comp_p): - assert False - - if not is_equal(self.dram_cell_I_on, tech.dram_cell_I_on): - assert False # ram_cell_tech_type - if not is_equal(self.dram_cell_Vdd, tech.dram_cell_Vdd): - assert False - if not is_equal(self.dram_cell_I_off_worst_case_len_temp, tech.dram_cell_I_off_worst_case_len_temp): - assert False - if not is_equal(self.dram_cell_C, tech.dram_cell_C): - assert False - if not is_equal(self.gm_sense_amp_latch, tech.gm_sense_amp_latch): - assert False # depends on many things - - if not is_equal(self.w_nmos_b_mux, tech.w_nmos_b_mux): - assert False # fs - if not is_equal(self.w_nmos_sa_mux, tech.w_nmos_sa_mux): - assert False # fs - if not is_equal(self.w_pmos_bl_precharge, tech.w_pmos_bl_precharge): - PRINT("w_pmos_bl_precharge", self.w_pmos_bl_precharge, tech) - assert False # fs - if not is_equal(self.w_pmos_bl_eq, tech.w_pmos_bl_eq): - assert False # fs - if not is_equal(self.MIN_GAP_BET_P_AND_N_DIFFS, tech.MIN_GAP_BET_P_AND_N_DIFFS): - assert False # fs - if not is_equal(self.MIN_GAP_BET_SAME_TYPE_DIFFS, tech.MIN_GAP_BET_SAME_TYPE_DIFFS): - assert False # fs - if not is_equal(self.HPOWERRAIL, tech.HPOWERRAIL): - assert False # fs - if not is_equal(self.cell_h_def, tech.cell_h_def): - assert False # fs - - if not is_equal(self.chip_layout_overhead, tech.chip_layout_overhead): - assert False # input - if not is_equal(self.macro_layout_overhead, tech.macro_layout_overhead): - print(f"{self.macro_layout_overhead} vs. {tech.macro_layout_overhead}") - assert False - if not is_equal(self.sckt_co_eff, tech.sckt_co_eff): - assert False - - if not is_equal(self.fringe_cap, tech.fringe_cap): - PRINT("fringe_cap", self.fringe_cap, tech) - assert False # input - - if self.h_dec != tech.h_dec: - assert False # ram_cell_tech_type - - print("sram_cell") - self.sram_cell.isEqual(tech.sram_cell) # SRAM cell transistor - print("dram_acc") - self.dram_acc.isEqual(tech.dram_acc) # DRAM access transistor - print("dram_wl") - self.dram_wl.isEqual(tech.dram_wl) # DRAM wordline transistor - print("peri_global") - self.peri_global.isEqual(tech.peri_global) # peripheral global - print("cam_cell") - self.cam_cell.isEqual(tech.cam_cell) # SRAM cell transistor - - print("sleep_tx") - self.sleep_tx.isEqual(tech.sleep_tx) # Sleep transistor cell transistor - - print("wire_local") - self.wire_local.isEqual(tech.wire_local) - print("wire_inside_mat") - self.wire_inside_mat.isEqual(tech.wire_inside_mat) - print("wire_outside_mat") - self.wire_outside_mat.isEqual(tech.wire_outside_mat) - - print("scaling_factor") - self.scaling_factor.isEqual(tech.scaling_factor) - print("sram:") - self.sram.isEqual(tech.sram) - print("dram:") - self.dram.isEqual(tech.dram) - print("cam:") - self.cam.isEqual(tech.cam) - - return True - -def is_equal(first, second): - if first == 0 and second == 0: - return True - - if second == 0 or sp.isnan(second): - return True - - if sp.isnan(first) or sp.isnan(second): - return True - - if first == 0: - if abs(first - second) < (second * 0.000001): - return True - else: - if abs(first - second) < (first * 0.000001): - return True - - return False - -class DeviceType: - def __init__(self): - self.C_g_ideal = 0 - self.C_fringe = 0 - self.C_overlap = 0 - self.C_junc = 0 # C_junc_area - self.C_junc_sidewall = 0 - self.l_phy = 0 - self.l_elec = 0 - self.R_nch_on = 0 - self.R_pch_on = 0 - self.Vdd = 0 - self.Vth = 0 - self.Vcc_min = 0 # allowed min vcc; for memory cell it is the lowest vcc for data retention. for logic it is the vcc to balance the leakage reduction and wakeup latency - self.I_on_n = 0 - self.I_on_p = 0 - self.I_off_n = 0 - self.I_off_p = 0 - self.I_g_on_n = 0 - self.I_g_on_p = 0 - self.C_ox = 0 - self.t_ox = 0 - self.n_to_p_eff_curr_drv_ratio = 0 - self.long_channel_leakage_reduction = 0 - self.Mobility_n = 0 - - # auxiliary parameters - self.Vdsat = 0 - self.gmp_to_gmn_multiplier = 0 - - def reset(self): - self.C_g_ideal = 0 - self.C_fringe = 0 - self.C_overlap = 0 - self.C_junc = 0 # C_junc_area - self.C_junc_sidewall = 0 - self.l_phy = 0 - self.l_elec = 0 - self.R_nch_on = 0 - self.R_pch_on = 0 - self.Vdd = 0 - self.Vth = 0 - self.Vcc_min = 0 # allowed min vcc; for memory cell it is the lowest vcc for data retention. for logic it is the vcc to balance the leakage reduction and wakeup latency - self.I_on_n = 0 - self.I_on_p = 0 - self.I_off_n = 0 - self.I_off_p = 0 - self.I_g_on_n = 0 - self.I_g_on_p = 0 - self.C_ox = 0 - self.t_ox = 0 - self.n_to_p_eff_curr_drv_ratio = 0 - self.long_channel_leakage_reduction = 0 - self.Mobility_n = 0 - - # auxiliary parameters - self.Vdsat = 0 - self.gmp_to_gmn_multiplier = 0 - - def display(self, indent=0): - indent_str = ' ' * indent - print(f"{indent_str}C_g_ideal = {self.C_g_ideal} F/um") - print(f"{indent_str}C_fringe = {self.C_fringe} F/um") - print(f"{indent_str}C_overlap = {self.C_overlap} F/um") - print(f"{indent_str}C_junc = {self.C_junc} F/um^2") - print(f"{indent_str}C_junc_sw = {self.C_junc_sidewall} F/um^2") - print(f"{indent_str}l_phy = {self.l_phy} um") - print(f"{indent_str}l_elec = {self.l_elec} um") - print(f"{indent_str}R_nch_on = {self.R_nch_on} ohm-um") - print(f"{indent_str}R_pch_on = {self.R_pch_on} ohm-um") - print(f"{indent_str}Vdd = {self.Vdd} V") - print(f"{indent_str}Vth = {self.Vth} V") - print(f"{indent_str}I_on_n = {self.I_on_n} A/um") - print(f"{indent_str}I_on_p = {self.I_on_p} A/um") - print(f"{indent_str}I_off_n = {self.I_off_n} A/um") - print(f"{indent_str}I_off_p = {self.I_off_p} A/um") - print(f"{indent_str}C_ox = {self.C_ox} F/um^2") - print(f"{indent_str}t_ox = {self.t_ox} um") - print(f"{indent_str}n_to_p_eff_curr_drv_ratio = {self.n_to_p_eff_curr_drv_ratio}") - - def isEqual(self, dev): - if not is_equal(self.C_g_ideal, dev.C_g_ideal): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.C_fringe, dev.C_fringe): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.C_overlap, dev.C_overlap): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.C_junc, dev.C_junc): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.C_junc_sidewall, dev.C_junc_sidewall): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.l_phy, dev.l_phy): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.l_elec, dev.l_elec): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.R_nch_on, dev.R_nch_on): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.R_pch_on, dev.R_pch_on): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.Vdd, dev.Vdd): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.Vth, dev.Vth): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.I_on_n, dev.I_on_n): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.I_on_p, dev.I_on_p): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.I_off_n, dev.I_off_n): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.I_off_p, dev.I_off_p): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.I_g_on_n, dev.I_g_on_n): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.I_g_on_p, dev.I_g_on_p): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.C_ox, dev.C_ox): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.t_ox, dev.t_ox): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.n_to_p_eff_curr_drv_ratio, dev.n_to_p_eff_curr_drv_ratio): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.long_channel_leakage_reduction, dev.long_channel_leakage_reduction): self.display(); print("\n\n\n"); dev.display(); assert False - if not is_equal(self.Mobility_n, dev.Mobility_n): self.display(); print("\n\n\n"); dev.display(); assert False - return True - - def assign(self, in_file, tech_flavor, temperature): - with open(in_file, 'r') as fp: - lines = fp.readlines() - - nmos_effective_resistance_multiplier = 0 - - self.C_g_ideal = sympy_var['C_g_ideal'] - self.C_fringe = sympy_var['C_fringe'] - self.C_junc_sidewall = sympy_var['C_junc_sw'] - self.C_junc = sympy_var['C_junc'] - self.l_phy = sympy_var['l_phy'] - self.l_elec = sympy_var['l_elec'] - self.nmos_effective_resistance_multiplier = sympy_var['nmos_effective_resistance_multiplier'] - self.Vdd = sympy_var['Vdd'] - self.Vth = sympy_var['Vth'] - self.Vdsat = sympy_var['Vdsat'] - self.I_on_n = sympy_var['I_on_n'] - self.I_on_p = sympy_var['I_on_p'] - self.I_off_n = sympy_var['I_off_n'] - self.I_g_on_n = sympy_var['I_g_on_n'] - self.C_ox = sympy_var['C_ox'] - self.t_ox = sympy_var['t_ox'] - self.n_to_p_eff_curr_drv_ratio = sympy_var['n2p_drv_rt'] - self.long_channel_leakage_reduction = sympy_var['lch_lk_rdc'] - self.Mobility_n = sympy_var['Mobility_n'] - self.gmp_to_gmn_multiplier = sympy_var['gmp_to_gmn_multiplier'] - - # for line in lines: - # if line.startswith("-C_g_ideal"): - # self.C_g_ideal = scan_five_input_double(line, "-C_g_ideal", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-C_fringe"): - # self.C_fringe = scan_five_input_double(line, "-C_fringe", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-C_junc_sw"): - # self.C_junc_sidewall = scan_five_input_double(line, "-C_junc_sw", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-C_junc"): - # self.C_junc = scan_five_input_double(line, "-C_junc", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-l_phy"): - # self.l_phy = scan_five_input_double(line, "-l_phy", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-l_elec"): - # print("HERE!") - # self.l_elec = scan_five_input_double(line, "-l_elec", "F/um", tech_flavor, g_ip.print_detail_debug) - # print(self.l_elec) - # print() - # continue - # if line.startswith("-nmos_effective_resistance_multiplier"): - # nmos_effective_resistance_multiplier = scan_five_input_double(line, "-nmos_effective_resistance_multiplier", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-Vdd"): - # self.Vdd = scan_five_input_double(line, "-Vdd", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-Vth"): - # self.Vth = scan_five_input_double(line, "-Vth", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-Vdsat"): - # self.Vdsat = scan_five_input_double(line, "-Vdsat", "V", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-I_on_n"): - # self.I_on_n = scan_five_input_double(line, "-I_on_n", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-I_on_p"): - # self.I_on_p = scan_five_input_double(line, "-I_on_p", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-I_off_n"): - # scan_five_input_double_temperature(line, "-I_off_n", "F/um", tech_flavor, temperature, g_ip.print_detail_debug, self.I_off_n) - # continue - # if line.startswith("-I_g_on_n"): - # scan_five_input_double_temperature(line, "-I_g_on_n", "F/um", tech_flavor, temperature, g_ip.print_detail_debug, self.I_g_on_n) - # continue - # if line.startswith("-C_ox"): - # self.C_ox = scan_five_input_double(line, "-C_ox", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-t_ox"): - # self.t_ox = scan_five_input_double(line, "-t_ox", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-n2p_drv_rt"): - # self.n_to_p_eff_curr_drv_ratio = scan_five_input_double(line, "-n2p_drv_rt", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-lch_lk_rdc"): - # self.long_channel_leakage_reduction = scan_five_input_double(line, "-lch_lk_rdc", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-Mobility_n"): - # self.Mobility_n = scan_five_input_double(line, "-Mobility_n", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-gmp_to_gmn_multiplier"): - # self.gmp_to_gmn_multiplier = scan_five_input_double(line, "-gmp_to_gmn_multiplier", "F/um", tech_flavor, g_ip.print_detail_debug) - # continue - - self.C_overlap = 0.2 * self.C_g_ideal - if tech_flavor >= 3: - if(self.I_on_n): - self.R_nch_on = nmos_effective_resistance_multiplier * g_tp.vpp / self.I_on_n - else: - if(self.I_on_n): - self.R_nch_on = nmos_effective_resistance_multiplier * self.Vdd / self.I_on_n - self.R_pch_on = self.n_to_p_eff_curr_drv_ratio * self.R_nch_on - self.I_off_p = self.I_off_n - self.I_g_on_p = self.I_g_on_n - if g_ip.print_detail_debug: - print(f"C_overlap: {self.C_overlap} F/um") - print(f"R_nch_on: {self.R_nch_on} ohm-micron") - print(f"R_pch_on: {self.R_pch_on} ohm-micron") - - def interpolate(self, alpha, dev1, dev2): - result = DeviceType() - self.C_g_ideal = alpha * dev1.C_g_ideal + (1 - alpha) * dev2.C_g_ideal - print(f'GLOBAL result {self.C_g_ideal}') - self.C_fringe = alpha * dev1.C_fringe + (1 - alpha) * dev2.C_fringe - self.C_overlap = alpha * dev1.C_overlap + (1 - alpha) * dev2.C_overlap - self.C_junc = alpha * dev1.C_junc + (1 - alpha) * dev2.C_junc - self.l_phy = alpha * dev1.l_phy + (1 - alpha) * dev2.l_phy - self.l_elec = alpha * dev1.l_elec + (1 - alpha) * dev2.l_elec - self.R_nch_on = alpha * dev1.R_nch_on + (1 - alpha) * dev2.R_nch_on - self.R_pch_on = alpha * dev1.R_pch_on + (1 - alpha) * dev2.R_pch_on - self.Vdd = alpha * dev1.Vdd + (1 - alpha) * dev2.Vdd - self.Vth = alpha * dev1.Vth + (1 - alpha) * dev2.Vth - self.Vcc_min = alpha * dev1.Vcc_min + (1 - alpha) * dev2.Vcc_min - self.I_on_n = alpha * dev1.I_on_n + (1 - alpha) * dev2.I_on_n - self.I_on_p = alpha * dev1.I_on_p + (1 - alpha) * dev2.I_on_p - self.I_off_n = alpha * dev1.I_off_n + (1 - alpha) * dev2.I_off_n - self.I_off_p = alpha * dev1.I_off_p + (1 - alpha) * dev2.I_off_p - self.I_g_on_n = alpha * dev1.I_g_on_n + (1 - alpha) * dev2.I_g_on_n - self.I_g_on_p = alpha * dev1.I_g_on_p + (1 - alpha) * dev2.I_g_on_p - self.C_ox = alpha * dev1.C_ox + (1 - alpha) * dev2.C_ox - self.t_ox = alpha * dev1.t_ox + (1 - alpha) * dev2.t_ox - self.n_to_p_eff_curr_drv_ratio = alpha * dev1.n_to_p_eff_curr_drv_ratio + (1 - alpha) * dev2.n_to_p_eff_curr_drv_ratio - self.long_channel_leakage_reduction = alpha * dev1.long_channel_leakage_reduction + (1 - alpha) * dev2.long_channel_leakage_reduction - self.Mobility_n = alpha * dev1.Mobility_n + (1 - alpha) * dev2.Mobility_n - self.Vdsat = alpha * dev1.Vdsat + (1 - alpha) * dev2.Vdsat - self.gmp_to_gmn_multiplier = alpha * dev1.gmp_to_gmn_multiplier + (1 - alpha) * dev2.gmp_to_gmn_multiplier - self.C_junc_sidewall = dev1.C_junc_sidewall - -class InterconnectType: - def __init__(self): - self.pitch = 0 - self.R_per_um = 0 - self.C_per_um = 0 - self.horiz_dielectric_constant = 0 - self.vert_dielectric_constant = 0 - self.aspect_ratio = 0 - self.miller_value = 0 - self.ild_thickness = 0 - - # auxiliary parameters - self.wire_width = 0 - self.wire_thickness = 0 - self.wire_spacing = 0 - self.barrier_thickness = 0 - self.dishing_thickness = 0 - self.alpha_scatter = 0 - self.fringe_cap = 0 - - self.reset() - - def reset(self): - self.pitch = 0 - self.R_per_um = 0 - self.C_per_um = 0 - self.horiz_dielectric_constant = 0 - self.vert_dielectric_constant = 0 - self.aspect_ratio = 0 - self.miller_value = 0 - self.ild_thickness = 0 - - # auxiliary parameters - self.wire_width = 0 - self.wire_thickness = 0 - self.wire_spacing = 0 - self.barrier_thickness = 0 - self.dishing_thickness = 0 - self.alpha_scatter = 0 - self.fringe_cap = 0 - - def is_equal(self, inter): - if not is_equal(self.pitch, inter.pitch): return False - if not is_equal(self.R_per_um, inter.R_per_um): return False - if not is_equal(self.C_per_um, inter.C_per_um): return False - if not is_equal(self.horiz_dielectric_constant, inter.horiz_dielectric_constant): return False - if not is_equal(self.vert_dielectric_constant, inter.vert_dielectric_constant): return False - if not is_equal(self.aspect_ratio, inter.aspect_ratio): return False - if not is_equal(self.miller_value, inter.miller_value): return False - if not is_equal(self.ild_thickness, inter.ild_thickness): return False - return True - - def display(self, indent=0): - indent_str = ' ' * indent - print(f"{indent_str}pitch = {self.pitch} um") - print(f"{indent_str}R_per_um = {self.R_per_um} ohm/um") - print(f"{indent_str}C_per_um = {self.C_per_um} F/um") - print(f"{indent_str}horiz_dielectric_constant = {self.horiz_dielectric_constant}") - print(f"{indent_str}vert_dielectric_constant = {self.vert_dielectric_constant}") - print(f"{indent_str}aspect_ratio = {self.aspect_ratio}") - print(f"{indent_str}miller_value = {self.miller_value}") - print(f"{indent_str}ild_thickness = {self.ild_thickness} um") - - def assign(self, in_file, projection_type, tech_flavor): - with open(in_file, 'r') as fp: - lines = fp.readlines() - - resistivity = 0 - print_debug = g_ip.print_detail_debug - - self.pitch = sympy_var['wire_pitch'] - self.barrier_thickness = sympy_var['barrier_thickness'] - self.dishing_thickness = sympy_var['dishing_thickness'] - self.alpha_scatter = sympy_var['alpha_scatter'] - self.aspect_ratio = sympy_var['aspect_ratio'] - self.miller_value = sympy_var['miller_value'] - self.horiz_dielectric_constant = sympy_var['horiz_dielectric_constant'] - self.vert_dielectric_constant = sympy_var['vert_dielectric_constant'] - self.ild_thickness = sympy_var['ild_thickness'] - self.fringe_cap = sympy_var['fringe_cap'] - self.R_per_um = sympy_var['wire_r_per_micron'] - self.C_per_um = sympy_var['wire_c_per_micron'] - self.resistivity = sympy_var['resistivity'] - - # for line in lines: - # if line.startswith("-wire_pitch"): - # self.pitch = scan_input_double_inter_type(line, "-wire_pitch", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-barrier_thickness"): - # self.barrier_thickness = scan_input_double_inter_type(line, "-barrier_thickness", "ohm", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-dishing_thickness"): - # self.dishing_thickness = scan_input_double_inter_type(line, "-dishing_thickness", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-alpha_scatter"): - # self.alpha_scatter = scan_input_double_inter_type(line, "-alpha_scatter", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-aspect_ratio"): - # self.aspect_ratio = scan_input_double_inter_type(line, "-aspect_ratio", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-miller_value"): - # self.miller_value = scan_input_double_inter_type(line, "-miller_value", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-horiz_dielectric_constant"): - # self.horiz_dielectric_constant = scan_input_double_inter_type(line, "-horiz_dielectric_constant", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-vert_dielectric_constant"): - # self.vert_dielectric_constant = scan_input_double_inter_type(line, "-vert_dielectric_constant", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-ild_thickness"): - # self.ild_thickness = scan_input_double_inter_type(line, "-ild_thickness", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-fringe_cap"): - # self.fringe_cap = scan_input_double_inter_type(line, "-fringe_cap", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-wire_r_per_micron"): - # self.R_per_um = scan_input_double_inter_type(line, "-wire_r_per_micron", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-wire_c_per_micron"): - # self.C_per_um = scan_input_double_inter_type(line, "-wire_c_per_micron", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - # if line.startswith("-resistivity"): - # resistivity = scan_input_double_inter_type(line, "-resistivity", "um", g_ip.ic_proj_type, tech_flavor, print_debug) - # continue - - self.pitch *= g_ip.F_sz_um - self.wire_width = self.pitch / 2 # micron - self.wire_thickness = self.aspect_ratio * self.wire_width # micron - self.wire_spacing = self.pitch - self.wire_width # micron - - if projection_type != 1 or tech_flavor != 3: - self.R_per_um = wire_resistance(resistivity, self.wire_width, - self.wire_thickness, self.barrier_thickness, self.dishing_thickness, self.alpha_scatter) # ohm/micron - if print_debug: - print(f"{self.R_per_um} = wire_resistance({resistivity}, {self.wire_width}, {self.wire_thickness}, {self.barrier_thickness}, {self.dishing_thickness}, {self.alpha_scatter})") - - self.C_per_um = wire_capacitance(self.wire_width, self.wire_thickness, self.wire_spacing, - self.ild_thickness, self.miller_value, self.horiz_dielectric_constant, - self.vert_dielectric_constant, self.fringe_cap) # F/micron - if print_debug: - print(f"{self.C_per_um} = wire_capacitance({self.wire_width}, {self.wire_thickness}, {self.wire_spacing}, {self.ild_thickness}, {self.miller_value}, {self.horiz_dielectric_constant}, {self.vert_dielectric_constant}, {self.fringe_cap})") - - def interpolate(self, alpha, inter1, inter2): - self.pitch = alpha * inter1.pitch + (1 - alpha) * inter2.pitch - self.R_per_um = alpha * inter1.R_per_um + (1 - alpha) * inter2.R_per_um - self.C_per_um = alpha * inter1.C_per_um + (1 - alpha) * inter2.C_per_um - self.horiz_dielectric_constant = alpha * inter1.horiz_dielectric_constant + (1 - alpha) * inter2.horiz_dielectric_constant - self.vert_dielectric_constant = alpha * inter1.vert_dielectric_constant + (1 - alpha) * inter2.vert_dielectric_constant - self.aspect_ratio = alpha * inter1.aspect_ratio + (1 - alpha) * inter2.aspect_ratio - self.miller_value = alpha * inter1.miller_value + (1 - alpha) * inter2.miller_value - self.ild_thickness = alpha * inter1.ild_thickness + (1 - alpha) * inter2.ild_thickness - -class MemoryType: - def __init__(self): - self.reset() - - def reset(self): - self.b_w = 0 - self.b_h = 0 - self.cell_a_w = 0 - self.cell_pmos_w = 0 - self.cell_nmos_w = 0 - self.Vbitpre = 0 - self.Vbitfloating = 0 - self.area_cell = 0 - self.asp_ratio_cell = 0 - - def assign(self, in_file, tech_flavor, cell_type): - try: - with open(in_file, "r") as fp: - lines = fp.readlines() - except FileNotFoundError: - print(f"{in_file} is missing!") - exit(-1) - - vdd_cell = 0 - vdd = 0 - - print(f'tech_flavor {tech_flavor}') - - vdd = sympy_var['Vdd'] - vdd_cell = sympy_var['vdd_cell'] - self.cell_a_w = sympy_var['Wmemcella'] - self.cell_pmos_w = sympy_var['Wmemcellpmos'] - self.cell_nmos_w = sympy_var['Wmemcellnmos'] - self.area_cell = sympy_var['area_cell'] - self.asp_ratio_cell = sympy_var['asp_ratio_cell'] - - # for line in lines: - # if line.startswith("-Vdd"): - # vdd = scan_five_input_double(line, "-Vdd", "V", tech_flavor, g_ip.print_detail_debug) - # continue - # if line.startswith("-vdd_cell"): - # scan_res = scan_five_input_double_mem_type(line, "-vdd_cell", "V", tech_flavor, cell_type, g_ip.print_detail_debug) - # vdd_cell = scan_res if scan_res != None else vdd_cell - # continue - # if line.startswith("-Wmemcella"): - # scan_res = scan_five_input_double_mem_type(line, "-Wmemcella", "V", tech_flavor, cell_type, g_ip.print_detail_debug) - # self.cell_a_w = scan_res if scan_res != None else self.cell_a_w - # continue - # if line.startswith("-Wmemcellpmos"): - # scan_res = scan_five_input_double_mem_type(line, "-Wmemcellpmos", "V", tech_flavor, cell_type, g_ip.print_detail_debug) - # self.cell_pmos_w = scan_res if scan_res != None else self.cell_pmos_w - # continue - # if line.startswith("-Wmemcellnmos"): - # scan_res = scan_five_input_double_mem_type(line, "-Wmemcellnmos", "V", tech_flavor, cell_type, g_ip.print_detail_debug) - # self.cell_nmos_w = scan_res if scan_res != None else self.cell_nmos_w - # continue - # if line.startswith("-area_cell"): - # scan_res = scan_five_input_double_mem_type(line, "-area_cell", "V", tech_flavor, cell_type, g_ip.print_detail_debug) - # self.area_cell = scan_res if scan_res != None else self.area_cell - # continue - # if line.startswith("-asp_ratio_cell"): - # scan_res = scan_five_input_double_mem_type(line, "-asp_ratio_cell", "V", tech_flavor, cell_type, g_ip.print_detail_debug) - # self.asp_ratio_cell = scan_res if scan_res != None else self.asp_ratio_cell - # continue - - # print(g_ip.F_sz_um) - # print(self.cell_pmos_w) - if cell_type != 2: - print(self.cell_a_w) - self.cell_a_w *= g_ip.F_sz_um - self.cell_pmos_w *= g_ip.F_sz_um - self.cell_nmos_w *= g_ip.F_sz_um - if cell_type != 2: - self.area_cell *= (g_ip.F_sz_um * g_ip.F_sz_um) - - #TODO 1028-1030 - self.b_w = sp.sqrt(self.area_cell / self.asp_ratio_cell) - self.b_h = self.asp_ratio_cell * self.b_w - if cell_type == 2: - self.Vbitpre = vdd_cell - else: - self.Vbitpre = vdd - - self.Vbitfloating = self.Vbitpre * 0.7 - - def interpolate(self, alpha, mem1, mem2): - self.cell_a_w = alpha * mem1.cell_a_w + (1 - alpha) * mem2.cell_a_w - self.cell_pmos_w = alpha * mem1.cell_pmos_w + (1 - alpha) * mem2.cell_pmos_w - self.cell_nmos_w = alpha * mem1.cell_nmos_w + (1 - alpha) * mem2.cell_nmos_w - self.area_cell = alpha * mem1.area_cell + (1 - alpha) * mem2.area_cell - self.asp_ratio_cell = alpha * mem1.asp_ratio_cell + (1 - alpha) * mem2.asp_ratio_cell - self.Vbitpre = mem2.Vbitpre - self.Vbitfloating = self.Vbitpre * 0.7 - - #TODO 1028-1030 - self.b_w = sp.sqrt(self.area_cell / self.asp_ratio_cell) - self.b_h = self.asp_ratio_cell * self.b_w - - def isEqual(self, mem): - if not self.is_equal(self.b_w, mem.b_w): return False - if not self.is_equal(self.b_h, mem.b_h): return False - if not self.is_equal(self.cell_a_w, mem.cell_a_w): return False - if not self.is_equal(self.cell_pmos_w, mem.cell_pmos_w): return False - if not self.is_equal(self.cell_nmos_w, mem.cell_nmos_w): return False - if not self.is_equal(self.Vbitpre, mem.Vbitpre): return False - return True - - def is_equal(self, first, second): - if (first == 0) and (second == 0): - return True - if (second == 0) or (second != second): - return True - if (first != first) or (second != second): # both are NaNs - return True - if first == 0: - if abs(first - second) < (second * 0.000001): - return True - else: - if abs(first - second) < (first * 0.000001): - return True - return False - - def scan_five_input_double(self, line, name, unit_name, flavor, print_flag): - temp = [0] * 5 - unit = '' - - pattern = re.compile(rf"{name}\s+(\S+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)") - match = pattern.search(line) - print(f'{line}, {match}') - - if match: - unit = match.group(1) - temp[0] = float(match.group(2)) - temp[1] = float(match.group(3)) - temp[2] = float(match.group(4)) - temp[3] = float(match.group(5)) - temp[4] = float(match.group(6)) - - if print_flag: - print(f"{name}[{flavor}]: {temp[flavor]} {unit}") - return temp[flavor] - return None - -class ScalingFactor: - def __init__(self): - self.reset() - - def reset(self): - self.logic_scaling_co_eff = 0 - self.core_tx_density = 0 - self.long_channel_leakage_reduction = 0 - - def assign(self, in_file): - try: - with open(in_file, "r") as fp: - lines = fp.readlines() - except FileNotFoundError: - print(f"{in_file} is missing!") - exit(-1) - - self.logic_scaling_co_eff = sympy_var['logic_scaling_co_eff'] - self.core_tx_density = sympy_var['core_tx_density'] - - # for line in lines: - # if line.startswith("-logic_scaling_co_eff"): - # self.logic_scaling_co_eff = scan_single_input_double(line, "-logic_scaling_co_eff", "F/um", g_ip.print_detail_debug) - # continue - # if line.startswith("-core_tx_density"): - # self.core_tx_density = scan_single_input_double(line, "-core_tx_density", "F/um", g_ip.print_detail_debug) - # continue - - def interpolate(self, alpha, dev1, dev2): - self.logic_scaling_co_eff = alpha * dev1.logic_scaling_co_eff + (1 - alpha) * dev2.logic_scaling_co_eff - self.core_tx_density = alpha * dev1.core_tx_density + (1 - alpha) * dev2.core_tx_density - - def isEqual(self, scal): - if not is_equal(self.logic_scaling_co_eff, scal.logic_scaling_co_eff): - self.display(0) - assert False - if not is_equal(self.core_tx_density, scal.core_tx_density): - self.display(0) - assert False - if not is_equal(self.long_channel_leakage_reduction, scal.long_channel_leakage_reduction): - self.display(0) - assert False - return True - - def display(self, indent=0): - indent_str = ' ' * indent - print(f"{indent_str}logic_scaling_co_eff = {self.logic_scaling_co_eff}") - print(f"{indent_str}core_tx_density = {self.core_tx_density}") - print(f"{indent_str}long_channel_leakage_reduction = {self.long_channel_leakage_reduction}") - -class Area: - h: float = 0.0 - w: float = 0.0 - -class DynamicParameter: - def __init__(self, is_tag_=False, pure_ram_=0, pure_cam_=0, Nspd_=1.0, Ndwl_=1, Ndbl_=1, Ndcm_=1, Ndsam_lev_1_=1, Ndsam_lev_2_=1, wt=None, is_main_mem_=False): - self.is_tag = is_tag_ - self.pure_ram = pure_ram_ - self.pure_cam = pure_cam_ - self.fully_assoc = False - self.tagbits = 0 - self.num_subarrays = 0 - self.num_mats = 0 - self.Nspd = Nspd_ - self.Ndwl = Ndwl_ - self.Ndbl = Ndbl_ - self.Ndcm = Ndcm_ - self.deg_bl_muxing = 0 - self.deg_senseamp_muxing_non_associativity = 0 - self.Ndsam_lev_1 = Ndsam_lev_1_ - self.Ndsam_lev_2 = Ndsam_lev_2_ - self.wtype = wt - self.number_addr_bits_mat = 0 - self.number_subbanks_decode = 0 - self.num_di_b_bank_per_port = 0 - self.num_do_b_bank_per_port = 0 - self.num_di_b_mat = 0 - self.num_do_b_mat = 0 - self.num_di_b_subbank = 0 - self.num_do_b_subbank = 0 - self.num_si_b_mat = 0 - self.num_so_b_mat = 0 - self.num_si_b_subbank = 0 - self.num_so_b_subbank = 0 - self.num_si_b_bank_per_port = 0 - self.num_so_b_bank_per_port = 0 - self.number_way_select_signals_mat = 0 - self.num_act_mats_hor_dir = 0 - self.num_act_mats_hor_dir_sl = 0 - self.is_dram = False - self.V_b_sense = 0.0 - self.num_r_subarray = 0 - self.num_c_subarray = 0 - self.tag_num_r_subarray = 0 - self.tag_num_c_subarray = 0 - self.data_num_r_subarray = 0 - self.data_num_c_subarray = 0 - self.num_mats_h_dir = 0 - self.num_mats_v_dir = 0 - self.ram_cell_tech_type = 0 - self.dram_refresh_period = 0.0 - self.use_inp_params = 0 - self.num_rw_ports = 0 - self.num_rd_ports = 0 - self.num_wr_ports = 0 - self.num_se_rd_ports = 0 - self.num_search_ports = 0 - self.out_w = 0 - self.is_main_mem = is_main_mem_ - self.cell = Area() - self.cam_cell = Area() - self.is_valid = False - self.init_parameters() - - def init_parameters(self): - if self.is_tag: - self.ram_cell_tech_type = g_ip.tag_arr_ram_cell_tech_type - else: - self.ram_cell_tech_type = g_ip.data_arr_ram_cell_tech_type - - self.is_dram = (self.ram_cell_tech_type == lp_dram or self.ram_cell_tech_type == comm_dram) - self.fully_assoc = bool(g_ip.fully_assoc) - capacity_per_die = g_ip.cache_sz / NUMBER_STACKED_DIE_LAYERS - wire_local = g_tp.wire_local - - if self.pure_cam: - self.init_CAM() - return - - if self.fully_assoc: - self.init_FA() - return - - if not self.calc_subarr_rc(capacity_per_die): - return - - if self.is_tag: - self.cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + g_ip.num_rd_ports + g_ip.num_wr_ports) - self.cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + g_ip.num_wr_ports + (g_ip.num_rd_ports - g_ip.num_se_rd_ports)) + wire_local.pitch * g_ip.num_se_rd_ports - else: - if self.is_dram: - self.cell.h = g_tp.dram.b_h - self.cell.w = g_tp.dram.b_w - else: - self.cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip.num_wr_ports + g_ip.num_rw_ports - 1 + g_ip.num_rd_ports) - self.cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + (g_ip.num_rd_ports - g_ip.num_se_rd_ports) + g_ip.num_wr_ports) + g_tp.wire_local.pitch * g_ip.num_se_rd_ports - - c_b_metal = self.cell.h * wire_local.C_per_um - - if self.is_dram: - self.deg_bl_muxing = 1 - if self.ram_cell_tech_type == comm_dram: - Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, self.cell.w, True, True) / 2.0 - C_bl = self.num_r_subarray * (Cbitrow_drain_cap + c_b_metal) - self.V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl) - if self.V_b_sense < VBITSENSEMIN and not (g_ip.is_3d_mem and g_ip.force_cache_config): - return - self.dram_refresh_period = 64e-3 - else: - Cbitrow_drain_cap = drain_C_(g_tp.dram.cell_a_w, NCH, 1, 0, self.cell.w, True, True) / 2.0 - C_bl = self.num_r_subarray * (Cbitrow_drain_cap + c_b_metal) - self.V_b_sense = (g_tp.dram_cell_Vdd / 2) * g_tp.dram_cell_C / (g_tp.dram_cell_C + C_bl) - if self.V_b_sense < VBITSENSEMIN: - return - self.V_b_sense = VBITSENSEMIN - self.dram_refresh_period = 0.9 * g_tp.dram_cell_C * VDD_STORAGE_LOSS_FRACTION_WORST * g_tp.dram_cell_Vdd / g_tp.dram_cell_I_off_worst_case_len_temp - else: - self.V_b_sense = max(0.05 * g_tp.sram_cell.Vdd, VBITSENSEMIN) - self.deg_bl_muxing = self.Ndcm - Cbitrow_drain_cap = drain_C_(g_tp.sram.cell_a_w, NCH, 1, 0, self.cell.w, False, True) / 2.0 - C_bl = self.num_r_subarray * (Cbitrow_drain_cap + c_b_metal) - self.dram_refresh_period = 0 - - self.num_mats_h_dir = max(self.Ndwl // 2, 1) - self.num_mats_v_dir = max(self.Ndbl // 2, 1) - self.num_mats = self.num_mats_h_dir * self.num_mats_v_dir - self.num_do_b_mat = max((self.num_subarrays / self.num_mats) * self.num_c_subarray / (self.deg_bl_muxing * self.Ndsam_lev_1 * self.Ndsam_lev_2), 1) - - if not (self.fully_assoc or self.pure_cam) and self.num_do_b_mat < (self.num_subarrays / self.num_mats): - return - - if not self.is_tag: - if self.is_main_mem: - self.num_do_b_subbank = g_ip.int_prefetch_w * g_ip.out_w - if g_ip.is_3d_mem: - self.num_do_b_subbank = g_ip.page_sz_bits - deg_sa_mux_l1_non_assoc = self.Ndsam_lev_1 - else: - if g_ip.fast_access: - self.num_do_b_subbank = g_ip.out_w * g_ip.data_assoc - deg_sa_mux_l1_non_assoc = self.Ndsam_lev_1 - else: - self.num_do_b_subbank = g_ip.out_w - deg_sa_mux_l1_non_assoc = self.Ndsam_lev_1 / g_ip.data_assoc - if deg_sa_mux_l1_non_assoc < 1: - return - else: - self.num_do_b_subbank = self.tagbits * g_ip.tag_assoc - if self.num_do_b_mat < self.tagbits: - return - deg_sa_mux_l1_non_assoc = self.Ndsam_lev_1 - - self.deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc - self.num_act_mats_hor_dir = self.num_do_b_subbank // self.num_do_b_mat - if g_ip.is_3d_mem and self.num_act_mats_hor_dir == 0: - self.num_act_mats_hor_dir = 1 - if self.num_act_mats_hor_dir == 0: - return - - if self.is_tag: - if not (self.fully_assoc or self.pure_cam): - self.num_do_b_mat = g_ip.tag_assoc // self.num_act_mats_hor_dir - self.num_do_b_subbank = self.num_act_mats_hor_dir * self.num_do_b_mat - - if (not g_ip.is_cache and self.is_main_mem) or (PAGE_MODE == 1 and self.is_dram): - if self.num_act_mats_hor_dir * self.num_do_b_mat * self.Ndsam_lev_1 * self.Ndsam_lev_2 != int(g_ip.page_sz_bits): - return - - if (not self.is_tag) and (g_ip.is_main_mem) and (self.num_act_mats_hor_dir * self.num_do_b_mat * self.Ndsam_lev_1 * self.Ndsam_lev_2 < int(g_ip.out_w * g_ip.burst_len * g_ip.data_assoc)): - return - - if self.num_act_mats_hor_dir > self.num_mats_h_dir: - return - - if not self.is_tag: - if g_ip.fast_access: - self.num_di_b_mat = self.num_do_b_mat // g_ip.data_assoc - else: - self.num_di_b_mat = self.num_do_b_mat - else: - self.num_di_b_mat = self.tagbits - - self.num_di_b_subbank = self.num_di_b_mat * self.num_act_mats_hor_dir - self.num_si_b_subbank = self.num_si_b_mat - - num_addr_b_row_dec = _log2(self.num_r_subarray) - if self.fully_assoc or self.pure_cam: - num_addr_b_row_dec += _log2(self.num_subarrays // self.num_mats) - number_subbanks = self.num_mats // self.num_act_mats_hor_dir - self.number_subbanks_decode = _log2(number_subbanks) - - self.num_rw_ports = g_ip.num_rw_ports - self.num_rd_ports = g_ip.num_rd_ports - self.num_wr_ports = g_ip.num_wr_ports - self.num_se_rd_ports = g_ip.num_se_rd_ports - self.num_search_ports = g_ip.num_search_ports - - if self.is_dram and self.is_main_mem: - self.number_addr_bits_mat = max(num_addr_b_row_dec, _log2(self.deg_bl_muxing) + _log2(self.deg_sa_mux_l1_non_assoc) + _log2(self.Ndsam_lev_2)) - if g_ip.print_detail_debug: - print(f"parameter.cc: number_addr_bits_mat = {num_addr_b_row_dec}") - print(f"parameter.cc: num_addr_b_row_dec = {num_addr_b_row_dec}") - print(f"parameter.cc: num_addr_b_mux_sel = {_log2(self.deg_bl_muxing) + _log2(self.deg_sa_mux_l1_non_assoc) + _log2(self.Ndsam_lev_2)}") - else: - self.number_addr_bits_mat = num_addr_b_row_dec + _log2(self.deg_bl_muxing) + _log2(self.deg_sa_mux_l1_non_assoc) + _log2(self.Ndsam_lev_2) - - if self.is_tag: - self.num_di_b_bank_per_port = self.tagbits - self.num_do_b_bank_per_port = g_ip.data_assoc - else: - self.num_di_b_bank_per_port = g_ip.out_w + g_ip.data_assoc - self.num_do_b_bank_per_port = g_ip.out_w - - if not self.is_tag and g_ip.data_assoc > 1 and not g_ip.fast_access: - self.number_way_select_signals_mat = g_ip.data_assoc - - if g_ip.add_ecc_b_: - self.ECC_adjustment() - - self.is_valid = True - - def init_CAM(self): - wire_local = g_tp.wire_local - capacity_per_die = g_ip.cache_sz / NUMBER_STACKED_DIE_LAYERS - - if self.Ndwl != 1 or self.Ndcm != 1 or self.Nspd < 1 or self.Nspd > 1 or self.Ndsam_lev_1 != 1 or self.Ndsam_lev_2 != 1 or self.Ndbl < 2: - return - - if g_ip.specific_tag: - self.tagbits = int(sp.ceiling(g_ip.tag_w / 8.0) * 8) - else: - self.tagbits = int(sp.ceiling((ADDRESS_BITS + EXTRA_TAG_BITS) / 8.0) * 8) - - self.tag_num_r_subarray = int(sp.ceiling(capacity_per_die / (g_ip.nbanks * self.tagbits / 8.0 * self.Ndbl))) - self.tag_num_c_subarray = self.tagbits - - if self.tag_num_r_subarray == 0: - return - if self.tag_num_r_subarray > MAXSUBARRAYROWS: - return - if self.tag_num_c_subarray < MINSUBARRAYCOLS: - return - if self.tag_num_c_subarray > MAXSUBARRAYCOLS: - return - self.num_r_subarray = self.tag_num_r_subarray - - self.num_subarrays = self.Ndwl * self.Ndbl - - self.cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + g_ip.num_rd_ports + g_ip.num_wr_ports) + 2 * wire_local.pitch * (g_ip.num_search_ports - 1) + wire_local.pitch * g_ip.num_se_rd_ports - self.cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + g_ip.num_rd_ports + g_ip.num_wr_ports) + 2 * wire_local.pitch * (g_ip.num_search_ports - 1) + wire_local.pitch * g_ip.num_se_rd_ports - - self.cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip.num_wr_ports + g_ip.num_rw_ports - 1 + g_ip.num_rd_ports) + 2 * wire_local.pitch * (g_ip.num_search_ports - 1) - self.cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + (g_ip.num_rd_ports - g_ip.num_se_rd_ports) + g_ip.num_wr_ports) + g_tp.wire_local.pitch * g_ip.num_se_rd_ports + 2 * wire_local.pitch * (g_ip.num_search_ports - 1) - - c_b_metal = self.cell.h * wire_local.C_per_um - c_b_metal = self.cam_cell.h * wire_local.C_per_um - self.V_b_sense = max(0.05 * g_tp.sram_cell.Vdd, VBITSENSEMIN) - self.deg_bl_muxing = 1 - - Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, self.cam_cell.w, False, True) / 2.0 - self.dram_refresh_period = 0 - - if self.Ndbl == 0: - print(" Invalid Ndbl \n") - exit(0) - elif self.Ndbl == 1: - self.num_mats_h_dir = 1 - self.num_mats_v_dir = 1 - elif self.Ndbl == 2: - self.num_mats_h_dir = 1 - self.num_mats_v_dir = 1 - else: - self.num_mats_h_dir = int(sp.floor(sp.sqrt(self.Ndbl / 4.0))) - self.num_mats_v_dir = int(self.Ndbl / 4.0 / self.num_mats_h_dir) - - self.num_mats = self.num_mats_h_dir * self.num_mats_v_dir - - self.num_so_b_mat = int(sp.ceiling(_log2(self.num_r_subarray)) + sp.ceiling(_log2(self.num_subarrays))) - self.num_do_b_mat = self.tagbits - - deg_sa_mux_l1_non_assoc = 1 - - self.num_so_b_subbank = int(sp.ceiling(_log2(self.num_r_subarray)) + sp.ceiling(_log2(self.num_subarrays))) - self.num_do_b_subbank = self.tag_num_c_subarray - - self.deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc - - self.num_act_mats_hor_dir = 1 - self.num_act_mats_hor_dir_sl = self.num_mats_h_dir - - if self.num_act_mats_hor_dir > self.num_mats_h_dir: - return - - self.num_di_b_mat = self.tagbits - self.num_si_b_mat = self.tagbits - - self.num_di_b_subbank = self.num_di_b_mat * self.num_act_mats_hor_dir - self.num_si_b_subbank = self.num_si_b_mat - - num_addr_b_row_dec = _log2(self.num_r_subarray) - num_addr_b_row_dec += _log2(self.num_subarrays / self.num_mats) - number_subbanks = self.num_mats / self.num_act_mats_hor_dir - self.number_subbanks_decode = _log2(number_subbanks) - - self.num_rw_ports = g_ip.num_rw_ports - self.num_rd_ports = g_ip.num_rd_ports - self.num_wr_ports = g_ip.num_wr_ports - self.num_se_rd_ports = g_ip.num_se_rd_ports - self.num_search_ports = g_ip.num_search_ports - - self.number_addr_bits_mat = num_addr_b_row_dec + _log2(self.deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(self.Ndsam_lev_2) - - self.num_di_b_bank_per_port = self.tagbits - self.num_si_b_bank_per_port = self.tagbits - self.num_do_b_bank_per_port = self.tagbits - self.num_so_b_bank_per_port = int(sp.ceiling(_log2(self.num_r_subarray)) + sp.ceiling(_log2(self.num_subarrays))) - - if not self.is_tag and g_ip.data_assoc > 1 and not g_ip.fast_access: - self.number_way_select_signals_mat = g_ip.data_assoc - - if g_ip.add_ecc_b_: - self.ECC_adjustment() - - self.is_valid = True - - def init_FA(self): - wire_local = g_tp.wire_local - assert NUMBER_STACKED_DIE_LAYERS == 1 - capacity_per_die = g_ip.cache_sz - - if self.Ndwl != 1 or self.Ndcm != 1 or self.Nspd < 1 or self.Nspd > 1 or self.Ndsam_lev_1 != 1 or self.Ndsam_lev_2 != 1 or self.Ndbl < 2: - return - - if g_ip.specific_tag: - self.tagbits = g_ip.tag_w - else: - self.tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(g_ip.block_sz) - self.tagbits = (((self.tagbits + 3) >> 2) << 2) - - self.tag_num_r_subarray = int(capacity_per_die / (g_ip.nbanks * g_ip.block_sz * self.Ndbl)) - self.tag_num_c_subarray = int(sp.ceiling((self.tagbits * self.Nspd / self.Ndwl))) - if self.tag_num_r_subarray == 0: - return - if self.tag_num_r_subarray > MAXSUBARRAYROWS: - return - if self.tag_num_c_subarray < MINSUBARRAYCOLS: - return - if self.tag_num_c_subarray > MAXSUBARRAYCOLS: - return - - self.data_num_r_subarray = self.tag_num_r_subarray - self.data_num_c_subarray = 8 * g_ip.block_sz - if self.data_num_r_subarray == 0: - return - if self.data_num_r_subarray > MAXSUBARRAYROWS: - return - if self.data_num_c_subarray < MINSUBARRAYCOLS: - return - if self.data_num_c_subarray > MAXSUBARRAYCOLS: - return - self.num_r_subarray = self.tag_num_r_subarray - - self.num_subarrays = self.Ndwl * self.Ndbl - - self.cam_cell.h = g_tp.cam.b_h + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + g_ip.num_rd_ports + g_ip.num_wr_ports) + 2 * wire_local.pitch * (g_ip.num_search_ports - 1) + wire_local.pitch * g_ip.num_se_rd_ports - self.cam_cell.w = g_tp.cam.b_w + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + g_ip.num_rd_ports + g_ip.num_wr_ports) + 2 * wire_local.pitch * (g_ip.num_search_ports - 1) + wire_local.pitch * g_ip.num_se_rd_ports - - self.cell.h = g_tp.sram.b_h + 2 * wire_local.pitch * (g_ip.num_wr_ports + g_ip.num_rw_ports - 1 + g_ip.num_rd_ports) + 2 * wire_local.pitch * (g_ip.num_search_ports - 1) - self.cell.w = g_tp.sram.b_w + 2 * wire_local.pitch * (g_ip.num_rw_ports - 1 + (g_ip.num_rd_ports - g_ip.num_se_rd_ports) + g_ip.num_wr_ports) + g_tp.wire_local.pitch * g_ip.num_se_rd_ports + 2 * wire_local.pitch * (g_ip.num_search_ports - 1) - - c_b_metal = self.cell.h * wire_local.C_per_um - c_b_metal = self.cam_cell.h * wire_local.C_per_um - self.V_b_sense = max(0.05 * g_tp.sram_cell.Vdd, VBITSENSEMIN) - self.deg_bl_muxing = 1 - - Cbitrow_drain_cap = drain_C_(g_tp.cam.cell_a_w, NCH, 1, 0, self.cam_cell.w, False, True) / 2.0 - self.dram_refresh_period = 0 - - if self.Ndbl == 0: - print(" Invalid Ndbl \n") - exit(0) - elif self.Ndbl == 1: - self.num_mats_h_dir = 1 - self.num_mats_v_dir = 1 - elif self.Ndbl == 2: - self.num_mats_h_dir = 1 - self.num_mats_v_dir = 1 - else: - self.num_mats_h_dir = int(sp.floor(sp.sqrt(self.Ndbl / 4.0))) - self.num_mats_v_dir = int(self.Ndbl / 4.0 / self.num_mats_h_dir) - - self.num_mats = self.num_mats_h_dir * self.num_mats_v_dir - - self.num_so_b_mat = self.data_num_c_subarray - self.num_do_b_mat = self.data_num_c_subarray + self.tagbits - - deg_sa_mux_l1_non_assoc = 1 - self.num_so_b_subbank = 8 * g_ip.block_sz - self.num_do_b_subbank = self.num_so_b_subbank + self.tag_num_c_subarray - - self.deg_senseamp_muxing_non_associativity = deg_sa_mux_l1_non_assoc - self.num_act_mats_hor_dir = 1 - self.num_act_mats_hor_dir_sl = self.num_mats_h_dir - - if self.num_act_mats_hor_dir > self.num_mats_h_dir: - return - - if self.fully_assoc: - self.num_di_b_mat = self.num_do_b_mat - self.num_si_b_mat = self.tagbits - self.num_di_b_subbank = self.num_di_b_mat * self.num_act_mats_hor_dir - self.num_si_b_subbank = self.num_si_b_mat - - num_addr_b_row_dec = _log2(self.num_r_subarray) - num_addr_b_row_dec += _log2(self.num_subarrays / self.num_mats) - number_subbanks = self.num_mats / self.num_act_mats_hor_dir - self.number_subbanks_decode = _log2(number_subbanks) - - self.num_rw_ports = g_ip.num_rw_ports - self.num_rd_ports = g_ip.num_rd_ports - self.num_wr_ports = g_ip.num_wr_ports - self.num_se_rd_ports = g_ip.num_se_rd_ports - self.num_search_ports = g_ip.num_search_ports - - self.number_addr_bits_mat = num_addr_b_row_dec + _log2(self.deg_bl_muxing) + _log2(deg_sa_mux_l1_non_assoc) + _log2(self.Ndsam_lev_2) - - self.num_di_b_bank_per_port = g_ip.out_w + self.tagbits - self.num_si_b_bank_per_port = self.tagbits - self.num_do_b_bank_per_port = g_ip.out_w + self.tagbits - self.num_so_b_bank_per_port = g_ip.out_w - - if not self.is_tag and g_ip.data_assoc > 1 and not g_ip.fast_access: - self.number_way_select_signals_mat = g_ip.data_assoc - - if g_ip.add_ecc_b_: - self.ECC_adjustment() - - self.is_valid = True - - def ECC_adjustment(self): - self.num_do_b_mat += int(sp.ceiling(self.num_do_b_mat / self.num_bits_per_ecc_b_)) - self.num_di_b_mat += int(sp.ceiling(self.num_di_b_mat / self.num_bits_per_ecc_b_)) - self.num_di_b_subbank += int(sp.ceiling(self.num_di_b_subbank / self.num_bits_per_ecc_b_)) - self.num_do_b_subbank += int(sp.ceiling(self.num_do_b_subbank / self.num_bits_per_ecc_b_)) - self.num_di_b_bank_per_port += int(sp.ceiling(self.num_di_b_bank_per_port / self.num_bits_per_ecc_b_)) - self.num_do_b_bank_per_port += int(sp.ceiling(self.num_do_b_bank_per_port / self.num_bits_per_ecc_b_)) - - self.num_so_b_mat += int(sp.ceiling(self.num_so_b_mat / self.num_bits_per_ecc_b_)) - self.num_si_b_mat += int(sp.ceiling(self.num_si_b_mat / self.num_bits_per_ecc_b_)) - self.num_si_b_subbank += int(sp.ceiling(self.num_si_b_subbank / self.num_bits_per_ecc_b_)) - self.num_so_b_subbank += int(sp.ceiling(self.num_so_b_subbank / self.num_bits_per_ecc_b_)) - self.num_si_b_bank_per_port += int(sp.ceiling(self.num_si_b_bank_per_port / self.num_bits_per_ecc_b_)) - self.num_so_b_bank_per_port += int(sp.ceiling(self.num_so_b_bank_per_port / self.num_bits_per_ecc_b_)) - - def calc_subarr_rc(self, capacity_per_die): - if self.Ndwl < 2 or self.Ndbl < 2: - return False - - if self.is_dram and not self.is_tag and self.Ndcm > 1: - return False - - if self.is_tag: - if g_ip.specific_tag: - self.tagbits = g_ip.tag_w - else: - self.tagbits = ADDRESS_BITS + EXTRA_TAG_BITS - _log2(capacity_per_die) + _log2(g_ip.tag_assoc * 2 - 1) - - self.num_r_subarray = int(sp.ceiling(capacity_per_die / (g_ip.nbanks * g_ip.block_sz * g_ip.tag_assoc * self.Ndbl * self.Nspd))) - self.num_c_subarray = int(sp.ceiling((self.tagbits * g_ip.tag_assoc * self.Nspd / self.Ndwl))) - else: - self.num_r_subarray = int(sp.ceiling(capacity_per_die / (g_ip.nbanks * g_ip.block_sz * g_ip.data_assoc * self.Ndbl * self.Nspd))) - self.num_c_subarray = int(sp.ceiling((8 * g_ip.block_sz * g_ip.data_assoc * self.Nspd / self.Ndwl))) - if g_ip.is_3d_mem: - capacity_per_die_double = float(g_ip.cache_sz) / g_ip.num_die_3d - self.num_c_subarray = g_ip.page_sz_bits / self.Ndwl - self.num_r_subarray = 1 << int(sp.floor(_log2(float(g_ip.cache_sz) / g_ip.num_die_3d / self.num_c_subarray / g_ip.nbanks / self.Ndbl / self.Ndwl * 1024 * 1024 * 1024) + 0.5)) - if g_ip.print_detail_debug: - print(f"parameter.cc: capacity_per_die_double = {capacity_per_die_double} Gbit") - print(f"parameter.cc: g_ip.nbanks * Ndbl * Ndwl = {g_ip.nbanks * self.Ndbl * self.Ndwl}") - print(f"parameter.cc: num_r_subarray = {self.num_r_subarray}") - print(f"parameter.cc: num_c_subarray = {self.num_c_subarray}") - - if self.num_r_subarray < MINSUBARRAYROWS or self.num_r_subarray == 0 or self.num_r_subarray > MAXSUBARRAYROWS: - return False - if self.num_c_subarray < MINSUBARRAYCOLS or self.num_c_subarray > MAXSUBARRAYCOLS: - return False - - self.num_subarrays = self.Ndwl * self.Ndbl - return True - - - - - - - - - - -# HELPERS -# CHECK THESE -def PRINT(A, X, tech): - print(f"{A}: {X} , {tech.X}") - -def scan_single_input_double(line, name, unit_name, print_output): - match = re.search(f"{name}\s+([^\s]+)\s+([^\s]+)", line) - if match: - unit = match.group(1) - temp = float(match.group(2)) - if print_output: - print(f"{name}: {temp} {unit}") - return temp - return 0.0 - -def scan_five_input_double(line, name, unit_name, flavor, print_output): - match = re.search(f"{name}\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)", line) - if match: - unit = match.group(1) - temp = [float(match.group(i)) for i in range(2, 7)] - if print_output: - print(f"{name}[{flavor}]: {temp[flavor]} {unit}") - return temp[flavor] - return 0.0 - -def scan_five_input_double_temperature(line, name, unit_name, flavor, temperature, print_output, result): - match = re.search(f"{name}\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)", line) - if match: - unit = match.group(1) - thermal_temp = int(match.group(2)) - temp = [float(match.group(i)) for i in range(3, 8)] - if thermal_temp == (temperature - 300): - if print_output: - print(f"{name}: {temp[flavor]} {unit}") - result = temp[flavor] - -def scan_input_double_inter_type(line, name, unit_name, proj_type, tech_flavor, print_output): - assert proj_type < NUMBER_INTERCONNECT_PROJECTION_TYPES - index = proj_type * NUMBER_WIRE_TYPES + tech_flavor - match = re.search(f"{name}\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)", line) - if match: - unit = match.group(1) - temp = [float(match.group(i)) for i in range(2, 10)] - if print_output: - print(f"{name} {temp[index]} {unit}") - return temp[index] - return 0.0 - -def scan_five_input_double_mem_type(line, name, unit_name, flavor, cell_type, print_flag): - temp = [0.0] * 5 - unit = "" - - # Extract the relevant part of the line - relevant_line = line[len(name):].strip() - - # print(line) - # print(relevant_line) - - # Scan the input line and extract values - parts = relevant_line.split() - # print(parts) - # print(cell_type) - - unit = parts[0] - cell_type_temp = int(parts[1]) - temp[0] = float(parts[2]) - temp[1] = float(parts[3]) - temp[2] = float(parts[4]) - temp[3] = float(parts[5]) - temp[4] = float(parts[6]) - - result = None - if cell_type_temp == cell_type: - if print_flag: - print(f"{name}: {temp[flavor]} {unit}") - result = temp[flavor] - - return result - -def scan_input_double_tsv_type(line, name, unit_name, proj_type, tsv_type, print_flag): - assert proj_type < NUMBER_INTERCONNECT_PROJECTION_TYPES - index = proj_type * NUMBER_TSV_TYPES + tsv_type - temp = [0.0] * 6 - unit = "" - - # Extracting the values using regular expressions - match = re.search(rf"{name}\s+(\S+)\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s+(\d+\.?\d*)\s+(\d+\.?\d*)", line) - - if match: - unit = match.group(1) - temp[0] = float(match.group(2)) - temp[1] = float(match.group(3)) - temp[2] = float(match.group(4)) - temp[3] = float(match.group(5)) - temp[4] = float(match.group(6)) - temp[5] = float(match.group(7)) - - if print_flag: - print(f"{name}: {temp[index]} {unit}") - - return temp[index] - else: - raise ValueError("Line does not match the expected format") - - - - - -#### TO AVOID CIRCULAR DEPENDNCY -UNI_LEAK_STACK_FACTOR = 0.43 - -def powers(base, n): - p = 1 - for i in range(1, n + 1): - p *= base - return p - -def is_pow2(val): - if val <= 0: - return False - elif val == 1: - return True - else: - return (_log2(val) != _log2(val - 1)) - -def _log2(num): - if num == 0: - raise ValueError("log0?") - log2 = 0 - while num > 1: - num >>= 1 - log2 += 1 - return log2 - -def factorial(n, m=1): - fa = m - for i in range(m + 1, n + 1): - fa *= i - return fa - -def combination(n, m): - return factorial(n, m + 1) // factorial(n - m) - - -outside_mat = "outside_mat" -inside_mat = "inside_mat" -local_wires = "local_wires" - - -Add_htree = "Add_htree" -Data_in_htree = "Data_in_htree" -Data_out_htree = "Data_out_htree" -Search_in_htree = "Search_in_htree" -Search_out_htree = "Search_out_htree" - - -Row_add_path = "Row_add_path" -Col_add_path = "Col_add_path" -Data_path = "Data_path" - - -nmos = "nmos" -pmos = "pmos" -inv = "inv" -nand = "nand" -nor = "nor" -tri = "tri" -tg = "tg" - -parallel = "parallel" -series = "series" - -# class WirePlacement: -# outside_mat = "outside_mat" -# inside_mat = "inside_mat" -# local_wires = "local_wires" - -# class HtreeType: -# Add_htree = "Add_htree" -# Data_in_htree = "Data_in_htree" -# Data_out_htree = "Data_out_htree" -# Search_in_htree = "Search_in_htree" -# Search_out_htree = "Search_out_htree" - -# class MemorybusType: -# Row_add_path = "Row_add_path" -# Col_add_path = "Col_add_path" -# Data_path = "Data_path" - -# class GateType: -# nmos = "nmos" -# pmos = "pmos" -# inv = "inv" -# nand = "nand" -# nor = "nor" -# tri = "tri" -# tg = "tg" - -# class HalfNetTopology: -# parallel = "parallel" -# series = "series" - -# def logtwo(x): -# assert x > 0 -# return sp.log(x) / sp.log(2.0) - -def gate_C(width, wirelength, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): - if _is_dram and _is_sram: - dt = g_tp.dram_acc # DRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif not _is_dram and _is_sram: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global - return (dt.C_g_ideal + dt.C_overlap + 3 * dt.C_fringe) * width + dt.l_phy * Cpolywire - -def gate_C_pass(width, wirelength, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): - return gate_C(width, wirelength, _is_dram, _is_sram, _is_wl_tr, _is_sleep_tx) - -def drain_C_(width, nchannel, stack, next_arg_thresh_folding_width_or_height_cell, fold_dimension, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): - if _is_dram and _is_sram: - dt = g_tp.dram_acc # DRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif not _is_dram and _is_sram: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global - - c_junc_area = dt.C_junc - c_junc_sidewall = dt.C_junc_sidewall - c_fringe = 2 * dt.C_fringe - c_overlap = 2 * dt.C_overlap - drain_C_metal_connecting_folded_tr = 0 - - if next_arg_thresh_folding_width_or_height_cell == 0: - w_folded_tr = fold_dimension - else: - h_tr_region = fold_dimension - 2 * g_tp.HPOWERRAIL - ratio_p_to_n = 2.0 / (2.0 + 1.0) - if nchannel: - w_folded_tr = (1 - ratio_p_to_n) * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) - else: - w_folded_tr = ratio_p_to_n * (h_tr_region - g_tp.MIN_GAP_BET_P_AND_N_DIFFS) - - num_folded_tr = int(sp.ceiling(width / w_folded_tr)) - if num_folded_tr < 2: - w_folded_tr = width - - total_drain_w = (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + (stack - 1) * g_tp.spacing_poly_to_poly - drain_h_for_sidewall = w_folded_tr - total_drain_height_for_cap_wrt_gate = w_folded_tr + 2 * w_folded_tr * (stack - 1) - if num_folded_tr > 1: - total_drain_w += (num_folded_tr - 2) * (g_tp.w_poly_contact + 2 * g_tp.spacing_poly_to_contact) + (num_folded_tr - 1) * ((stack - 1) * g_tp.spacing_poly_to_poly) - if num_folded_tr % 2 == 0: - drain_h_for_sidewall = 0 - total_drain_height_for_cap_wrt_gate *= num_folded_tr - drain_C_metal_connecting_folded_tr = g_tp.wire_local.C_per_um * total_drain_w - - drain_C_area = c_junc_area * total_drain_w * w_folded_tr - drain_C_sidewall = c_junc_sidewall * (drain_h_for_sidewall + 2 * total_drain_w) - drain_C_wrt_gate = (c_fringe + c_overlap) * total_drain_height_for_cap_wrt_gate - - return drain_C_area + drain_C_sidewall + drain_C_wrt_gate + drain_C_metal_connecting_folded_tr - -def tr_R_on(width, nchannel, stack, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): - if _is_dram and _is_sram: - dt = g_tp.dram_acc # DRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif not _is_dram and _is_sram: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global - - restrans = dt.R_nch_on if nchannel else dt.R_pch_on - return stack * restrans / width - -def R_to_w(res, nchannel, _is_dram=False, _is_sram=False, _is_wl_tr=False, _is_sleep_tx=False): - if _is_dram and _is_sram: - dt = g_tp.dram_acc # DRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif not _is_dram and _is_sram: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global - - restrans = dt.R_nch_on if nchannel else dt.R_pch_on - return restrans / res - -def pmos_to_nmos_sz_ratio(_is_dram=False, _is_wl_tr=False, _is_sleep_tx=False): - if _is_dram and _is_wl_tr: - return g_tp.dram_wl.n_to_p_eff_curr_drv_ratio - elif _is_sleep_tx: - return g_tp.sleep_tx.n_to_p_eff_curr_drv_ratio - else: - return g_tp.peri_global.n_to_p_eff_curr_drv_ratio - -def horowitz(inputramptime, tf, vs1, vs2, rise): - if inputramptime == 0 and vs1 == vs2: - return tf * (-sp.log(vs1) if vs1 < 1 else sp.log(vs1)) - - a = inputramptime / tf - if rise == RISE: - b = 0.5 - td = tf * sp.sqrt(sp.log(vs1) ** 2 + 2 * a * b * (1.0 - vs1)) + tf * (sp.log(vs1) - sp.log(vs2)) - else: - b = 0.4 - td = tf * sp.sqrt(sp.log(1.0 - vs1) ** 2 + 2 * a * b * vs1) + tf * (sp.log(1.0 - vs1) - sp.log(1.0 - vs2)) - return td - -def cmos_Ileak(nWidth, pWidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return nWidth * dt.I_off_n + pWidth * dt.I_off_p - -def simplified_nmos_Isat(nwidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return nwidth * dt.I_on_n - -def simplified_pmos_Isat(pwidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return pwidth * dt.I_on_n / dt.n_to_p_eff_curr_drv_ratio - -def simplified_nmos_leakage(nwidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return nwidth * dt.I_off_n - -def simplified_pmos_leakage(pwidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return pwidth * dt.I_off_p - -def cmos_Ig_n(nWidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return nWidth * dt.I_g_on_n - -def cmos_Ig_p(pWidth, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False): - if not _is_dram and _is_cell: - dt = g_tp.sram_cell # SRAM cell access transistor - elif _is_dram and _is_wl_tr: - dt = g_tp.dram_wl # DRAM wordline transistor - elif _is_sleep_tx: - dt = g_tp.sleep_tx # Sleep transistor - else: - dt = g_tp.peri_global # DRAM or SRAM all other transistors - - return pWidth * dt.I_g_on_p - -def cmos_Isub_leakage(nWidth, pWidth, fanin, g_type, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False, topo=series): - assert fanin >= 1 - nmos_leak = simplified_nmos_leakage(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx) - pmos_leak = simplified_pmos_leakage(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx) - Isub = 0 - num_states = int(sp.Pow(2.0, fanin)) - - if g_type == nmos: - if fanin == 1: - Isub = nmos_leak / num_states - else: - if topo == parallel: - Isub = nmos_leak * fanin / num_states - else: - for num_off_tx in range(1, fanin + 1): - Isub += nmos_leak * sp.Pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * combination(fanin, num_off_tx) - Isub /= num_states - - elif g_type == pmos: - if fanin == 1: - Isub = pmos_leak / num_states - else: - if topo == parallel: - Isub = pmos_leak * fanin / num_states - else: - for num_off_tx in range(1, fanin + 1): - Isub += pmos_leak * sp.Pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * combination(fanin, num_off_tx) - Isub /= num_states - - elif g_type == inv: - Isub = (nmos_leak + pmos_leak) / 2 - - elif g_type == nand: - Isub += fanin * pmos_leak - for num_off_tx in range(1, fanin + 1): - Isub += nmos_leak * sp.Pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * combination(fanin, num_off_tx) - Isub /= num_states - - elif g_type == nor: - for num_off_tx in range(1, fanin + 1): - Isub += pmos_leak * sp.Pow(UNI_LEAK_STACK_FACTOR, (num_off_tx - 1)) * combination(fanin, num_off_tx) - Isub += fanin * nmos_leak - Isub /= num_states - - elif g_type == tri: - Isub += (nmos_leak + pmos_leak) / 2 - Isub += nmos_leak * UNI_LEAK_STACK_FACTOR - Isub /= 2 - - elif g_type == tg: - Isub = (nmos_leak + pmos_leak) / 2 - - else: - raise ValueError("Invalid gate type") - - return Isub - -def cmos_Ig_leakage(nWidth, pWidth, fanin, g_type, _is_dram=False, _is_cell=False, _is_wl_tr=False, _is_sleep_tx=False, topo=series): - assert fanin >= 1 - nmos_leak = cmos_Ig_n(nWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx) - pmos_leak = cmos_Ig_p(pWidth, _is_dram, _is_cell, _is_wl_tr, _is_sleep_tx) - Ig_on = 0 - num_states = int(sp.Pow(2.0, fanin)) - - if g_type == nmos: - if fanin == 1: - Ig_on = nmos_leak / num_states - else: - if topo == parallel: - for num_on_tx in range(1, fanin + 1): - Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx - else: - Ig_on += nmos_leak * fanin - for num_on_tx in range(1, fanin): - Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2 - Ig_on /= num_states - - elif g_type == pmos: - if fanin == 1: - Ig_on = pmos_leak / num_states - else: - if topo == parallel: - for num_on_tx in range(1, fanin + 1): - Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx - else: - Ig_on += pmos_leak * fanin - for num_on_tx in range(1, fanin): - Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2 - Ig_on /= num_states - - elif g_type == inv: - Ig_on = (nmos_leak + pmos_leak) / 2 - - elif g_type == nand: - for num_on_tx in range(1, fanin + 1): - Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx - Ig_on += nmos_leak * fanin - for num_on_tx in range(1, fanin): - Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2 - Ig_on /= num_states - - elif g_type == nor: - Ig_on += pmos_leak * fanin - for num_on_tx in range(1, fanin): - Ig_on += pmos_leak * combination(fanin, num_on_tx) * num_on_tx / 2 - for num_on_tx in range(1, fanin + 1): - Ig_on += nmos_leak * combination(fanin, num_on_tx) * num_on_tx - Ig_on /= num_states - - elif g_type == tri: - Ig_on += (2 * nmos_leak + 2 * pmos_leak) / 2 - Ig_on += (nmos_leak + pmos_leak) / 2 - Ig_on /= 2 - - elif g_type == tg: - Ig_on = (nmos_leak + pmos_leak) / 2 - - else: - raise ValueError("Invalid gate type") - - return Ig_on - -def shortcircuit_simple(vt, velocity_index, c_in, c_out, w_nmos, w_pmos, i_on_n, i_on_p, i_on_n_in, i_on_p_in, vdd): - fo_n = i_on_n / i_on_n_in - fo_p = i_on_p / i_on_p_in - fanout = c_out / c_in - beta_ratio = i_on_p / i_on_n - vt_to_vdd_ratio = vt / vdd - - p_short_circuit_discharge_low = (10 / 3) * (pow((vdd - vt) - vt_to_vdd_ratio, 3.0) / pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio * vt_to_vdd_ratio)) * c_in * vdd * vdd * fo_p * fo_p / fanout / beta_ratio - p_short_circuit_charge_low = (10 / 3) * (pow((vdd - vt) - vt_to_vdd_ratio, 3.0) / pow(velocity_index, 2.0) / pow(2.0, 3 * vt_to_vdd_ratio * vt_to_vdd_ratio)) * c_in * vdd * vdd * fo_n * fo_n / fanout * beta_ratio - - p_short_circuit_discharge = p_short_circuit_discharge_low - p_short_circuit_charge = p_short_circuit_charge_low - p_short_circuit = (p_short_circuit_discharge + p_short_circuit_charge) / 2 - - return p_short_circuit - -def shortcircuit(vt, velocity_index, c_in, c_out, w_nmos, w_pmos, i_on_n, i_on_p, i_on_n_in, i_on_p_in, vdd): - fo_p = i_on_p / i_on_p_in - fanout = 1 - beta_ratio = i_on_p / i_on_n - e = 2.71828 - f_alpha = 1 / (velocity_index + 2) - velocity_index / (2 * (velocity_index + 3)) + velocity_index / (velocity_index + 4) * (velocity_index / 2 - 1) - k_v = 0.9 / 0.8 + (vdd - vt) / 0.8 * sp.log(10 * (vdd - vt) / e) - g_v_alpha = (velocity_index + 1) * pow((1 - velocity_index), velocity_index) * pow((1 - velocity_index), velocity_index / 2) / f_alpha / pow((1 - velocity_index - velocity_index), (velocity_index / 2 + velocity_index + 2)) - h_v_alpha = pow(2, velocity_index) * (velocity_index + 1) * pow((1 - velocity_index), velocity_index) / pow((1 - velocity_index - velocity_index), (velocity_index + 1)) - - p_short_circuit_discharge = k_v * vdd * vdd * c_in * fo_p * fo_p / ((vdd - vt) * g_v_alpha * fanout * beta_ratio / 2 / k_v + h_v_alpha * fo_p) - return p_short_circuit_discharge - -def wire_resistance(resistivity, wire_width, wire_thickness, barrier_thickness, dishing_thickness, alpha_scatter): - resistance = alpha_scatter * resistivity / ((wire_thickness - barrier_thickness - dishing_thickness) * (wire_width - 2 * barrier_thickness)) - return resistance - -def wire_capacitance(wire_width, wire_thickness, wire_spacing, ild_thickness, miller_value, horiz_dielectric_constant, vert_dielectric_constant, fringe_cap): - vertical_cap = 2 * PERMITTIVITY_FREE_SPACE * vert_dielectric_constant * wire_width / ild_thickness - sidewall_cap = 2 * PERMITTIVITY_FREE_SPACE * miller_value * horiz_dielectric_constant * wire_thickness / wire_spacing - total_cap = vertical_cap + sidewall_cap + fringe_cap - return total_cap - -def tsv_resistance(resistivity, tsv_len, tsv_diam, tsv_contact_resistance): - resistance = resistivity * tsv_len / (math.pi * (tsv_diam / 2) ** 2) + tsv_contact_resistance - return resistance - -def tsv_capacitance(tsv_len, tsv_diam, tsv_pitch, dielec_thickness, liner_dielectric_constant, depletion_width): - e_si = PERMITTIVITY_FREE_SPACE * 11.9 - PI = math.pi - lateral_coupling_constant = 4.1 - diagonal_coupling_constant = 5.3 - - liner_cap = 2 * PI * PERMITTIVITY_FREE_SPACE * liner_dielectric_constant * tsv_len / sp.log(1 + dielec_thickness / (tsv_diam / 2)) - depletion_cap = 2 * PI * e_si * tsv_len / sp.log(1 + depletion_width / (dielec_thickness + tsv_diam / 2)) - self_cap = 1 / (1 / liner_cap + 1 / depletion_cap) - - lateral_coupling_cap = 0.4 * (0.225 * sp.log(0.97 * tsv_len / tsv_diam) + 0.53) * e_si / (tsv_pitch - tsv_diam) * PI * tsv_diam * tsv_len - diagonal_coupling_cap = 0.4 * (0.225 * sp.log(0.97 * tsv_len / tsv_diam) + 0.53) * e_si / (1.414 * tsv_pitch - tsv_diam) * PI * tsv_diam * tsv_len - - total_cap = self_cap + lateral_coupling_constant * lateral_coupling_cap + diagonal_coupling_constant * diagonal_coupling_cap - return total_cap - -def tsv_area(tsv_pitch): - return tsv_pitch ** 2 - - -g_ip = InputParameter() -g_tp = TechnologyParameter() diff --git a/cacti-main/cacti_python/subarray.py b/cacti-main/cacti_python/subarray.py index 6db37ea..9c853f8 100644 --- a/cacti-main/cacti_python/subarray.py +++ b/cacti-main/cacti_python/subarray.py @@ -1,5 +1,3 @@ -# TODO figure out area - import math from math import ceil, log, pow import sys @@ -22,16 +20,6 @@ def __init__(self, dp_, is_fa_): self.cell = dp_.cell self.cam_cell = dp_.cam_cell self.is_fa = is_fa_ - - print("\n CURR DEBUG") - print(f"Number of rows: {self.num_rows}") - print(f"Number of columns: {self.num_cols}") - print(f"Number of columns for FA CAM: {self.num_cols_fa_cam}") - print(f"Number of columns for FA RAM: {self.num_cols_fa_ram}") - print(f"Cell: {self.cell}") - print(f"CAM Cell: {self.cam_cell}") - print(f"Is FA: {self.is_fa}") - #self.area = Area() if not (is_fa_ or dp_.pure_cam): self.num_cols += sp.ceiling(self.num_cols / num_bits_per_ecc_b_) if g_ip.add_ecc_b_ else 0 diff --git a/cacti-main/cacti_python/uca.py b/cacti-main/cacti_python/uca.py index 0377278..7b8fc58 100644 --- a/cacti-main/cacti_python/uca.py +++ b/cacti-main/cacti_python/uca.py @@ -5,6 +5,7 @@ from .memorybus import Memorybus from .tsv import TSV import sympy as sp +import time # used to have component? class UCA(Component): @@ -17,7 +18,7 @@ def __init__(self, dyn_p): self.power_routing_to_bank = PowerDef() - #TODO relational + # CHANGE: relational # num_banks_ver_dir = 1 << int((math.log2(nbanks) / 2) if (h > w) else (math.log2(nbanks) - math.log2(nbanks) / 2)) num_banks_ver_dir = 1 << int(math.log2(self.nbanks) / 2) # if (self.bank.h > self.bank.w) else (math.log2(self.nbanks) - math.log2(self.nbanks) / 2)) num_banks_hor_dir = self.nbanks // num_banks_ver_dir @@ -305,19 +306,18 @@ def compute_delays(self, inrisetime): delay_array_to_mat = self.htree_in_add.delay + self.bank.htree_in_add.delay max_delay_before_row_decoder = delay_array_to_mat + self.bank.mat.r_predec.delay self.delay_array_to_sa_mux_lev_1_decoder = delay_array_to_mat + self.bank.mat.sa_mux_lev_1_predec.delay + self.bank.mat.sa_mux_lev_1_dec.delay + # self.delay_array_to_sa_mux_lev_1_decoder = self.bank.mat.sa_mux_lev_1_dec.delay self.delay_array_to_sa_mux_lev_2_decoder = delay_array_to_mat + self.bank.mat.sa_mux_lev_2_predec.delay + self.bank.mat.sa_mux_lev_2_dec.delay delay_inside_mat = self.bank.mat.row_dec.delay + self.bank.mat.delay_bitline + self.bank.mat.delay_sa - # TODO parallel test theses - #TODO hotfix - # if math.isnan(max_delay_before_row_decoder) or math.isnan(delay_inside_mat) or math.isnan(delay_array_to_mat) or math.isnan( self.bank.mat.b_mux_predec.delay) or math.isnan(self.bank.mat.bit_mux_dec.delay ) or math.isnan(self.bank.mat.delay_sa): - # self.delay_before_subarray_output_driver = 1 - # else: - # TODO MAX CHECK + # Change: MAX - option 1 and 2 make the expressions extremely long but have higher accuracy + + # OPTION 1: ORIGINAL # self.delay_before_subarray_output_driver = sp.Max(max_delay_before_row_decoder + delay_inside_mat, # delay_array_to_mat + self.bank.mat.b_mux_predec.delay + self.bank.mat.bit_mux_dec.delay + self.bank.mat.delay_sa, # sp.Max(self.delay_array_to_sa_mux_lev_1_decoder, self.delay_array_to_sa_mux_lev_2_decoder)) + # OPTION 2: USING symbolic... takes a while # print("before uca compute delay max") # self.delay_before_subarray_output_driver = symbolic_convex_max(max_delay_before_row_decoder + delay_inside_mat, # (delay_array_to_mat + self.bank.mat.b_mux_predec.delay + @@ -325,19 +325,18 @@ def compute_delays(self, inrisetime): # tmp_max = symbolic_convex_max(self.delay_array_to_sa_mux_lev_1_decoder, self.delay_array_to_sa_mux_lev_2_decoder) # self.delay_before_subarray_output_driver = symbolic_convex_max(self.delay_before_subarray_output_driver, tmp_max) - # selected random - # # Option 1 - self.delay_before_subarray_output_driver = max_delay_before_row_decoder + delay_inside_mat - # # Option 2 - # self.delay_before_subarray_output_driver = delay_array_to_mat + self.bank.mat.b_mux_predec.delay + self.bank.mat.bit_mux_dec.delay + self.bank.mat.delay_sa - # # Option 3 + # OPTION 3: just select 1 - will decrease accuracy + # a + # self.delay_before_subarray_output_driver = max_delay_before_row_decoder + delay_inside_mat + # b + self.delay_before_subarray_output_driver = delay_array_to_mat + self.bank.mat.b_mux_predec.delay + self.bank.mat.bit_mux_dec.delay + self.bank.mat.delay_sa + # c # self.delay_before_subarray_output_driver = self.delay_array_to_sa_mux_lev_1_decoder - # Option 4 + # d # self.delay_before_subarray_output_driver = self.delay_array_to_sa_mux_lev_2_decoder self.delay_from_subarray_out_drv_to_out = self.bank.mat.delay_subarray_out_drv_htree + self.bank.htree_out_data.delay + self.htree_out_data.delay self.access_time = self.bank.mat.delay_comparator - print("after uca compute delay max") if self.dp.fully_assoc: ram_delay_inside_mat = self.bank.mat.delay_bitline + self.bank.mat.delay_matchchline @@ -347,7 +346,8 @@ def compute_delays(self, inrisetime): if self.dp.is_main_mem: t_rcd = max_delay_before_row_decoder + delay_inside_mat - cas_latency = symbolic_convex_max(self.delay_array_to_sa_mux_lev_1_decoder, self.delay_array_to_sa_mux_lev_2_decoder) + self.delay_from_subarray_out_drv_to_out + # cas_latency = symbolic_convex_max(self.delay_array_to_sa_mux_lev_1_decoder, self.delay_array_to_sa_mux_lev_2_decoder) + self.delay_from_subarray_out_drv_to_out + cas_latency = self.delay_array_to_sa_mux_lev_1_decoder + self.delay_from_subarray_out_drv_to_out self.access_time = t_rcd + cas_latency if not self.dp.fully_assoc: @@ -355,14 +355,11 @@ def compute_delays(self, inrisetime): if self.dp.is_dram: temp += self.bank.mat.delay_writeback - #print(f'temp {temp}') - print(f'UCA AFTER TEMP') + # Uneeded since for cycle time # temp = symbolic_convex_max(temp, self.bank.mat.r_predec.delay) # temp = symbolic_convex_max(temp, self.bank.mat.b_mux_predec.delay) # temp = symbolic_convex_max(temp, self.bank.mat.sa_mux_lev_1_predec.delay) # temp = symbolic_convex_max(temp, self.bank.mat.sa_mux_lev_2_predec.delay) - - # MAX, but uneeded for access_time # temp = sp.Max( # temp, # self.bank.mat.r_predec.delay, @@ -371,13 +368,12 @@ def compute_delays(self, inrisetime): # self.bank.mat.sa_mux_lev_2_predec.delay # ) - print ("before max temp") + # Uneeded since for cycle time # max1 = symbolic_convex_max(self.bank.mat.r_predec.delay, self.bank.mat.b_mux_predec.delay) # max2 = symbolic_convex_max(self.bank.mat.sa_mux_lev_1_predec.delay, self.bank.mat.sa_mux_lev_2_predec.delay) # max3 = symbolic_convex_max(max1, max2) # temp = symbolic_convex_max(temp, max3) temp = self.bank.mat.r_predec.delay - print ("after max temp") else: ram_delay_inside_mat = self.bank.mat.delay_bitline + self.bank.mat.delay_matchchline @@ -386,22 +382,21 @@ def compute_delays(self, inrisetime): # temp = symbolic_convex_max(temp, self.bank.mat.sa_mux_lev_1_predec.delay) # temp = symbolic_convex_max(temp, self.bank.mat.sa_mux_lev_2_predec.delay) - # MAX, but uneeded for access_time + # Uneeded since for cycle time # temp = sp.Max( # temp, # self.bank.mat.b_mux_predec.delay, # self.bank.mat.sa_mux_lev_1_predec.delay, # self.bank.mat.sa_mux_lev_2_predec.delay # ) - print ("before max temp") + + # Uneeded since for cycle time # max1 = symbolic_convex_max(temp, self.bank.mat.b_mux_predec.delay) # max2 = symbolic_convex_max(self.bank.mat.sa_mux_lev_2_predec.delay, self.bank.mat.sa_mux_lev_1_predec.delay) # temp = symbolic_convex_max(max1, max2) temp = self.bank.mat.b_mux_predec.delay - print ("after max temp") - print ("UCA MAX NEXT") - print(g_ip.rpters_in_htree) + print ("UCA completed... please wait for expression to write.") g_ip.rpters_in_htree = True if g_ip.rpters_in_htree == False: temp = symbolic_convex_max(temp, self.bank.htree_in_add.max_unpipelined_link_delay) @@ -409,14 +404,6 @@ def compute_delays(self, inrisetime): delay_req_network = max_delay_before_row_decoder delay_rep_network = self.delay_from_subarray_out_drv_to_out - - #TODO delay_rep_network nan - # if math.isnan(delay_req_network): - # delay_req_network = 0 - # if math.isnan(delay_rep_network): - # delay_rep_network = 0 - # print(delay_req_network) - # print(delay_rep_network) self.multisubbank_interleave_cycle_time = symbolic_convex_max(delay_req_network, delay_rep_network) if self.dp.is_main_mem: @@ -503,11 +490,13 @@ def compute_power_energy(self): self.htree_in_data.power.readOp.leakage + self.htree_out_data.power.readOp.leakage ) + self.power_routing_to_bank.readOp.gate_leakage += ( self.htree_in_add.power.readOp.gate_leakage + self.htree_in_data.power.readOp.gate_leakage + self.htree_out_data.power.readOp.gate_leakage ) + if self.dp.fully_assoc or self.dp.pure_cam: self.power_routing_to_bank.readOp.leakage += self.htree_in_search.power.readOp.leakage + self.htree_out_search.power.readOp.leakage self.power_routing_to_bank.readOp.gate_leakage += self.htree_in_search.power.readOp.gate_leakage + self.htree_out_search.power.readOp.gate_leakage @@ -541,6 +530,7 @@ def compute_power_energy(self): self.bank.mat.power_bitline.readOp.dynamic ) * self.dp.num_act_mats_hor_dir ) + self.dyn_read_energy_remaining_words_in_burst = ( symbolic_convex_max(g_ip.burst_len / g_ip.int_prefetch_w, 1) - 1 ) * ( @@ -554,9 +544,10 @@ def compute_power_energy(self): self.bank.htree_out_data.power.readOp.dynamic + self.power_routing_to_bank.readOp.dynamic ) + self.dyn_read_energy_from_closed_page += self.dyn_read_energy_remaining_words_in_burst self.dyn_read_energy_from_open_page += self.dyn_read_energy_remaining_words_in_burst - + self.activate_energy = ( self.htree_in_add.power.readOp.dynamic + self.bank.htree_in_add.power_bit.readOp.dynamic * self.bank.num_addr_b_routed_to_mat_for_act + @@ -566,6 +557,7 @@ def compute_power_energy(self): self.bank.mat.power_sa.readOp.dynamic ) * self.dp.num_act_mats_hor_dir ) + self.read_energy = ( self.htree_in_add.power.readOp.dynamic + self.bank.htree_in_add.power_bit.readOp.dynamic * self.bank.num_addr_b_routed_to_mat_for_rd_or_wr + @@ -579,6 +571,7 @@ def compute_power_energy(self): self.bank.htree_out_data.power.readOp.dynamic + self.htree_in_data.power.readOp.dynamic ) * g_ip.burst_len + self.write_energy = ( self.htree_in_add.power.readOp.dynamic + self.bank.htree_in_add.power_bit.readOp.dynamic * self.bank.num_addr_b_routed_to_mat_for_rd_or_wr + @@ -591,6 +584,7 @@ def compute_power_energy(self): self.bank.mat.power_sa_mux_lev_2_decoders.readOp.dynamic ) * self.dp.num_act_mats_hor_dir ) * g_ip.burst_len + self.precharge_energy = ( self.bank.mat.power_bitline.readOp.dynamic + self.bank.mat.power_bl_precharge_eq_drv.readOp.dynamic @@ -676,6 +670,7 @@ def compute_power_energy(self): if not self.dp.is_tag: self.power.readOp.dynamic = self.dyn_read_energy_from_closed_page + self.power.writeOp.dynamic = ( self.dyn_read_energy_from_closed_page - self.dyn_read_energy_remaining_words_in_burst - diff --git a/cacti-main/cacti_python/wire.py b/cacti-main/cacti_python/wire.py index e796e85..d1da1bc 100644 --- a/cacti-main/cacti_python/wire.py +++ b/cacti-main/cacti_python/wire.py @@ -5,53 +5,82 @@ from .component import Component from .cacti_interface import * +import math + class Wire(Component): - global_comp = Component() + global_ = Component() global_5 = Component() global_10 = Component() global_20 = Component() global_30 = Component() low_swing = Component() initialized = 0 - wire_width_init = 0 - wire_spacing_init = 0 + wire_width_init = None + wire_spacing_init = None - def __init__(self, wire_model = 0, length = 1, nsense=1, width_scaling=1, spacing_scaling=1, wire_placement=outside_mat, resistivity=CU_RESISTIVITY, dt=g_tp.peri_global): + def __init__(self, wire_model=0, wl=1, n=1, w_s=1, s_s=1, wp=outside_mat, resistivity=CU_RESISTIVITY, dt=g_tp.peri_global): super().__init__() self.wt = wire_model - self.wire_length = length * 1e-6 - self.nsense = nsense - self.w_scale = width_scaling - self.s_scale = spacing_scaling + self.wire_length = wl * 1e-6 + self.nsense = n + self.w_scale = w_s + self.s_scale = s_s self.resistivity = resistivity - self.deviceType = dt if dt else g_tp.peri_global # TODO check this - self.wire_placement = wire_placement + self.deviceType = dt + self.wire_placement = wp self.min_w_pmos = self.deviceType.n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_ self.in_rise_time = 0 self.out_rise_time = 0 - self.repeater_spacing = 1 - # TODO set arbitrary for now - self.repeater_size = 0 + self.repeated_wire = [] + + self.transmitter = Component() + self.l_wire = Component() + self.sense_amp = Component() + + # CHECK + self.repeater_spacing = 0 + self.wire_width = 0 + self.wire_spacing = 0 + self.repeater_size = 0 + + if Wire.initialized != 1: + print("Initializing Wire") + self.__init_wire_simple(w_s, s_s, wp, resistivity, dt) - print("WRIE CHECKPINT 0") - self.calculate_wire_stats() - print("WRIE CHECKPINT 0") + self.repeater_spacing *= 1e6 self.wire_length *= 1e6 self.wire_width *= 1e6 self.wire_spacing *= 1e6 - - self.transmitter = Component() - self.l_wire = Component() - self.sense_amp = Component() - self.min_w_pmos = self.deviceType.n_to_p_eff_curr_drv_ratio * g_tp.min_w_nmos_ # assert self.wire_length > 0 # assert self.power.readOp.dynamic > 0 # assert self.power.readOp.leakage > 0 # assert self.power.readOp.gate_leakage > 0 + def __init_wire_simple(self, w_s, s_s, wp, resis, dt): + if self.wire_placement == outside_mat: + self.wire_width = g_tp.wire_outside_mat.pitch / 2 + elif self.wire_placement == inside_mat: + self.wire_width = g_tp.wire_inside_mat.pitch / 2 + else: + self.wire_width = g_tp.wire_local.pitch / 2 + + self.wire_spacing = self.wire_width + + self.wire_width *= (self.w_scale * 1e-6 / 2) + self.wire_spacing *= (self.s_scale * 1e-6 / 2) + + Wire.initialized = 1 + self.init_wire() + Wire.wire_width_init = self.wire_width + Wire.wire_spacing_init = self.wire_spacing + + # assert self.power.readOp.dynamic > 0 + # assert self.power.readOp.leakage > 0 + # assert self.power.readOp.gate_leakage > 0 + def __del__(self): pass @@ -64,114 +93,108 @@ def calculate_wire_stats(self): self.wire_width = g_tp.wire_local.pitch / 2 self.wire_spacing = self.wire_width - self.wire_width *= self.w_scale * 1e-6 / 2 - self.wire_spacing *= self.s_scale * 1e-6 / 2 - - if self.wt != Low_swing: - if self.wt == Global: - self.delay = self.global_comp.delay * self.wire_length - self.power.readOp.dynamic = self.global_comp.power.readOp.dynamic * self.wire_length - self.power.readOp.leakage = self.global_comp.power.readOp.leakage * self.wire_length - self.power.readOp.gate_leakage = self.global_comp.power.readOp.gate_leakage * self.wire_length - self.repeater_spacing = self.global_comp.area.w - self.repeater_size = self.global_comp.area.h - self.area.set_area((self.wire_length / self.repeater_spacing) * - compute_gate_area(INV, 1, self.min_w_pmos * self.repeater_size, - g_tp.min_w_nmos_ * self.repeater_size, g_tp.cell_h_def)) - elif self.wt == Global_5: + + self.wire_width *= (self.w_scale * 1e-6 / 2) + self.wire_spacing *= (self.s_scale * 1e-6 / 2) + + if self.wt != 'Low_swing': + if self.wt == 'Global': + self.delay = Wire.global_.delay * self.wire_length + self.power.readOp.dynamic = Wire.global_.power.readOp.dynamic * self.wire_length + self.power.readOp.leakage = Wire.global_.power.readOp.leakage * self.wire_length + self.power.readOp.gate_leakage = Wire.global_.power.readOp.gate_leakage * self.wire_length + self.repeater_spacing = Wire.global_.area.w + self.repeater_size = Wire.global_.area.h + + self.area.set_area((self.wire_length / self.repeater_spacing) * compute_gate_area(INV, 1, self.min_w_pmos * self.repeater_size, g_tp.min_w_nmos_ * self.repeater_size, g_tp.cell_h_def)) + elif self.wt == 'Global_5': self.delay = Wire.global_5.delay * self.wire_length self.power.readOp.dynamic = Wire.global_5.power.readOp.dynamic * self.wire_length self.power.readOp.leakage = Wire.global_5.power.readOp.leakage * self.wire_length self.power.readOp.gate_leakage = Wire.global_5.power.readOp.gate_leakage * self.wire_length self.repeater_spacing = Wire.global_5.area.w self.repeater_size = Wire.global_5.area.h - self.area.set_area((self.wire_length / self.repeater_spacing) * - compute_gate_area(INV, 1, self.min_w_pmos * self.repeater_size, - g_tp.min_w_nmos_ * self.repeater_size, g_tp.cell_h_def)) - elif self.wt == Global_10: + + self.area.set_area((self.wire_length / self.repeater_spacing) * compute_gate_area('INV', 1, self.min_w_pmos * self.repeater_size, g_tp.min_w_nmos_ * self.repeater_size, g_tp.cell_h_def)) + elif self.wt == 'Global_10': self.delay = Wire.global_10.delay * self.wire_length self.power.readOp.dynamic = Wire.global_10.power.readOp.dynamic * self.wire_length self.power.readOp.leakage = Wire.global_10.power.readOp.leakage * self.wire_length self.power.readOp.gate_leakage = Wire.global_10.power.readOp.gate_leakage * self.wire_length self.repeater_spacing = Wire.global_10.area.w self.repeater_size = Wire.global_10.area.h - self.area.set_area((self.wire_length / self.repeater_spacing) * - compute_gate_area(INV, 1, self.min_w_pmos * self.repeater_size, - g_tp.min_w_nmos_ * self.repeater_size, g_tp.cell_h_def)) - elif self.wt == Global_20: + + self.area.set_area((self.wire_length / self.repeater_spacing) * compute_gate_area('INV', 1, self.min_w_pmos * self.repeater_size, g_tp.min_w_nmos_ * self.repeater_size, g_tp.cell_h_def)) + elif self.wt == 'Global_20': self.delay = Wire.global_20.delay * self.wire_length self.power.readOp.dynamic = Wire.global_20.power.readOp.dynamic * self.wire_length self.power.readOp.leakage = Wire.global_20.power.readOp.leakage * self.wire_length self.power.readOp.gate_leakage = Wire.global_20.power.readOp.gate_leakage * self.wire_length self.repeater_spacing = Wire.global_20.area.w self.repeater_size = Wire.global_20.area.h - self.area.set_area((self.wire_length / self.repeater_spacing) * - compute_gate_area(INV, 1, self.min_w_pmos * self.repeater_size, - g_tp.min_w_nmos_ * self.repeater_size, g_tp.cell_h_def)) - elif self.wt == Global_30: + + self.area.set_area((self.wire_length / self.repeater_spacing) * compute_gate_area('INV', 1, self.min_w_pmos * self.repeater_size, g_tp.min_w_nmos_ * self.repeater_size, g_tp.cell_h_def)) + elif self.wt == 'Global_30': self.delay = Wire.global_30.delay * self.wire_length self.power.readOp.dynamic = Wire.global_30.power.readOp.dynamic * self.wire_length self.power.readOp.leakage = Wire.global_30.power.readOp.leakage * self.wire_length self.power.readOp.gate_leakage = Wire.global_30.power.readOp.gate_leakage * self.wire_length self.repeater_spacing = Wire.global_30.area.w self.repeater_size = Wire.global_30.area.h - self.area.set_area((self.wire_length / self.repeater_spacing) * - compute_gate_area(INV, 1, self.min_w_pmos * self.repeater_size, - g_tp.min_w_nmos_ * self.repeater_size, g_tp.cell_h_def)) - print("HOTSPOT") - print(self.deviceType.Vth) - #TODO important self.deviceType.Vth is not symbolic for some reason - if(self.deviceType.Vth == 0): - self.deviceType.Vth = 1 + + self.area.set_area((self.wire_length / self.repeater_spacing) * compute_gate_area('INV', 1, self.min_w_pmos * self.repeater_size, g_tp.min_w_nmos_ * self.repeater_size, g_tp.cell_h_def)) + else: # Check wr wire type + self.delay = Wire.global_.delay * self.wire_length + self.power.readOp.dynamic = Wire.global_.power.readOp.dynamic * self.wire_length + self.power.readOp.leakage = Wire.global_.power.readOp.leakage * self.wire_length + self.power.readOp.gate_leakage = Wire.global_.power.readOp.gate_leakage * self.wire_length + self.repeater_spacing = Wire.global_.area.w + self.repeater_size = Wire.global_.area.h + + self.area.set_area((self.wire_length / self.repeater_spacing) * compute_gate_area(INV, 1, self.min_w_pmos * self.repeater_size, g_tp.min_w_nmos_ * self.repeater_size, g_tp.cell_h_def)) self.out_rise_time = self.delay * self.repeater_spacing / self.deviceType.Vth - elif self.wt == Low_swing: + elif self.wt == 'Low_swing': self.low_swing_model() self.repeater_spacing = self.wire_length self.repeater_size = 1 else: - assert False + raise AssertionError() def signal_fall_time(self): timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + drain_C_(self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(self.min_w_pmos + g_tp.min_w_nmos_, 0)) * tr_R_on(self.min_w_pmos, PCH, 1) + gate_C(self.min_w_pmos + g_tp.min_w_nmos_, 0)) * \ + tr_R_on(self.min_w_pmos, PCH, 1) rt = horowitz(0, timeconst, self.deviceType.Vth / self.deviceType.Vdd, self.deviceType.Vth / self.deviceType.Vdd, FALL) / (self.deviceType.Vdd - self.deviceType.Vth) timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + drain_C_(self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(self.min_w_pmos + g_tp.min_w_nmos_, 0)) * tr_R_on(g_tp.min_w_nmos_, NCH, 1) + gate_C(self.min_w_pmos + g_tp.min_w_nmos_, 0)) * \ + tr_R_on(g_tp.min_w_nmos_, NCH, 1) ft = horowitz(rt, timeconst, self.deviceType.Vth / self.deviceType.Vdd, self.deviceType.Vth / self.deviceType.Vdd, RISE) / self.deviceType.Vth return ft def signal_rise_time(self): timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + drain_C_(self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(self.min_w_pmos + g_tp.min_w_nmos_, 0)) * tr_R_on(g_tp.min_w_nmos_, NCH, 1) - # TODO need to check why deviceType - if (self.deviceType.Vdd == 0): - self.deviceType.Vdd = 1 + gate_C(self.min_w_pmos + g_tp.min_w_nmos_, 0)) * \ + tr_R_on(g_tp.min_w_nmos_, NCH, 1) rt = horowitz(0, timeconst, self.deviceType.Vth / self.deviceType.Vdd, self.deviceType.Vth / self.deviceType.Vdd, RISE) / self.deviceType.Vth timeconst = (drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + drain_C_(self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(self.min_w_pmos + g_tp.min_w_nmos_, 0)) * tr_R_on(self.min_w_pmos, PCH, 1) + gate_C(self.min_w_pmos + g_tp.min_w_nmos_, 0)) * \ + tr_R_on(self.min_w_pmos, PCH, 1) ft = horowitz(rt, timeconst, self.deviceType.Vth / self.deviceType.Vdd, self.deviceType.Vth / self.deviceType.Vdd, FALL) / (self.deviceType.Vdd - self.deviceType.Vth) return ft - def set_in_rise_time(self, rt): - self.in_rise_time = rt - - def print_wire(self): - pass - def wire_cap(self, length, call_from_outside=False): epsilon0 = 8.8542e-12 - - if self.wire_placement == outside_mat: + if self.wire_placement == 'outside_mat': aspect_ratio = g_tp.wire_outside_mat.aspect_ratio horiz_dielectric_constant = g_tp.wire_outside_mat.horiz_dielectric_constant vert_dielectric_constant = g_tp.wire_outside_mat.vert_dielectric_constant miller_value = g_tp.wire_outside_mat.miller_value ild_thickness = g_tp.wire_outside_mat.ild_thickness - elif self.wire_placement == inside_mat: + elif self.wire_placement == 'inside_mat': aspect_ratio = g_tp.wire_inside_mat.aspect_ratio horiz_dielectric_constant = g_tp.wire_inside_mat.horiz_dielectric_constant vert_dielectric_constant = g_tp.wire_inside_mat.vert_dielectric_constant @@ -189,93 +212,107 @@ def wire_cap(self, length, call_from_outside=False): self.wire_spacing *= 1e-6 wire_height = self.wire_width / self.w_scale * aspect_ratio - sidewall = miller_value * horiz_dielectric_constant * (wire_height / self.wire_spacing) * epsilon0 adj = miller_value * vert_dielectric_constant * self.wire_width / (ild_thickness * 1e-6) * epsilon0 - tot_cap = sidewall + adj + g_tp.fringe_cap * 1e6 + tot_cap = (sidewall + adj + (g_tp.fringe_cap * 1e6)) if call_from_outside: self.wire_width *= 1e6 self.wire_spacing *= 1e6 return tot_cap * length - + def wire_res(self, length): alpha_scatter = 1.05 dishing_thickness = 0 barrier_thickness = 0 - if self.wire_placement == outside_mat: + if self.wire_placement == 'outside_mat': aspect_ratio = g_tp.wire_outside_mat.aspect_ratio - elif self.wire_placement == inside_mat: + elif self.wire_placement == 'inside_mat': aspect_ratio = g_tp.wire_inside_mat.aspect_ratio else: aspect_ratio = g_tp.wire_local.aspect_ratio - return (alpha_scatter * self.resistivity * 1e-6 * length) / ( - (aspect_ratio * self.wire_width / self.w_scale - dishing_thickness - barrier_thickness) * - (self.wire_width - 2 * barrier_thickness)) + return (alpha_scatter * self.resistivity * 1e-6 * length / + ((aspect_ratio * self.wire_width / self.w_scale - dishing_thickness - barrier_thickness) * + (self.wire_width - 2 * barrier_thickness))) def low_swing_model(self): - length = self.wire_length + len_ = self.wire_length beta = pmos_to_nmos_sz_ratio() - inputrise = self.signal_rise_time() if self.in_rise_time == 0 else self.in_rise_time - cwire = self.wire_cap(length) - rwire = self.wire_res(length) + inputrise = self.in_rise_time if self.in_rise_time != 0 else self.signal_rise_time() - RES_ADJ = 8.6 + cwire = self.wire_cap(len_) + rwire = self.wire_res(len_) + RES_ADJ = 8.6 driver_res = (-8 * g_tp.FO4 / (math.log(0.5) * cwire)) / RES_ADJ nsize = R_to_w(driver_res, NCH) - nsize = min(nsize, g_tp.max_w_nmos_) - nsize = symbolic_convex_max(nsize, g_tp.min_w_nmos_) + # RECENT CHANGE: MAX - ignore to reduce expression length + # nsize = sp.Min(nsize, g_tp.max_w_nmos_) + # nsize = symbolic_convex_max(nsize, g_tp.min_w_nmos_) + + # CHANGE: RELATIONAL + # if rwire * cwire > 8 * g_tp.FO4: + # nsize = g_tp.max_w_nmos_ - if rwire * cwire > 8 * g_tp.FO4: - nsize = g_tp.max_w_nmos_ + nsize = sp.Piecewise( + (g_tp.max_w_nmos_, rwire * cwire > 8 * g_tp.FO4), + (nsize, True) + ) - st_eff = sp.sqrt((2 + beta / 1 + beta) * gate_C(nsize, 0) / ( - gate_C(2 * g_tp.min_w_nmos_, 0) + gate_C(2 * self.min_w_pmos, 0))) + st_eff = sp.sqrt((2 + beta / 1 + beta) * gate_C(nsize, 0) / + (gate_C(2 * g_tp.min_w_nmos_, 0) + gate_C(2 * self.min_w_pmos, 0))) req_cin = ((2 + beta / 1 + beta) * gate_C(nsize, 0)) / st_eff inv_size = req_cin / (gate_C(self.min_w_pmos, 0) + gate_C(g_tp.min_w_nmos_, 0)) + + # RECENT CHANGE: MAX - ignore to reduce expression size inv_size = symbolic_convex_max(inv_size, 1) res_eq = 2 * tr_R_on(g_tp.min_w_nmos_, NCH, 1) - cap_eq = (2 * drain_C_(self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(2 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(inv_size * g_tp.min_w_nmos_, 0) + - gate_C(inv_size * self.min_w_pmos, 0)) + cap_eq = 2 * drain_C_(self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + \ + drain_C_(2 * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + \ + gate_C(inv_size * g_tp.min_w_nmos_, 0) + \ + gate_C(inv_size * self.min_w_pmos, 0) timeconst = res_eq * cap_eq - self.delay = horowitz(inputrise, timeconst, self.deviceType.Vth / self.deviceType.Vdd, self.deviceType.Vth / self.deviceType.Vdd, RISE) + self.delay = horowitz(inputrise, timeconst, self.deviceType.Vth / self.deviceType.Vdd, + self.deviceType.Vth / self.deviceType.Vdd, RISE) temp_power = cap_eq * self.deviceType.Vdd * self.deviceType.Vdd inputrise = self.delay / (self.deviceType.Vdd - self.deviceType.Vth) res_eq = tr_R_on(inv_size * self.min_w_pmos, PCH, 1) - cap_eq = (drain_C_(inv_size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(inv_size * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + - gate_C(nsize, 0)) - + cap_eq = drain_C_(inv_size * self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + \ + drain_C_(inv_size * g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + \ + gate_C(nsize, 0) timeconst = res_eq * cap_eq - self.delay += horowitz(inputrise, timeconst, self.deviceType.Vth / self.deviceType.Vdd, self.deviceType.Vth / self.deviceType.Vdd, FALL) + + self.delay += horowitz(inputrise, timeconst, self.deviceType.Vth / self.deviceType.Vdd, + self.deviceType.Vth / self.deviceType.Vdd, FALL) temp_power += cap_eq * self.deviceType.Vdd * self.deviceType.Vdd self.transmitter.delay = self.delay self.transmitter.power.readOp.dynamic = temp_power * 2 - self.transmitter.power.readOp.leakage = self.deviceType.Vdd * ( - 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, self.min_w_pmos, 2, nand) + - 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, self.min_w_pmos, 1, inv)) - self.transmitter.power.readOp.gate_leakage = self.deviceType.Vdd * ( - 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, self.min_w_pmos, 2, nand) + - 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, self.min_w_pmos, 1, inv)) + self.transmitter.power.readOp.leakage = self.deviceType.Vdd * \ + (4 * cmos_Isub_leakage(g_tp.min_w_nmos_, self.min_w_pmos, 2, 'nand') + + 4 * cmos_Isub_leakage(g_tp.min_w_nmos_, self.min_w_pmos, 1, 'inv')) + + self.transmitter.power.readOp.gate_leakage = self.deviceType.Vdd * \ + (4 * cmos_Ig_leakage(g_tp.min_w_nmos_, self.min_w_pmos, 2, 'nand') + + 4 * cmos_Ig_leakage(g_tp.min_w_nmos_, self.min_w_pmos, 1, 'inv')) inputrise = self.delay / self.deviceType.Vth - cap_eq = cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2 + self.nsense * self.sense_amp_input_cap() - timeconst = (tr_R_on(nsize, NCH, 1) * RES_ADJ) * (cwire + drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) * 2) + rwire * cwire / 2 + ( - tr_R_on(nsize, NCH, 1) * RES_ADJ + rwire) * self.nsense * self.sense_amp_input_cap() + cap_eq = cwire + 2 * drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def) + \ + self.nsense * self.sense_amp_input_cap() + timeconst = (tr_R_on(nsize, NCH, 1) * RES_ADJ) * (cwire + 2 * drain_C_(nsize, NCH, 1, 1, g_tp.cell_h_def)) + \ + rwire * cwire / 2 + \ + (tr_R_on(nsize, NCH, 1) * RES_ADJ + rwire) * \ + self.nsense * self.sense_amp_input_cap() self.delay += horowitz(inputrise, timeconst, self.deviceType.Vth / self.deviceType.Vdd, 0.25, 0) VOL_SWING = 0.1 @@ -284,8 +321,11 @@ def low_swing_model(self): self.l_wire.delay = self.delay - self.transmitter.delay self.l_wire.power.readOp.dynamic = temp_power - self.transmitter.power.readOp.dynamic - self.l_wire.power.readOp.leakage = self.deviceType.Vdd * (4 * cmos_Isub_leakage(nsize, 0, 1, nmos)) - self.l_wire.power.readOp.gate_leakage = self.deviceType.Vdd * (4 * cmos_Ig_leakage(nsize, 0, 1, nmos)) + self.l_wire.power.readOp.leakage = self.deviceType.Vdd * \ + (4 * cmos_Isub_leakage(nsize, 0, 1, nmos)) + + self.l_wire.power.readOp.gate_leakage = self.deviceType.Vdd * \ + (4 * cmos_Ig_leakage(nsize, 0, 1, nmos)) self.delay += g_tp.sense_delay @@ -296,199 +336,244 @@ def low_swing_model(self): self.sense_amp.power.readOp.gate_leakage = 0 self.power.readOp.dynamic = temp_power + self.sense_amp.power.readOp.dynamic - self.power.readOp.leakage = self.transmitter.power.readOp.leakage + self.l_wire.power.readOp.leakage + self.sense_amp.power.readOp.leakage - self.power.readOp.gate_leakage = self.transmitter.power.readOp.gate_leakage + self.l_wire.power.readOp.gate_leakage + self.sense_amp.power.readOp.gate_leakage - + self.power.readOp.leakage = self.transmitter.power.readOp.leakage + \ + self.l_wire.power.readOp.leakage + \ + self.sense_amp.power.readOp.leakage + self.power.readOp.gate_leakage = self.transmitter.power.readOp.gate_leakage + \ + self.l_wire.power.readOp.gate_leakage + \ + self.sense_amp.power.readOp.gate_leakage + def sense_amp_input_cap(self): - return (drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) + - gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) + - drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def)) + return ( + drain_C_(g_tp.w_iso, PCH, 1, 1, g_tp.cell_h_def) + + gate_C(g_tp.w_sense_en + g_tp.w_sense_n, 0) + + drain_C_(g_tp.w_sense_n, NCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.w_sense_p, PCH, 1, 1, g_tp.cell_h_def) + ) def delay_optimal_wire(self): - len = self.wire_length + len_ = self.wire_length beta = pmos_to_nmos_sz_ratio() switching = 0 short_ckt = 0 tc = 0 input_cap = gate_C(g_tp.min_w_nmos_ + self.min_w_pmos, 0) - out_cap = (drain_C_(self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def)) - out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + - tr_R_on(self.min_w_pmos, PCH, 1)) / 2 - wr = self.wire_res(len) - wc = self.wire_cap(len) + out_cap = ( + drain_C_(self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + + drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) + ) + out_res = ( + tr_R_on(g_tp.min_w_nmos_, NCH, 1) + + tr_R_on(self.min_w_pmos, PCH, 1) + ) / 2 + wr = self.wire_res(len_) + wc = self.wire_cap(len_) repeater_scaling = sp.sqrt(out_res * wc / (wr * input_cap)) - self.repeater_spacing = sp.sqrt(2 * out_res * (out_cap + input_cap) / - ((wr / len) * (wc / len))) + self.repeater_spacing = sp.sqrt(2 * out_res * (out_cap + input_cap) / ((wr / len_) * (wc / len_))) self.repeater_size = repeater_scaling - switching = (repeater_scaling * (input_cap + out_cap) + - self.repeater_spacing * (wc / len)) * self.deviceType.Vdd * self.deviceType.Vdd - tc = (out_res * (input_cap + out_cap) + - out_res * wc / len * self.repeater_spacing / repeater_scaling + - wr / len * self.repeater_spacing * input_cap * repeater_scaling + - 0.5 * (wr / len) * (wc / len) * self.repeater_spacing * self.repeater_spacing) - self.delay = 0.693 * tc * len / self.repeater_spacing + + switching = ( + (repeater_scaling * (input_cap + out_cap) + + self.repeater_spacing * (wc / len_)) * + self.deviceType.Vdd * self.deviceType.Vdd + ) + + tc = ( + out_res * (input_cap + out_cap) + + out_res * wc / len_ * self.repeater_spacing / repeater_scaling + + wr / len_ * self.repeater_spacing * input_cap * repeater_scaling + + 0.5 * (wr / len_) * (wc / len_) * self.repeater_spacing * self.repeater_spacing + ) + + self.delay = 0.693 * tc * len_ / self.repeater_spacing + Ishort_ckt = 65e-6 - short_ckt = (self.deviceType.Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * - repeater_scaling * tc) - self.area.set_area((len / self.repeater_spacing) * - compute_gate_area(INV, 1, self.min_w_pmos * repeater_scaling, - g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def)) - self.power.readOp.dynamic = (len / self.repeater_spacing) * (switching + short_ckt) - self.power.readOp.leakage = ((len / self.repeater_spacing) * - self.deviceType.Vdd * - cmos_Isub_leakage(g_tp.min_w_nmos_ * repeater_scaling, - beta * g_tp.min_w_nmos_ * repeater_scaling, 1, inv)) - self.power.readOp.gate_leakage = ((len / self.repeater_spacing) * - self.deviceType.Vdd * - cmos_Ig_leakage(g_tp.min_w_nmos_ * repeater_scaling, - beta * g_tp.min_w_nmos_ * repeater_scaling, 1, inv)) + short_ckt = ( + self.deviceType.Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * + repeater_scaling * tc + ) + + self.area.set_area( + (len_ / self.repeater_spacing) * + compute_gate_area( + INV, 1, self.min_w_pmos * repeater_scaling, + g_tp.min_w_nmos_ * repeater_scaling, g_tp.cell_h_def + ) + ) + + self.power.readOp.dynamic = (len_ / self.repeater_spacing) * (switching + short_ckt) + self.power.readOp.leakage = ( + (len_ / self.repeater_spacing) * + self.deviceType.Vdd * + cmos_Isub_leakage( + g_tp.min_w_nmos_ * repeater_scaling, beta * g_tp.min_w_nmos_ * repeater_scaling, 1, inv + ) + ) + self.power.readOp.gate_leakage = ( + (len_ / self.repeater_spacing) * + self.deviceType.Vdd * + cmos_Ig_leakage( + g_tp.min_w_nmos_ * repeater_scaling, beta * g_tp.min_w_nmos_ * repeater_scaling, 1, inv + ) + ) def init_wire(self): self.wire_length = 1 self.delay_optimal_wire() - sp = self.repeater_spacing * 1e6 - si = self.repeater_size + # sp = self.repeater_spacing * 1e6 # in microns + sp = int(g_ip.repeater_spacing) # CHANGE: ARRAY LOGIC + + # si = self.repeater_size + si = int(g_ip.repeater_size) # CHANGE: ARRAY LOGIC + # si = 85.6553 + + + # CHANGE: ARRAY LOGIC - cannot index with symbolic expression, so we have to use value self.repeated_wire.append(Component()) for j in range(int(sp), int(4 * sp), 100): for i in range(int(si), 1, -1): - pow = self.wire_model(j * 1e-6, i) - if j == sp and i == si: - self.global_comp.delay = self.delay - self.global_comp.power = pow - self.global_comp.area.h = si - self.global_comp.area.w = sp * 1e-6 - self.repeated_wire[-1].delay = self.delay - self.repeated_wire[-1].power.readOp = pow.readOp - self.repeated_wire[-1].area.w = j * 1e-6 + pow_, del_ = self.wire_model(j * 1e-6, i) + + if j == int(sp) and i == int(si): + Wire.global_.delay = del_ + Wire.global_.power = pow_ + Wire.global_.area.h = si + Wire.global_.area.w = sp * 1e-6 # m + + self.repeated_wire[-1].delay = del_ + self.repeated_wire[-1].power.readOp = pow_.readOp + self.repeated_wire[-1].area.w = j * 1e-6 # m self.repeated_wire[-1].area.h = i self.repeated_wire.append(Component()) + self.repeated_wire.pop() self.update_fullswing() - l_wire = Wire(Low_swing, 0.001, 1) - self.low_swing.delay = l_wire.delay - self.low_swing.power = l_wire.power + + # CHANGE: SET LOGIC - just set global_ to be spacing and size + Wire.global_.area.h = si + Wire.global_.area.w = sp * 1e-6 # m + + l_wire = Wire('Low_swing', 0.001, 1) + Wire.low_swing.delay = l_wire.delay + Wire.low_swing.power = l_wire.power del l_wire def update_fullswing(self): - del_values = [self.global_comp.delay * (1 + 0.3), self.global_comp.delay * (1 + 0.2), - self.global_comp.delay * (1 + 0.1), self.global_comp.delay * (1 + 0.05)] + deltas = [ + self.global_.delay + self.global_.delay * 0.05, + self.global_.delay + self.global_.delay * 0.1, + self.global_.delay + self.global_.delay * 0.2, + self.global_.delay + self.global_.delay * 0.3 + ] + i = 4 while i > 0: - threshold = del_values[i - 1] - cost = BIGNUM - for component in self.repeated_wire: - if component.delay > threshold: - self.repeated_wire.remove(component) - else: - ncost = (component.power.readOp.dynamic / self.global_comp.power.readOp.dynamic + - component.power.readOp.leakage / self.global_comp.power.readOp.leakage) - if ncost < cost: - cost = ncost - if i == 4: - self.global_30 = component - elif i == 3: - self.global_20 = component - elif i == 2: - self.global_10 = component - elif i == 1: - self.global_5 = component + threshold = deltas[i - 1] + cost = float('inf') + for citer in list(self.repeated_wire): + # CHANGE: RELATIONAL LOGIC + # if citer.delay > threshold: + # self.repeated_wire.remove(citer) + # else: + ncost = citer.power.readOp.dynamic / self.global_.power.readOp.dynamic + \ + citer.power.readOp.leakage / self.global_.power.readOp.leakage + + # CHANGE: RELATIONAL LOGIC + # if ncost < cost: + cost = ncost + if i == 4: + Wire.global_30.delay = citer.delay + Wire.global_30.power = citer.power + Wire.global_30.area = citer.area + elif i == 3: + Wire.global_20.delay = citer.delay + Wire.global_20.power = citer.power + Wire.global_20.area = citer.area + elif i == 2: + Wire.global_10.delay = citer.delay + Wire.global_10.power = citer.power + Wire.global_10.area = citer.area + elif i == 1: + Wire.global_5.delay = citer.delay + Wire.global_5.power = citer.power + Wire.global_5.area = citer.area i -= 1 def wire_model(self, space, size): - len = 1 + ptemp = PowerDef() + len_ = 1 beta = pmos_to_nmos_sz_ratio() switching = 0 short_ckt = 0 tc = 0 input_cap = gate_C(g_tp.min_w_nmos_ + self.min_w_pmos, 0) - out_cap = (drain_C_(self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + - drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def)) + out_cap = drain_C_(self.min_w_pmos, PCH, 1, 1, g_tp.cell_h_def) + \ + drain_C_(g_tp.min_w_nmos_, NCH, 1, 1, g_tp.cell_h_def) out_res = (tr_R_on(g_tp.min_w_nmos_, NCH, 1) + tr_R_on(self.min_w_pmos, PCH, 1)) / 2 - wr = self.wire_res(len) - wc = self.wire_cap(len) + wr = self.wire_res(len_) + wc = self.wire_cap(len_) + self.repeater_spacing = space self.repeater_size = size + switching = (self.repeater_size * (input_cap + out_cap) + - self.repeater_spacing * (wc / len)) * self.deviceType.Vdd * self.deviceType.Vdd - tc = (out_res * (input_cap + out_cap) + - out_res * wc / len * self.repeater_spacing / self.repeater_size + - wr / len * self.repeater_spacing * out_cap * self.repeater_size + - 0.5 * (wr / len) * (wc / len) * self.repeater_spacing * self.repeater_spacing) - self.delay = 0.693 * tc * len / self.repeater_spacing + self.repeater_spacing * (wc / len_)) * self.deviceType.Vdd * self.deviceType.Vdd + + tc = out_res * (input_cap + out_cap) + \ + out_res * wc / len_ * self.repeater_spacing / self.repeater_size + \ + wr / len_ * self.repeater_spacing * out_cap * self.repeater_size + \ + 0.5 * (wr / len_) * (wc / len_) * self.repeater_spacing * self.repeater_spacing + + delay = 0.693 * tc * len_ / self.repeater_spacing + Ishort_ckt = 65e-6 - short_ckt = (self.deviceType.Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * - self.repeater_size * tc) - ptemp = PowerDef() - ptemp.readOp.dynamic = ((len / self.repeater_spacing) * (switching + short_ckt)) - ptemp.readOp.leakage = ((len / self.repeater_spacing) * - self.deviceType.Vdd * - cmos_Isub_leakage(g_tp.min_w_nmos_ * self.repeater_size, - beta * g_tp.min_w_nmos_ * self.repeater_size, 1, inv)) - ptemp.readOp.gate_leakage = ((len / self.repeater_spacing) * - self.deviceType.Vdd * - cmos_Ig_leakage(g_tp.min_w_nmos_ * self.repeater_size, - beta * g_tp.min_w_nmos_ * self.repeater_size, 1, inv)) - return ptemp + short_ckt = self.deviceType.Vdd * g_tp.min_w_nmos_ * Ishort_ckt * 1.0986 * \ + self.repeater_size * tc - def print_wire(self): - print("\nWire Properties:\n\n") - print(f" Delay Optimal\n\tRepeater size - {self.global_comp.area.h} " - f"\n\tRepeater spacing - {self.global_comp.area.w * 1e3} (mm)" - f"\n\tDelay - {self.global_comp.delay * 1e6} (ns/mm)" - f"\n\tPowerD - {self.global_comp.power.readOp.dynamic * 1e6} (nJ/mm)" - f"\n\tPowerL - {self.global_comp.power.readOp.leakage} (mW/mm)" - f"\n\tPowerLgate - {self.global_comp.power.readOp.gate_leakage} (mW/mm)\n") - print(f"\tWire width - {self.wire_width_init * 1e6} microns\n") - print(f"\tWire spacing - {self.wire_spacing_init * 1e6} microns\n") + ptemp.readOp.dynamic = (len_ / self.repeater_spacing) * (switching + short_ckt) + ptemp.readOp.leakage = (len_ / self.repeater_spacing) * \ + self.deviceType.Vdd * \ + cmos_Isub_leakage(g_tp.min_w_nmos_ * self.repeater_size, + beta * g_tp.min_w_nmos_ * self.repeater_size, 1, inv) - print(f" 5% Overhead\n\tRepeater size - {self.global_5.area.h} " - f"\n\tRepeater spacing - {self.global_5.area.w * 1e3} (mm)" - f"\n\tDelay - {self.global_5.delay * 1e6} (ns/mm)" - f"\n\tPowerD - {self.global_5.power.readOp.dynamic * 1e6} (nJ/mm)" - f"\n\tPowerL - {self.global_5.power.readOp.leakage} (mW/mm)" - f"\n\tPowerLgate - {self.global_5.power.readOp.gate_leakage} (mW/mm)\n") - print(f"\tWire width - {self.wire_width_init * 1e6} microns\n") - print(f"\tWire spacing - {self.wire_spacing_init * 1e6} microns\n") + ptemp.readOp.gate_leakage = (len_ / self.repeater_spacing) * \ + self.deviceType.Vdd * \ + cmos_Ig_leakage(g_tp.min_w_nmos_ * self.repeater_size, + beta * g_tp.min_w_nmos_ * self.repeater_size, 1, inv) - print(f" 10% Overhead\n\tRepeater size - {self.global_10.area.h} " - f"\n\tRepeater spacing - {self.global_10.area.w * 1e3} (mm)" - f"\n\tDelay - {self.global_10.delay * 1e6} (ns/mm)" - f"\n\tPowerD - {self.global_10.power.readOp.dynamic * 1e6} (nJ/mm)" - f"\n\tPowerL - {self.global_10.power.readOp.leakage} (mW/mm)" - f"\n\tPowerLgate - {self.global_10.power.readOp.gate_leakage} (mW/mm)\n") - print(f"\tWire width - {self.wire_width_init * 1e6} microns\n") - print(f"\tWire spacing - {self.wire_spacing_init * 1e6} microns\n") + return ptemp, delay - print(f" 20% Overhead\n\tRepeater size - {self.global_20.area.h} " - f"\n\tRepeater spacing - {self.global_20.area.w * 1e3} (mm)" - f"\n\tDelay - {self.global_20.delay * 1e6} (ns/mm)" - f"\n\tPowerD - {self.global_20.power.readOp.dynamic * 1e6} (nJ/mm)" - f"\n\tPowerL - {self.global_20.power.readOp.leakage} (mW/mm)" - f"\n\tPowerLgate - {self.global_20.power.readOp.gate_leakage} (mW/mm)\n") - print(f"\tWire width - {self.wire_width_init * 1e6} microns\n") + def print_wire(self): + print("\nWire Properties:\n") + print(f" Delay Optimal\n\tRepeater size - {self.global_.area.h}" + f" \n\tRepeater spacing - {self.global_.area.w * 1e3} (mm)" + f" \n\tDelay - {self.global_.delay * 1e6} (ns/mm)" + f" \n\tPowerD - {self.global_.power.readOp.dynamic * 1e6} (nJ/mm)" + f" \n\tPowerL - {self.global_.power.readOp.leakage} (mW/mm)" + f" \n\tPowerLgate - {self.global_.power.readOp.gate_leakage} (mW/mm)") + print(f"\tWire width - {self.wire_width_init * 1e6} microns") print(f"\tWire spacing - {self.wire_spacing_init * 1e6} microns\n") - print(f" 30% Overhead\n\tRepeater size - {self.global_30.area.h} " - f"\n\tRepeater spacing - {self.global_30.area.w * 1e3} (mm)" - f"\n\tDelay - {self.global_30.delay * 1e6} (ns/mm)" - f"\n\tPowerD - {self.global_30.power.readOp.dynamic * 1e6} (nJ/mm)" - f"\n\tPowerL - {self.global_30.power.readOp.leakage} (mW/mm)" - f"\n\tPowerLgate - {self.global_30.power.readOp.gate_leakage} (mW/mm)\n") - print(f"\tWire width - {self.wire_width_init * 1e6} microns\n") - print(f"\tWire spacing - {self.wire_spacing_init * 1e6} microns\n") + for overhead, global_comp in zip(["5%", "10%", "20%", "30%"], + [self.global_5, self.global_10, self.global_20, self.global_30]): + print(f" {overhead} Overhead\n\tRepeater size - {global_comp.area.h}" + f" \n\tRepeater spacing - {global_comp.area.w * 1e3} (mm)" + f" \n\tDelay - {global_comp.delay * 1e6} (ns/mm)" + f" \n\tPowerD - {global_comp.power.readOp.dynamic * 1e6} (nJ/mm)" + f" \n\tPowerL - {global_comp.power.readOp.leakage} (mW/mm)" + f" \n\tPowerLgate - {global_comp.power.readOp.gate_leakage} (mW/mm)") + print(f"\tWire width - {self.wire_width_init * 1e6} microns") + print(f"\tWire spacing - {self.wire_spacing_init * 1e6} microns\n") print(" Low-swing wire (1 mm) - Note: Unlike repeated wires, \n\tdelay and power " "values of low-swing wires do not\n\thave a linear relationship with length." f" \n\tdelay - {self.low_swing.delay * 1e9} (ns)" f" \n\tpowerD - {self.low_swing.power.readOp.dynamic * 1e9} (nJ)" f" \n\tPowerL - {self.low_swing.power.readOp.leakage} (mW)" - f" \n\tPowerLgate - {self.low_swing.power.readOp.gate_leakage} (mW)\n") - print(f"\tWire width - {self.wire_width_init * 2} microns\n") - print(f"\tWire spacing - {self.wire_spacing_init * 2} microns\n") - print() - - + f" \n\tPowerLgate - {self.low_swing.power.readOp.gate_leakage} (mW)") + print(f"\tWire width - {self.wire_width_init * 2} microns") + print(f"\tWire spacing - {self.wire_spacing_init * 2} microns\n\n") + def set_in_rise_time(self, rt): + self.in_rise_time = rt \ No newline at end of file diff --git a/cacti-main/causality_trace.md b/cacti-main/causality_trace.md new file mode 100644 index 0000000..d25a786 --- /dev/null +++ b/cacti-main/causality_trace.md @@ -0,0 +1,49 @@ +Causality Generated: +Flags: -verbal, -simple +[To obtain more detailed output, use the `-detailed` flag] + +Directory: cacti + +### Causality Trace for Cache Simulator + +#### **OVERVIEW:** +Changes energy calculations for cache simulation. + +#### **component.py:** +- Changing `total_diff_w` from relational to set value impacted **4** files downstream. +- **Changed files:** `mat.py`, `bank.py`, `uca.py`, `uca_org.py` +- **Details:** + - `mat.py` now produces a different result for energy, **3** vars are no longer used. + - `bank.py` calls from `mat.py`, **0** vars are no longer used. + - `uca.py` now produces a different result for energy, **2** vars are no longer used. + - `uca_org.py` now produces a different result for `cycle_time`, **2** vars are no longer used. + +#### **cache.py:** +- Modified `cache_size` calculation to account for new cache hierarchy structure. +- Change affected **5** files downstream. +- **Changed files:** memory.py, controller.py, lru.py, write_buffer.py, replacement_policy.py +- **Details:** + - `memory.py` now produces a different result for latency, **4** vars are no longer used. + - `controller.py` calls `cache_size` from `cache.py`, **1** var is no longer used. + - `lru.py` now calculates a different hit rate, **2** vars are no longer used. + - `write_buffer.py` depends on `cache_size`, **3** vars are no longer used. + - `replacement_policy.py` now adjusts eviction strategies, **1** var is no longer used. + +#### **latency.py:** +- Revised the `latency_model` to use a more accurate timing function. +- Change affected **3** files downstream. +- **Changed files:** read_path.py, write_path.py, hit_rate.py +- **Details:** + - `read_path.py` now produces a different read time, **1** var is no longer used. + - `write_path.py` now produces a different write time, **0** vars are no longer used. + - `hit_rate.py` now computes a different average hit rate, **2** vars are no longer used. + +#### **config.py:** +- Updated configuration parsing to include new cache policies. +- Change affected **2** files downstream. +- **Changed files:** cache.py, controller.py +- **Details:** + - `cache.py` now utilizes new configuration parameters, **3** vars are no longer used. + - `controller.py` now reads updated policies, **0** vars are no longer used. + +[To obtain more detailed output, use the `-detailed` flag] diff --git a/cacti-main/component.cc b/cacti-main/component.cc index e06f99f..ea48659 100644 --- a/cacti-main/component.cc +++ b/cacti-main/component.cc @@ -212,8 +212,6 @@ int Component::logical_effort( w_n[i] = MAX(w_n[i], g_tp.min_w_nmos_); w_p[i] = p_to_n_sz_ratio * w_n[i]; - cout << "The number of gates before is: " << num_gates << endl; - if (w_n[i] > max_w_nmos) // && !g_ip->is_3d_mem) { double C_ld = gate_C((1 + p_to_n_sz_ratio) * max_w_nmos, 0, is_dram_, false, is_wl_tr_); @@ -233,8 +231,6 @@ int Component::logical_effort( w_p[i] = p_to_n_sz_ratio * w_n[i]; } - cout << "The number of gates after is: " << num_gates << endl; - assert(num_gates <= MAX_NUMBER_GATES_STAGE); return num_gates; } diff --git a/cacti-main/io.cc b/cacti-main/io.cc index bcfeac4..fadb0ea 100644 --- a/cacti-main/io.cc +++ b/cacti-main/io.cc @@ -2600,6 +2600,11 @@ void output_data_csv(const uca_org_t & fin_res, string fn) file << "IO power termination and bias, "; // MODIFIED end + // MODIFIED to output wire repeater metrics + file << "Repeater spacing, "; + file << "Repeater size "; + // MODIFIED end + // file << "Resistance per unit micron (ohm-micron), "; // file << "Capacitance per unit micron (fF per micron), "; // file << "Unit-length wire delay (ps), "; @@ -2721,6 +2726,11 @@ void output_data_csv(const uca_org_t & fin_res, string fn) file << testextio.extio_power_term() << ", "; // MODIFIED end + // MODIFIED to output wire repeater metrics + file << g_ip->repeater_spacing << ", "; + file << g_ip->repeater_size; + // MODIFIED end + // file << g_tp.wire_inside_mat.R_per_um << ", "; // file << g_tp.wire_inside_mat.C_per_um / 1e-15 << ", "; // file << g_tp.unit_len_wire_del / 1e-12 << ", "; diff --git a/cacti-main/mem_cache.cfg b/cacti-main/mem_cache.cfg index 8a846ea..ca273ee 100644 --- a/cacti-main/mem_cache.cfg +++ b/cacti-main/mem_cache.cfg @@ -23,7 +23,7 @@ # Multiple banks connected using a bus -UCA bank count 1 --technology (u) 0.090 +-technology (u) 0.09 # following three parameters are meaningful only for main memories -page size (bits) 8192 diff --git a/cacti-main/validate_mem_cache.cfg b/cacti-main/mem_validate_cache.cfg similarity index 98% rename from cacti-main/validate_mem_cache.cfg rename to cacti-main/mem_validate_cache.cfg index 1415108..ca273ee 100644 --- a/cacti-main/validate_mem_cache.cfg +++ b/cacti-main/mem_validate_cache.cfg @@ -1,5 +1,5 @@ # Cache size --size (bytes) 128 +-size (bytes) 131072 # power gating -Array Power Gating - "false" @@ -23,7 +23,7 @@ # Multiple banks connected using a bus -UCA bank count 1 --technology (u) 0.032 +-technology (u) 0.09 # following three parameters are meaningful only for main memories -page size (bits) 8192 @@ -43,7 +43,7 @@ -Tag array peripheral type - "itrs-lop" # Bus width include data bits and address bits required by the decoder --output/input bus width 128 +-output/input bus width 64 # 300-400 in steps of 10 -operating temperature (K) 360 @@ -138,7 +138,7 @@ # Number of Physical Ranks -num_mem_dq 2 # Width of the Memory Data Bus --mem_data_width 128 +-mem_data_width 64 # RTT Termination Resistance -rtt_value 10000 # RON Termination Resistance diff --git a/cacti-main/nuca.cc b/cacti-main/nuca.cc index 02e44a1..5dd0f39 100644 --- a/cacti-main/nuca.cc +++ b/cacti-main/nuca.cc @@ -414,7 +414,7 @@ Nuca::sim_nuca() { delete router_s[i]; } - g_ip->display_ip(); + // g_ip->display_ip(); // g_ip->force_cache_config = true; // g_ip->ndwl = 8; // g_ip->ndbl = 16; diff --git a/cacti-main/validate_cache_2.cfg b/cacti-main/validate_cache_2.cfg deleted file mode 100644 index ebfcca1..0000000 --- a/cacti-main/validate_cache_2.cfg +++ /dev/null @@ -1,167 +0,0 @@ -# Cache size --size (bytes) 131072 - -# power gating --Array Power Gating - "false" --WL Power Gating - "false" --CL Power Gating - "false" --Bitline floating - "false" --Interconnect Power Gating - "false" --Power Gating Performance Loss "0.01" - -# Line size --block size (bytes) 64 - -# To model Fully Associative cache, set associativity to zero --associativity 0 - --read-write port 1 --exclusive read port 0 --exclusive write port 0 --single ended read ports 0 --search port 1 - -# Multiple banks connected using a bus --UCA bank count 1 --technology (u) 0.032 - -# following three parameters are meaningful only for main memories --page size (bits) 8192 --burst length 8 --internal prefetch width 8 - -# following parameter can have one of five values --Data array cell type - "itrs-lop" - -# following parameter can have one of three values --Data array peripheral type - "itrs-lop" - -# following parameter can have one of five values --Tag array cell type - "itrs-lop" - -# following parameter can have one of three values --Tag array peripheral type - "itrs-lop" - -# Bus width include data bits and address bits required by the decoder --output/input bus width 256 - -# 300-400 in steps of 10 --operating temperature (K) 360 - -# Type of memory --cache type "cache" - -# to model special structure like branch target buffers, directory, etc. -# change the tag size parameter -# if you want cacti to calculate the tagbits, set the tag size to "default" --tag size (b) "default" - -# fast - data and tag access happen in parallel -# sequential - data array is accessed after accessing the tag array -# normal - data array lookup and tag access happen in parallel -# final data block is broadcasted in data array h-tree -# after getting the signal from the tag array --access mode (normal, sequential, fast) - "normal" - -# DESIGN OBJECTIVE for UCA (or banks in NUCA) --design objective (weight delay, dynamic power, leakage power, cycle time, area) 0:0:0:100:0 - -# Percentage deviation from the minimum value --deviate (delay, dynamic power, leakage power, cycle time, area) 20:100000:100000:100000:100000 - -# Objective for NUCA --NUCAdesign objective (weight delay, dynamic power, leakage power, cycle time, area) 100:100:0:0:100 --NUCAdeviate (delay, dynamic power, leakage power, cycle time, area) 10:10000:10000:10000:10000 - -# Set optimize tag to ED or ED^2 to obtain a cache configuration optimized for -# energy-delay or energy-delay sq. product -# Note: Optimize tag will disable weight or deviate values mentioned above -# Set it to NONE to let weight and deviate values determine the -# appropriate cache configuration --Optimize ED or ED^2 (ED, ED^2, NONE): "ED^2" --Cache model (NUCA, UCA) - "UCA" - -# In order for CACTI to find the optimal NUCA bank value the following -# variable should be assigned 0. --NUCA bank count 0 - -# Wire signaling --Wire signaling (fullswing, lowswing, default) - "Global_30" --Wire inside mat - "semi-global" --Wire outside mat - "semi-global" --Interconnect projection - "conservative" - -# Contention in network --Core count 8 --Cache level (L2/L3) - "L3" --Add ECC - "true" --Print level (DETAILED, CONCISE) - "DETAILED" - -# for debugging --Print input parameters - "true" -# force CACTI to model the cache with the -# following Ndbl, Ndwl, Nspd, Ndsam, -# and Ndcm values --Force cache config - "false" --Ndwl 2 --Ndbl 2 --Nspd 2 --Ndcm 1 --Ndsam1 1 --Ndsam2 2 - -#### Default CONFIGURATION values for baseline external IO parameters to DRAM. More details can be found in the CACTI-IO technical report (), especially Chapters 2 and 3. -# Memory Type --dram_type "DDR3" -# Memory State --io state "WRITE" -# Address bus timing --addr_timing 1.0 -# Memory Density --mem_density 4 Gb -# IO frequency --bus_freq 800 MHz -# Duty Cycle --duty_cycle 1.0 -# Activity factor for Data --activity_dq 1.0 -# Activity factor for Control/Address --activity_ca 0.5 -# Number of DQ pins --num_dq 72 -# Number of DQS pins --num_dqs 18 -# Number of CA pins --num_ca 25 -# Number of CLK pins --num_clk 2 -# Number of Physical Ranks --num_mem_dq 2 -# Width of the Memory Data Bus --mem_data_width 8 -# RTT Termination Resistance --rtt_value 10000 -# RON Termination Resistance --ron_value 34 -# Time of flight for DQ -# tflight_value -# Parameter related to MemCAD -# Number of BoBs --num_bobs 1 -# Memory System Capacity in GB --capacity 80 -# Number of Channel per BoB --num_channels_per_bob 1 -# First Metric for ordering different design points --first metric "Cost" -# Second Metric for ordering different design points --second metric "Bandwidth" -# Third Metric for ordering different design points --third metric "Energy" -# Possible DIMM option to consider --DIMM model "ALL" -# If channels of each bob have the same configurations --mirror_in_bob "F" -# if we want to see all channels/bobs/memory configurations explored -# -verbose "T" -# -verbose "F" diff --git a/cacti-main/validate_cache.cfg b/cacti-main/validate_mem_energy_cache.cfg similarity index 97% rename from cacti-main/validate_cache.cfg rename to cacti-main/validate_mem_energy_cache.cfg index db2205e..ca273ee 100644 --- a/cacti-main/validate_cache.cfg +++ b/cacti-main/validate_mem_energy_cache.cfg @@ -13,13 +13,13 @@ -block size (bytes) 64 # To model Fully Associative cache, set associativity to zero --associativity 0 +-associativity 2 -read-write port 1 -exclusive read port 0 -exclusive write port 0 -single ended read ports 0 --search port 1 +-search port 0 # Multiple banks connected using a bus -UCA bank count 1 @@ -43,13 +43,13 @@ -Tag array peripheral type - "itrs-lop" # Bus width include data bits and address bits required by the decoder --output/input bus width 256 +-output/input bus width 64 # 300-400 in steps of 10 -operating temperature (K) 360 # Type of memory --cache type "cache" +-cache type "main memory" # to model special structure like branch target buffers, directory, etc. # change the tag size parameter @@ -138,7 +138,7 @@ # Number of Physical Ranks -num_mem_dq 2 # Width of the Memory Data Bus --mem_data_width 8 +-mem_data_width 64 # RTT Termination Resistance -rtt_value 10000 # RON Termination Resistance diff --git a/cacti-main/wire.cc b/cacti-main/wire.cc index 55a08ae..b986586 100644 --- a/cacti-main/wire.cc +++ b/cacti-main/wire.cc @@ -31,6 +31,8 @@ #include "wire.h" #include "cmath" +#include "parameter.h" + // use this constructor to calculate wire stats Wire::Wire( enum Wire_type wire_model, @@ -58,6 +60,7 @@ Wire::Wire( wire_length *= 1e6; wire_width *= 1e6; wire_spacing *= 1e6; + assert(wire_length > 0); assert(power.readOp.dynamic > 0); assert(power.readOp.leakage > 0); @@ -616,6 +619,14 @@ Wire::init_wire(){ sp = repeater_spacing; sp *= 1e6; // in microns + cout << "BOOGA Repeater Spacing: " << sp << endl; + cout << "BOOGA Repeater Size: " << si << endl; + + // MODIFIED keep track of repeater metrics + g_ip->repeater_spacing = sp; + g_ip->repeater_size = si; + // end MODIFIED + double i, j, del; repeated_wire.push_back(Component()); for (j=sp; j < 4*sp; j+=100) {