From df8f8c1fbcfc1e143eba664ef80546eb392981f1 Mon Sep 17 00:00:00 2001 From: Lorenzo Leone Date: Mon, 17 Nov 2025 19:07:02 +0100 Subject: [PATCH 1/2] hw: Add possibility of removing CDC cut between SoC and Cluster --- hw/chimera_cluster_adapter.sv | 153 +++++++++++++++++------------- hw/clusters/chimera_cluster.sv | 4 +- target/sim/src/vip_chimera_soc.sv | 2 +- 3 files changed, 92 insertions(+), 67 deletions(-) diff --git a/hw/chimera_cluster_adapter.sv b/hw/chimera_cluster_adapter.sv index 7e8c97c..af293d3 100644 --- a/hw/chimera_cluster_adapter.sv +++ b/hw/chimera_cluster_adapter.sv @@ -13,6 +13,8 @@ module chimera_cluster_adapter #( parameter int WidePassThroughRegionStart = '0, // End address of Memory Island parameter int WidePassThroughRegionEnd = '0, + // Add AXI CDC between the cluster and SoC, + parameter bit EnAxiCdc = 1'b0, parameter type narrow_in_req_t = logic, parameter type narrow_in_resp_t = logic, @@ -330,71 +332,92 @@ module chimera_cluster_adapter #( .mst_resp_i(axi_from_cluster_wide_resp) ); - // AXI Narrow CDC from SoC to Cluster - - axi_cdc #( - .aw_chan_t (axi_narrow_soc_in_aw_chan_t), - .w_chan_t (axi_narrow_soc_in_w_chan_t), - .b_chan_t (axi_narrow_soc_in_b_chan_t), - .ar_chan_t (axi_narrow_soc_in_ar_chan_t), - .r_chan_t (axi_narrow_soc_in_r_chan_t), - .axi_req_t (narrow_in_req_t), - .axi_resp_t(narrow_in_resp_t) - ) narrow_slv_cdc ( - .src_clk_i (soc_clk_i), - .src_rst_ni(rst_ni), - .src_req_i (narrow_in_req_i), - .src_resp_o(narrow_in_resp_o), - - .dst_clk_i (clu_clk_i), - .dst_rst_ni(rst_ni), - .dst_req_o (axi_to_cluster_narrow_req), - .dst_resp_i(axi_to_cluster_narrow_resp) - ); - - // AXI Narrow CDC from Cluster to SoC - - axi_cdc #( - .aw_chan_t (axi_narrow_soc_out_aw_chan_t), - .w_chan_t (axi_narrow_soc_out_w_chan_t), - .b_chan_t (axi_narrow_soc_out_b_chan_t), - .ar_chan_t (axi_narrow_soc_out_ar_chan_t), - .r_chan_t (axi_narrow_soc_out_r_chan_t), - .axi_req_t (narrow_out_req_t), - .axi_resp_t(narrow_out_resp_t) - ) narrow_mst_cdc ( - 
- .src_clk_i (clu_clk_i), - .src_rst_ni(rst_ni), - .src_req_i (axi_from_cluster_narrow_req), - .src_resp_o(axi_from_cluster_narrow_resp), - - .dst_clk_i (soc_clk_i), - .dst_rst_ni(rst_ni), - .dst_req_o (narrow_out_req_o[0]), - .dst_resp_i(narrow_out_resp_i[0]) - ); - - // AXI Wide CDC from Cluster to SoC - - axi_cdc #( - .aw_chan_t (axi_wide_clu_out_aw_chan_t), - .w_chan_t (axi_wide_clu_out_w_chan_t), - .b_chan_t (axi_wide_clu_out_b_chan_t), - .ar_chan_t (axi_wide_clu_out_ar_chan_t), - .r_chan_t (axi_wide_clu_out_r_chan_t), - .axi_req_t (wide_out_req_t), - .axi_resp_t(wide_out_resp_t) - ) wide_mst_cdc ( - .src_clk_i (clu_clk_i), - .src_rst_ni(rst_ni), - .src_req_i (axi_from_cluster_wide_req), - .src_resp_o(axi_from_cluster_wide_resp), - - .dst_clk_i (soc_clk_i), - .dst_rst_ni(rst_ni), - .dst_req_o (axi_from_cluster_wide_premux_req), - .dst_resp_i(axi_from_cluster_wide_premux_resp) - ); + // Choose carefully whether or not you need the AXI CDCs. + // If so, check the LogDepth and SyncStages parameters of axi_cdc. + // They have to be set deliberately so as not to limit the bandwidth. 
+ if (EnAxiCdc) begin : gen_axi_cdcs + // AXI Narrow CDC from SoC to Cluster + axi_cdc #( + .aw_chan_t (axi_narrow_soc_in_aw_chan_t), + .w_chan_t (axi_narrow_soc_in_w_chan_t), + .b_chan_t (axi_narrow_soc_in_b_chan_t), + .ar_chan_t (axi_narrow_soc_in_ar_chan_t), + .r_chan_t (axi_narrow_soc_in_r_chan_t), + .axi_req_t (narrow_in_req_t), + .axi_resp_t(narrow_in_resp_t), + .LogDepth (3), + .SyncStages(2) + ) narrow_slv_cdc ( + .src_clk_i (soc_clk_i), + .src_rst_ni(rst_ni), + .src_req_i (narrow_in_req_i), + .src_resp_o(narrow_in_resp_o), + + .dst_clk_i (clu_clk_i), + .dst_rst_ni(rst_ni), + .dst_req_o (axi_to_cluster_narrow_req), + .dst_resp_i(axi_to_cluster_narrow_resp) + ); + + // AXI Narrow CDC from Cluster to SoC + + axi_cdc #( + .aw_chan_t (axi_narrow_soc_out_aw_chan_t), + .w_chan_t (axi_narrow_soc_out_w_chan_t), + .b_chan_t (axi_narrow_soc_out_b_chan_t), + .ar_chan_t (axi_narrow_soc_out_ar_chan_t), + .r_chan_t (axi_narrow_soc_out_r_chan_t), + .axi_req_t (narrow_out_req_t), + .axi_resp_t(narrow_out_resp_t), + .LogDepth (3), + .SyncStages(2) + ) narrow_mst_cdc ( + .src_clk_i (clu_clk_i), + .src_rst_ni(rst_ni), + .src_req_i (axi_from_cluster_narrow_req), + .src_resp_o(axi_from_cluster_narrow_resp), + + .dst_clk_i (soc_clk_i), + .dst_rst_ni(rst_ni), + .dst_req_o (narrow_out_req_o[0]), + .dst_resp_i(narrow_out_resp_i[0]) + ); + + // AXI Wide CDC from Cluster to SoC + + axi_cdc #( + .aw_chan_t (axi_wide_clu_out_aw_chan_t), + .w_chan_t (axi_wide_clu_out_w_chan_t), + .b_chan_t (axi_wide_clu_out_b_chan_t), + .ar_chan_t (axi_wide_clu_out_ar_chan_t), + .r_chan_t (axi_wide_clu_out_r_chan_t), + .axi_req_t (wide_out_req_t), + .axi_resp_t(wide_out_resp_t), + .LogDepth (3), + .SyncStages(2) + ) wide_mst_cdc ( + .src_clk_i (clu_clk_i), + .src_rst_ni(rst_ni), + .src_req_i (axi_from_cluster_wide_req), + .src_resp_o(axi_from_cluster_wide_resp), + + .dst_clk_i (soc_clk_i), + .dst_rst_ni(rst_ni), + .dst_req_o (axi_from_cluster_wide_premux_req), + 
.dst_resp_i(axi_from_cluster_wide_premux_resp) + ); + + end else begin : gen_no_axi_cdcs + // Direct connections if no CDC is needed + assign narrow_in_resp_o = axi_to_cluster_narrow_resp; + assign axi_to_cluster_narrow_req = narrow_in_req_i; + + assign narrow_out_req_o[0] = axi_from_cluster_narrow_req; + assign axi_from_cluster_narrow_resp = narrow_out_resp_i[0]; + + assign axi_from_cluster_wide_premux_req = axi_from_cluster_wide_req; + assign axi_from_cluster_wide_resp = axi_from_cluster_wide_premux_resp; + end // Validate parameters `ifndef VERILATOR diff --git a/hw/clusters/chimera_cluster.sv b/hw/clusters/chimera_cluster.sv index ba45bc2..951e672 100644 --- a/hw/clusters/chimera_cluster.sv +++ b/hw/clusters/chimera_cluster.sv @@ -179,7 +179,9 @@ module chimera_cluster .wide_out_resp_t(wide_out_resp_t), .clu_wide_out_req_t (axi_cluster_out_wide_req_t), - .clu_wide_out_resp_t(axi_cluster_out_wide_resp_t) + .clu_wide_out_resp_t(axi_cluster_out_wide_resp_t), + // Make sure the SoC and Clusters in the TB run at the same frequency if CDCs are disabled + .EnAxiCdc (1'b0) ) i_cluster_axi_adapter ( .soc_clk_i(soc_clk_i), diff --git a/target/sim/src/vip_chimera_soc.sv b/target/sim/src/vip_chimera_soc.sv index 2d64ae7..7ae9b66 100644 --- a/target/sim/src/vip_chimera_soc.sv +++ b/target/sim/src/vip_chimera_soc.sv @@ -21,7 +21,7 @@ module vip_chimera_soc parameter type axi_ext_mst_rsp_t = logic, // Timing parameter time ClkPeriodClu = 2ns, - parameter time ClkPeriodSys = 5ns, + parameter time ClkPeriodSys = 2ns, parameter time ClkPeriodJtag = 20ns, parameter time ClkPeriodRtc = 30518ns, parameter int unsigned RstCycles = 5, From 55fb79223e836c1583fa01df5e12f1a122c8b01a Mon Sep 17 00:00:00 2001 From: Lorenzo Leone Date: Thu, 20 Nov 2025 14:29:13 +0100 Subject: [PATCH 2/2] hw: Push clk gating into the cluster domain --- Makefile | 6 +++--- hw/chimera_clu_domain.sv | 7 +++++-- hw/chimera_cluster_adapter.sv | 1 + hw/chimera_top_wrapper.sv | 21 ++++++++------------- 
hw/clusters/chimera_cluster.sv | 16 +++++++++++++--- 5 files changed, 30 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index 42656e5..e797b76 100644 --- a/Makefile +++ b/Makefile @@ -62,11 +62,11 @@ python-venv: .venv # Documentation # ################# -.PHONY: help +.PHONY: help h Black=\033[0m Green=\033[1;32m -help: ## Show an overview of all Makefile targets. +help h: ## Show an overview of all Makefile targets. @echo -e "Makefile ${Green}targets${Black} for chimera" @echo -e "Use 'make ' where is one of:" @echo -e "" @@ -75,4 +75,4 @@ help: ## Show an overview of all Makefile targets. /^##@/ { section = substr($$0, 5); printf "\033[1m%s:\033[0m\n", section; next } \ /^[a-zA-Z0-9._-]+:.*##/ { \ printf " " green "%-20s" black " %s\n", $$1, $$2 \ - }' $(MAKEFILE_LIST) \ No newline at end of file + }' $(MAKEFILE_LIST) diff --git a/hw/chimera_clu_domain.sv b/hw/chimera_clu_domain.sv index 10d1b82..2a00270 100644 --- a/hw/chimera_clu_domain.sv +++ b/hw/chimera_clu_domain.sv @@ -25,8 +25,10 @@ module chimera_clu_domain parameter type wide_out_resp_t = logic ) ( input logic soc_clk_i, - input logic [ ExtClusters-1:0] clu_clk_i, + input logic clu_clk_i, input logic [ ExtClusters-1:0] rst_ni, + // Signal to enable or disable the cluster clock signal + input logic [ ExtClusters-1:0] clu_clk_en_i, input logic [ ExtClusters-1:0] widemem_bypass_i, input logic [ 31:0] boot_addr_i, //----------------------------- @@ -186,8 +188,9 @@ module chimera_clu_domain .wide_out_resp_t (wide_out_resp_t) ) i_chimera_cluster ( .soc_clk_i(soc_clk_i), - .clu_clk_i(clu_clk_i[extClusterIdx]), + .clu_clk_i(clu_clk_i), .rst_ni(rst_ni[extClusterIdx]), + .clu_clk_en_i(clu_clk_en_i[extClusterIdx]), .widemem_bypass_i(widemem_bypass_i[extClusterIdx]), .debug_req_i(debug_req_i[`PREVNRCORES(extClusterIdx)+:`NRCORES(extClusterIdx)]), .meip_i(xeip_i[`PREVNRCORES(extClusterIdx)+:`NRCORES(extClusterIdx)]), diff --git a/hw/chimera_cluster_adapter.sv b/hw/chimera_cluster_adapter.sv index 
af293d3..7696c63 100644 --- a/hw/chimera_cluster_adapter.sv +++ b/hw/chimera_cluster_adapter.sv @@ -14,6 +14,7 @@ module chimera_cluster_adapter #( // End address of Memory Island parameter int WidePassThroughRegionEnd = '0, // Add AXI CDC between the cluster and SoC, + // When this parameter is disabled, you must ensure clu_clk_i = soc_clk_i parameter bit EnAxiCdc = 1'b0, parameter type narrow_in_req_t = logic, diff --git a/hw/chimera_top_wrapper.sv b/hw/chimera_top_wrapper.sv index a534bdb..d46a845 100644 --- a/hw/chimera_top_wrapper.sv +++ b/hw/chimera_top_wrapper.sv @@ -133,9 +133,6 @@ module chimera_top_wrapper .axi_ext_llc_rsp_t(axi_mst_rsp_t), .axi_ext_mst_req_t(axi_mst_req_t), .axi_ext_mst_rsp_t(axi_mst_rsp_t), - // lleone: TODO: remove from here - // .axi_ext_wide_mst_req_t(axi_wide_mst_req_t), - // .axi_ext_wide_mst_rsp_t(axi_wide_mst_rsp_t), .axi_ext_slv_req_t(axi_slv_req_t), .axi_ext_slv_rsp_t(axi_slv_rsp_t), .reg_ext_req_t (reg_req_t), @@ -315,7 +312,11 @@ module chimera_top_wrapper }; logic [ExtClusters-1:0] cluster_clock_gate_en; - logic [ExtClusters-1:0] clu_clk_gated; + // This is the clock-gate enable, i.e. + // - enable = 1 -> clock is gated (off) + // - enable = 0 -> clock is running (on) + // It is used to drive the actual clk enable signal in each cluster. + // For this reason it is inverted when connected to the cluster. 
assign cluster_clock_gate_en = { reg2hw.cluster_4_clk_gate_en, reg2hw.cluster_3_clk_gate_en, @@ -324,14 +325,6 @@ module chimera_top_wrapper reg2hw.cluster_0_clk_gate_en }; - for (genvar extClusterIdx = 0; extClusterIdx < ExtClusters; extClusterIdx++) begin : gen_clk_gates - tc_clk_gating i_cluster_clk_gate ( - .clk_i (clu_clk_i), - .en_i (~cluster_clock_gate_en[extClusterIdx]), - .test_en_i(1'b0), - .clk_o (clu_clk_gated[extClusterIdx]) - ); - end logic [ExtClusters-1:0] cluster_rst_n; logic [ExtClusters-1:0] cluster_soft_rst_n; @@ -343,6 +336,7 @@ module chimera_top_wrapper ~reg2hw.reset_cluster_0.q }; + // TODO: Double check that the logic AND is sufficient! // The Rst used for each cluster is the AND gate among all different source of rst in the system that are: // - rst_ni: Global asynchronous reset coming from the PAD // - cluster_soft_rst_n: Software synchronous rst coming from the SoC configuration registers @@ -363,8 +357,9 @@ module chimera_top_wrapper .wide_out_resp_t (axi_wide_mst_rsp_t) ) i_cluster_domain ( .soc_clk_i (soc_clk_i), - .clu_clk_i (clu_clk_gated), + .clu_clk_i (clu_clk_i), .rst_ni (cluster_rst_n), + .clu_clk_en_i (~cluster_clock_gate_en), .widemem_bypass_i (wide_mem_bypass_mode), .boot_addr_i (reg2hw.snitch_configurable_boot_addr.q), .debug_req_i (dbg_ext_req), diff --git a/hw/clusters/chimera_cluster.sv b/hw/clusters/chimera_cluster.sv index 951e672..5e3a51a 100644 --- a/hw/clusters/chimera_cluster.sv +++ b/hw/clusters/chimera_cluster.sv @@ -22,6 +22,7 @@ module chimera_cluster input logic soc_clk_i, input logic clu_clk_i, input logic rst_ni, + input logic clu_clk_en_i, input logic widemem_bypass_i, //----------------------------- // Interrupt ports @@ -117,6 +118,15 @@ module chimera_cluster axi_cluster_out_wide_req_t clu_axi_wide_mst_req; axi_cluster_out_wide_resp_t clu_axi_wide_mst_resp; + // Cluster clk signal after the clk gating cell + logic clu_clk_gated; + + tc_clk_gating i_cluster_clk_gate ( + .clk_i (clu_clk_i), + .en_i 
(clu_clk_en_i), + .test_en_i(1'b0), + .clk_o (clu_clk_gated) + ); if (ClusterDataWidth != Cfg.ChsCfg.AxiDataWidth) begin : gen_narrow_adapter @@ -180,12 +190,12 @@ module chimera_cluster .clu_wide_out_req_t (axi_cluster_out_wide_req_t), .clu_wide_out_resp_t(axi_cluster_out_wide_resp_t), - // Make sure the SoC and Clusters in the TB run at the same frequency if CDCs are disabled + // Make sure the SoC and Clusters run at the same frequency if CDCs are disabled .EnAxiCdc (1'b0) ) i_cluster_axi_adapter ( .soc_clk_i(soc_clk_i), - .clu_clk_i(clu_clk_i), + .clu_clk_i(clu_clk_gated), .rst_ni, .narrow_in_req_i (clu_axi_narrow_slv_req), @@ -273,7 +283,7 @@ module chimera_cluster .RegisterExtNarrow('0) ) i_test_cluster ( - .clk_i (clu_clk_i), + .clk_i (clu_clk_gated), .clk_d2_bypass_i('0), .rst_ni,