diff --git a/fluid/services.py b/fluid/services.py
index 259e2ec..abded8d 100644
--- a/fluid/services.py
+++ b/fluid/services.py
@@ -28,6 +28,7 @@ def __init__(self, name):
self.add_control("parser_meta_csr_readdata", 32, Direction.OUTPUT)
self.add_control("stats_incomp_out_meta", 32, Direction.OUTPUT)
self.add_control("stats_parser_out_meta", 32, Direction.OUTPUT)
+ self.add_control("stats_parser_out_bytes", 64, Direction.OUTPUT)
self.add_control("stats_ft_in_meta", 32, Direction.OUTPUT)
self.add_control("stats_ft_out_meta", 32, Direction.OUTPUT)
self.add_control("stats_emptylist_in", 32, Direction.OUTPUT)
@@ -39,6 +40,7 @@ def __init__(self, name):
self.add_control("stats_dm_in_check_meta", 32, Direction.OUTPUT)
self.add_control("stats_dm_in_ooo_meta", 32, Direction.OUTPUT)
self.add_control("stats_dm_in_forward_ooo_meta", 32, Direction.OUTPUT)
+ self.add_control("stats_dm_out_bytes", 64, Direction.OUTPUT)
self.add_control("stats_nopayload_pkt", 32, Direction.OUTPUT)
self.add_control("stats_dm_check_pkt", 32, Direction.OUTPUT)
diff --git a/pigasus.py b/pigasus.py
index 3bc3af1..ad4fb41 100644
--- a/pigasus.py
+++ b/pigasus.py
@@ -137,22 +137,34 @@ def struct_s():
`ifndef STRUCT_S
`define STRUCT_S
-//`define SIM
+// `define SIM
`define USE_BRAM
`define BRAM_CHECKPKT_BUF
`define NO_C2F
-//`define DISABLE_NF_BYPASS
+// `define DISABLE_NF_BYPASS
// `define NO_BP
+// `define ENABLE_SURGEPROTECTOR
+
+// Reassembler scheduling policy
+`ifdef ENABLE_SURGEPROTECTOR""")
+ SCHEDULER_REASSEMBLY_POLICY = Param("WSJF")
+ T("`else")
+ SCHEDULER_REASSEMBLY_POLICY = Param("FCFS")
+ T("""`endif
// Packet buffer
// STORE 1024 pkts, each pkt takes 32 * 512 bits = 2 KB.
// 32 * 1024 = 32768 entries.
-`ifdef USE_BRAM""")
+`ifdef USE_BRAM
+`ifdef PKT_NUM
+parameter PKT_NUM = `PKT_NUM;
+`else""")
PKT_NUM = Param(PKT_NUM_BRAM_V)
- T("`else")
+ T("""`endif
+`else""")
PKT_NUM = Param(PKT_NUM_V)
- T("`endif")
-
+ T("""`endif""")
+
PKTBUF_AWIDTH = Param(clog2(PKT_NUM) + 5)
PKTBUF_DEPTH = Param(32 * PKT_NUM)
PKT_AWIDTH = Param(clog2(PKT_NUM))
@@ -181,7 +193,7 @@ def struct_s():
ETH_IP = Param(ETH_IP_V)
ETH_META = Param(ETH_META_V)
ETH_USR = Param(ETH_USR_V)
-
+
PROT_ETH = Param(PROT_ETH_V)
IP_V4 = Param(IP_V4_V)
PROT_TCP = Param(PROT_TCP_V)
@@ -244,7 +256,6 @@ def struct_s():
} flit_meta_t;""")
# Linked list entry
- LL_DWIDTH = Param(1 + 32 + 16 + 16 + PKT_AWIDTH + 1 + 56)
T("""
typedef struct packed {
logic valid; // Valid
@@ -256,18 +267,108 @@ def struct_s():
logic last; // Last
logic [55:0] last_7_bytes; // Last
} entry_t;""")
+ LL_DWIDTH = Param(1 + 32 + 16 + LL_AWIDTH + PKT_AWIDTH + 5 + 1 + 56)
# Tuple
- TUPLE_DWIDTH = Param(32 + 32 + 16 + 16)
T("""
typedef struct packed {
logic [31:0] sIP;
logic [31:0] dIP;
logic [15:0] sPort;
logic [15:0] dPort;
-} tuple_t;
+} tuple_t;""")
+ TUPLE_DWIDTH = Param(32 + 32 + 16 + 16)
+
+ T("""
+/**
+ * Reassembler service.
+ */
+// OOO flow IDs""")
+ MAX_NUM_OOO_FLOWS = Param(1024)
+ OOO_FLOW_ID_AWIDTH = Param(clog2(MAX_NUM_OOO_FLOWS))
+ T("""// Service Queue""")
+ HEAP_BITMAP_WIDTH = Param(32)
+ HEAP_MAX_NUM_ENTRIES = Param(MAX_NUM_OOO_FLOWS)
+ HEAP_NUM_PRIORITIES = Param(HEAP_BITMAP_WIDTH ** 2)
+ HEAP_PRIORITY_AWIDTH = Param(clog2(HEAP_NUM_PRIORITIES))
+ HEAP_LOG_MAX_NUM_ENTRIES = Param(clog2(HEAP_MAX_NUM_ENTRIES))
+ T("""// Scheduler""")
+ OOO_FLOW_LL_MAX_NUM_ENTRIES = Param(PKT_NUM / 2)
+ OOO_FLOW_LL_ENTRY_AWIDTH = Param(clog2(OOO_FLOW_LL_MAX_NUM_ENTRIES))
+ OOO_FLOW_LL_ENTRY_PTR_T_WIDTH = Param(OOO_FLOW_LL_ENTRY_AWIDTH + 1)
+
+ T("""
+typedef logic [OOO_FLOW_ID_AWIDTH-1:0] ooo_flow_id_t;
+typedef logic [HEAP_LOG_MAX_NUM_ENTRIES:0] heap_size_t;
+typedef logic [HEAP_PRIORITY_AWIDTH-1:0] heap_priority_t;
+typedef logic [OOO_FLOW_LL_ENTRY_PTR_T_WIDTH-1:0] ooo_flow_ll_entry_ptr_t;""")
+
+ # Scheduler token
+ T("""
+typedef struct packed {
+ tuple_t tuple;
+ ooo_flow_id_t ooo_flow_id;
+} scheduler_token_t;""")
+ SCHEDULER_TOKEN_T_WIDTH = Param(TUPLE_DWIDTH + OOO_FLOW_ID_AWIDTH)
+
+ T("""
+typedef struct packed {
+ ooo_flow_ll_entry_ptr_t head;
+ ooo_flow_ll_entry_ptr_t tail;
+} ooo_flow_list_t;""")
+ OOO_FLOW_LIST_T_WIDTH = Localparam(2 * OOO_FLOW_LL_ENTRY_PTR_T_WIDTH)
+
+ T("""
+typedef struct packed {
+ logic valid;
+ tuple_t tuple;
+ logic [31:0] seq;
+ logic ll_valid;
+ logic [LL_AWIDTH-1:0] pointer;
+ logic [LL_AWIDTH-1:0] ll_size;
+ logic [55:0] last_7_bytes;
+ logic [FT_AWIDTH-1:0] addr0;
+ logic [FT_AWIDTH-1:0] addr1;
+ logic [FT_AWIDTH-1:0] addr2;
+ logic [FT_AWIDTH-1:0] addr3;
+ ooo_flow_list_t ooo_flow_ll;
+} ooo_flow_fc_entry_t;""")
+ OOO_FLOW_FC_ENTRY_T_WIDTH = Localparam(1 + TUPLE_DWIDTH + 32 + 1 + LL_AWIDTH + LL_AWIDTH +
+ 56 + (4 * FT_AWIDTH) + OOO_FLOW_LIST_T_WIDTH)
+ T("""
+typedef struct packed {
+ tuple_t tuple;
+ logic is_delete;
+ logic [31:0] seq;
+ logic [FT_AWIDTH-1:0] addr0;
+ logic [FT_AWIDTH-1:0] addr1;
+ logic [FT_AWIDTH-1:0] addr2;
+ logic [FT_AWIDTH-1:0] addr3;
+ logic [PKT_AWIDTH-1:0] rel_pkt_cnt;
+} ft_update_t;""")
+ FT_UPDATE_T_WIDTH = Localparam(TUPLE_DWIDTH + 1 + 32 + (4 * FT_AWIDTH) + PKT_AWIDTH)
+ T("""
+typedef struct packed {
+ ooo_flow_id_t ooo_flow_id;
+ tuple_t tuple;
+ logic [FT_AWIDTH-1:0] addr0;
+ logic [FT_AWIDTH-1:0] addr1;
+ logic [FT_AWIDTH-1:0] addr2;
+ logic [FT_AWIDTH-1:0] addr3;
+} reassembly_gc_meta_t;""")
+ REASSEMBLY_GC_META_T_WIDTH = Localparam(OOO_FLOW_ID_AWIDTH + TUPLE_DWIDTH + (4 * FT_AWIDTH))
+
+ T("""
+typedef struct packed {
+ logic ll_valid;
+ logic [LL_AWIDTH-1:0] pointer;
+ reassembly_gc_meta_t meta;
+} reassembly_gc_req_t;""")
+ REASSEMBLY_GC_REQ_T_WIDTH = Localparam(1 + LL_AWIDTH + REASSEMBLY_GC_META_T_WIDTH)
+
+ T("""
typedef struct packed {
logic [31:0] c2f_kmem_high_1; // higher 32 bit of kernel memory, FPGA read only
logic [31:0] c2f_kmem_low_1; // lower 32 bit of kernel memory, FPGA read only
@@ -290,22 +391,23 @@ def struct_s():
logic [31:0] f2c_tail; // tail pointer, CPU read only
} pcie_block_t;""")
- FT_DWIDTH = Param(1 + TUPLE_DWIDTH + 32 + LL_AWIDTH + 1 + PKT_AWIDTH + 56 + (4 * FT_AWIDTH))
T("""
typedef struct packed {
logic valid;
tuple_t tuple;
logic [31:0] seq;
- logic [LL_AWIDTH-1:0] pointer;
- logic ll_valid;
logic [PKT_AWIDTH-1:0] slow_cnt;
logic [55:0] last_7_bytes;
logic [FT_AWIDTH-1:0] addr0;
logic [FT_AWIDTH-1:0] addr1;
logic [FT_AWIDTH-1:0] addr2;
logic [FT_AWIDTH-1:0] addr3;
-} fce_t; // Flow context entry
+ logic ooo_flow_id_valid;
+ logic [OOO_FLOW_ID_AWIDTH-1:0] ooo_flow_id;
+} fce_t; // Flow context entry""")
+ FT_DWIDTH = Param(1 + TUPLE_DWIDTH + 32 + PKT_AWIDTH + 56 + (4 * FT_AWIDTH) + 1 + OOO_FLOW_ID_AWIDTH)
+ T("""
typedef struct packed {
tuple_t tuple;
logic [FT_AWIDTH-1:0] addr0;
@@ -499,7 +601,7 @@ def struct_s():
# super().__init__("identity", SourceType.SYSTEM_VERILOG, name)
# self.add_extern_connection("clk", "clk_pcie")
# self.add_extern_connection("rst_n", "rst_n_pcie")
-
+
# self.add_provides("in_pkt", 32, 512, channel_type="Packet")
# self.add_provides("in_meta", 32, 512, channel_type="Meta")
# self.add_provides("in_usr", 32, 512, channel_type="Usr")
@@ -509,7 +611,7 @@ def struct_s():
# self.add_requires("out_usr", channel_type="Usr")
-
+
# Start constructing the application
@@ -633,7 +735,7 @@ def struct_s():
# like an identity function
# identity = Identity("id")
-# FIFO to and from example module
+# FIFO to and from example module
#identity_cf = make_channel_fifo("clk_pcie", "rst_n_pcie", "identity_fill_level")
#identity_cf2 = make_channel_fifo("clk_pcie", "rst_n_pcie", "identity2_fill_level")
diff --git a/pigasus/hardware/rtl_sim/run_vsim.sh b/pigasus/hardware/rtl_sim/run_vsim.sh
index 34b48b3..261cdb6 100755
--- a/pigasus/hardware/rtl_sim/run_vsim.sh
+++ b/pigasus/hardware/rtl_sim/run_vsim.sh
@@ -31,7 +31,7 @@ cd ./src/non_fast_pattern_matcher/
./update.sh
cd ../../
-vlog +define+SIM +define+PKT_FILE=\"$PKT_FILE\" +define+PKT_FILE_NB_LINES=$PKT_FILE_NB_LINES ./src/*.*v -sv
+vlog +define+SIM +define+PKT_FILE=\"$PKT_FILE\" +define+PKT_FILE_NB_LINES=$PKT_FILE_NB_LINES ./src/*.*v -sv
#vlog *.v
vlog +define+SIM ./src/common/*.sv -sv
vlog +define+SIM ./src/common/*.v
@@ -45,6 +45,7 @@ vlog +define+SIM ./src/parser/*.sv -sv
vlog +define+SIM ./src/pcie/*.sv -sv
vlog +define+SIM ./src/port_group/*.sv -sv
vlog +define+SIM ./src/reassembly/*.sv -sv
+vlog +define+SIM ./src/reassembly/surge_protector/*.sv -sv
vlog +define+SIM ./src/services/*.sv -sv
#GUI full debug
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/bounded_fcfs_queue.sv b/pigasus/hardware/rtl_sim/src/reassembly/bounded_fcfs_queue.sv
new file mode 100644
index 0000000..d2ab4d3
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/src/reassembly/bounded_fcfs_queue.sv
@@ -0,0 +1,153 @@
+`include "./src/struct_s.sv"
+
+/**
+ * Implements a bounded FCFS queue using a simple FIFO. Deque-min and deque-max are both served by reading the same FIFO, so the priority is stored and returned but never used for ordering.
+ */
+module bounded_fcfs_queue(
+ // General inputs
+ input logic clk,
+ input logic rst, // TODO(natre): Impl. (rst is not referenced anywhere in this module yet)
+
+ // Enque (a write is issued when in_enque_en & in_enque_ready)
+ input logic in_enque_en,
+ input ooo_flow_id_t in_enque_ooo_flow_id,
+ input heap_priority_t in_enque_priority,
+ output logic in_enque_ready,
+
+ // Deque-min (an entry is fetched into the output registers whenever none is pending; out_deque_min_en consumes it)
+ input logic out_deque_min_en,
+ output ooo_flow_id_t out_deque_min_ooo_flow_id,
+ output heap_priority_t out_deque_min_priority,
+ output logic out_deque_min_ready,
+
+ // Deque-max (request): latched until the FIFO can serve it
+ input logic in_deque_max_req_en,
+ output logic in_deque_max_req_ready,
+ // Deque-max (output): registered result, consumed by out_deque_max_en
+ input logic out_deque_max_en,
+ output ooo_flow_id_t out_deque_max_ooo_flow_id,
+ output heap_priority_t out_deque_max_priority,
+ output logic out_deque_max_ready,
+
+ // Feedback
+ output heap_size_t queue_size,
+ output logic queue_ready
+);
+
+/**
+ * Local parameters.
+ */
+localparam FIFO_WIDTH = (OOO_FLOW_ID_AWIDTH + HEAP_PRIORITY_AWIDTH); // One FIFO word = {ooo_flow_id, priority}
+
+/**
+ * Housekeeping.
+ */
+// FCFS queue (signals wired to the sc_fifo instance at the bottom of the module)
+logic fcfs_queue_full;
+logic fcfs_queue_rdreq;
+logic fcfs_queue_wrreq;
+logic fcfs_queue_empty;
+heap_priority_t fcfs_queue_q_priority;
+ooo_flow_id_t fcfs_queue_q_ooo_flow_id;
+
+// Deque requests: *_done are combinational strobes from the arbiter, the latch remembers an outstanding deque-max request
+logic deque_max_req_done;
+logic deque_min_req_done;
+logic deque_max_req_en_latch;
+
+// Miscellaneous logic: the queue always reports itself as ready
+assign queue_ready = 1'b1;
+
+// Enque logic: accept writes whenever the FIFO is not full
+assign in_enque_ready = !fcfs_queue_full;
+assign fcfs_queue_wrreq = in_enque_en & in_enque_ready;
+
+// Deque-max request handshake: a new request is accepted only when no result is pending on the output and no request is outstanding
+assign in_deque_max_req_ready = (!out_deque_max_ready &
+ !deque_max_req_en_latch);
+initial begin // Initial values for the registered outputs and the request latch (no rst handling yet, see TODO above)
+ out_deque_max_ready = 0;
+ out_deque_max_priority = 0;
+ deque_max_req_en_latch = 0;
+ out_deque_max_ooo_flow_id = 0;
+
+ out_deque_min_ready = 0;
+ out_deque_min_priority = 0;
+ out_deque_min_ooo_flow_id = 0;
+end
+
+// Registered deque-max and deque-min outputs (two independent if/else chains in one clocked block)
+always @(posedge clk) begin
+ // A deque-max entry is already available on the output.
+ // Deassert ready, preparing to accept the next request.
+ if (out_deque_max_en & out_deque_max_ready) begin
+ out_deque_max_ready <= 0;
+ end
+ // A deque-max was performed on this cycle. Raise
+ // ready and register the output data. Also reset
+ // the request latch, indicating that no requests
+ // are outstanding.
+ else if (deque_max_req_done) begin
+ out_deque_max_ready <= 1;
+ deque_max_req_en_latch <= 0;
+ out_deque_max_priority <= fcfs_queue_q_priority;
+ out_deque_max_ooo_flow_id <= fcfs_queue_q_ooo_flow_id;
+ end
+ // Deque-max was requested on this cycle, update the latch
+ else if (in_deque_max_req_en & in_deque_max_req_ready) begin
+ deque_max_req_en_latch <= 1;
+ end
+
+ // Consume the deque-min entry on the output
+ if (out_deque_min_en & out_deque_min_ready) begin
+ out_deque_min_ready <= 0;
+ end
+ // A deque-min just completed on this cycle.
+ // Raise ready and register the output data.
+ else if (deque_min_req_done) begin
+ out_deque_min_ready <= 1;
+ out_deque_min_priority <= fcfs_queue_q_priority;
+ out_deque_min_ooo_flow_id <= fcfs_queue_q_ooo_flow_id;
+ end
+end
+
+// Deque arbitration: at most one FIFO read per cycle; a latched deque-max request takes precedence over the deque-min fetch
+always @(*) begin
+ fcfs_queue_rdreq = 0;
+ deque_min_req_done = 0;
+ deque_max_req_done = 0;
+
+ if (!fcfs_queue_empty) begin
+ // If required, perform deque-max
+ if (deque_max_req_en_latch) begin
+ deque_max_req_done = 1;
+ fcfs_queue_rdreq = 1;
+ end
+ // No deque-min entry is currently available on the
+ // output, (opportunistically) perform a deque-min.
+ else if (!out_deque_min_ready) begin
+ deque_min_req_done = 1;
+ fcfs_queue_rdreq = 1;
+ end
+ end
+end
+
+// FCFS queue: holds {ooo_flow_id, priority} words; usedw drives queue_size
+sc_fifo #(
+ .IS_SHOWAHEAD(1),
+ .IS_OUTDATA_REG(0),
+ .DWIDTH(FIFO_WIDTH),
+ .DEPTH(MAX_NUM_OOO_FLOWS)
+)
+fcfs_queue (
+ .clock(clk),
+ .data({in_enque_ooo_flow_id, in_enque_priority}),
+ .rdreq(fcfs_queue_rdreq),
+ .wrreq(fcfs_queue_wrreq),
+ .empty(fcfs_queue_empty),
+ .full(fcfs_queue_full),
+ .q({fcfs_queue_q_ooo_flow_id, fcfs_queue_q_priority}),
+ .usedw(queue_size)
+);
+
+endmodule
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/flow_reassembly.sv b/pigasus/hardware/rtl_sim/src/reassembly/flow_reassembly.sv
index b3f94c5..e8fbe5d 100644
--- a/pigasus/hardware/rtl_sim/src/reassembly/flow_reassembly.sv
+++ b/pigasus/hardware/rtl_sim/src/reassembly/flow_reassembly.sv
@@ -1,31 +1,46 @@
`include "./src/struct_s.sv"
-//`define DEBUG
+// `define DEBUG
+
module flow_reassembly(
+ // General inputs
input logic clk,
input logic rst,
- input metadata_t meta_data,
- input logic meta_valid,
- output logic meta_ready,
- input fce_t fce_data,
- input logic fce_valid,
- output logic fce_ready,
- // Read channel 2
- output fce_meta_t ch2_meta,
- output logic ch2_rden,
- input logic ch2_ready,
- input fce_t ch2_q,
- input logic ch2_rd_valid,
- // Write channel 3
- output logic [2:0] ch3_opcode,
- output logic ch3_wren,
- input logic ch3_ready,
- output fce_t ch3_data,
- output logic [PKT_AWIDTH-1:0] ch3_rel_pkt_cnt,
-
+ // Heap input
+ output logic heap_out_deque_min_en,
+ input logic heap_out_deque_min_ready,
+ input ooo_flow_id_t heap_out_deque_min_ooo_flow_id,
+ input heap_priority_t heap_out_deque_min_priority,
+ // Scheduler read channel
+ output logic scheduler_rdch_rden,
+ output ooo_flow_id_t scheduler_rdch_ooo_flow_id,
+ input logic scheduler_rdch_ready,
+ input logic scheduler_rdch_rd_valid,
+ input logic scheduler_rdch_rd_ooo_flow_invalid,
+ input metadata_t scheduler_rdch_meta,
+ input ooo_flow_fc_entry_t scheduler_rdch_fce,
+ // Scheduler write channel
+ output logic scheduler_wrch_wren,
+ input logic scheduler_wrch_ready,
+ output logic scheduler_wrch_is_delete,
+ output ooo_flow_fc_entry_t scheduler_wrch_update_fce,
+ output logic [PKT_AWIDTH-1:0] scheduler_wrch_rel_pkt_cnt,
+ // GC request
+ output logic gc_req_state_idle,
+ output logic gc_req_fifo_rdreq,
+ input logic gc_req_fifo_empty,
+ input reassembly_gc_req_t gc_req_fifo_q,
+ output logic [31:0] gc_fl_fill_level,
+ // GC response
+ input logic gc_rsp_fifo_full,
+ output reassembly_gc_meta_t gc_rsp_fifo_data,
+ output logic gc_rsp_fifo_wrreq,
+ // Reorder FIFO
output metadata_t reorder_meta,
output logic reorder_valid,
input logic reorder_ready,
- input logic reorder_almost_full
+ input logic reorder_almost_full,
+ // Debug
+ output logic reassembly_state_idle
);
typedef enum {
@@ -36,6 +51,15 @@ typedef enum {
} state_t;
state_t state;
+typedef enum logic [2:0] {
+ GC_FSM_STATE_IDLE,
+ GC_FSM_STATE_RD,
+ GC_FSM_STATE_RD_WAIT,
+ GC_FSM_STATE_RELEASE,
+ GC_FSM_STATE_DONE
+} gc_fsm_state_t;
+gc_fsm_state_t gc_fsm_state;
+
logic [31:0] next_seq;
logic [7:0] cycle;
@@ -43,6 +67,7 @@ logic [7:0] cycle;
logic ll_start;
logic ll_done;
logic ll_discard;
+logic ll_is_newlist;
logic [LL_AWIDTH-1:0] ll_pointer;
logic [PKT_AWIDTH-1:0] ll_pkt_id;
logic [4:0] ll_flits;
@@ -66,10 +91,68 @@ logic [31:0] end_p;
logic [55:0] last_7_bytes;
//internal state
-fce_t fce_data_r;
+ooo_flow_fc_entry_t fce_data_r;
metadata_t meta_data_r;
logic done;
-logic ch3_wr_temp;
+
+// GC logic
+logic ll_gc_ready;
+logic ll_gc_start;
+logic [LL_AWIDTH-1:0] ll_gc_pointer;
+entry_t ll_gc_entry;
+logic ll_gc_done;
+
+reassembly_gc_req_t gc_req;
+entry_t ll_gc_entry_r;
+logic reorder_enque_busy;
+
+// Debug
+logic [LL_AWIDTH-1:0] gc_cur_ll_release_cnt_r;
+
+assign reassembly_state_idle = (state == IDLE);
+assign gc_req_state_idle = (gc_fsm_state == GC_FSM_STATE_IDLE);
+
+// Intermediate and output logic (combinational): derives reorder_enque_busy from the main FSM and ll_gc_start from the GC FSM
+always @(*) begin
+ ll_gc_start = 0;
+ reorder_enque_busy = 0;
+
+ if (!rst) begin
+ // reorder_enque_busy: checked by GC_FSM_STATE_RELEASE before it writes reorder_valid/reorder_meta. NOTE(review): presumably mirrors the COMPARE cycles in which the main FSM enqueues to the reorder FIFO -- confirm it stays in sync with the sequential COMPARE logic
+ case (state)
+ COMPARE: begin
+ if (meta_data_r.seq == fce_data_r.seq) begin
+ if ((cycle == 8'd0) ||
+ ((cycle == 8'd2) && (next_seq == end_p))) begin
+ reorder_enque_busy = 1;
+ end
+ end
+ else if (meta_data_r.seq > fce_data_r.seq) begin
+ if (cycle == 8'd3 && ll_done && ll_discard) begin
+ reorder_enque_busy = 1;
+ end
+ end
+ else begin
+ reorder_enque_busy = 1;
+ end
+ end
+ default: begin
+ reorder_enque_busy = 0;
+ end
+ endcase
+ // ll_gc_start: asserted while the GC FSM is in GC_FSM_STATE_RD and the linked list reports ll_gc_ready
+ case (gc_fsm_state)
+ GC_FSM_STATE_RD: begin
+ if (ll_gc_ready) begin
+ ll_gc_start = 1;
+ end
+ end
+ default: begin
+ ll_gc_start = 0;
+ end
+ endcase
+ end
+end
/* Pesudocode:
* if entry.seq == seq
@@ -92,19 +175,32 @@ always @ (posedge clk) begin
cycle <= 0;
ll_load_head <= 0;
ll_store_head <= 0;
+ ll_is_newlist <= 0;
ll_start <= 0;
- meta_ready <= 0;
- fce_ready <= 0;
- ch3_wren <= 0;
- ch3_wr_temp <= 0;
- ch2_rden <= 0;
+ scheduler_rdch_rden <= 0;
+ heap_out_deque_min_en <= 0;
+ scheduler_rdch_ooo_flow_id <= 0;
+
+ scheduler_wrch_wren <= 0;
+ scheduler_wrch_is_delete <= 0;
+ scheduler_wrch_update_fce <= 0;
+ scheduler_wrch_rel_pkt_cnt <= 0;
+
reorder_meta <= 0;
reorder_valid <= 0;
- ch3_opcode <= FT_UPDATE;
- ch3_rel_pkt_cnt <= 0;
+
+ gc_req <= 0;
+ ll_gc_pointer <= 0;
+ ll_gc_entry_r <= 0;
+
+ gc_rsp_fifo_data <= 0;
+ gc_rsp_fifo_wrreq <= 0;
+ gc_req_fifo_rdreq <= 0;
+ gc_fsm_state <= GC_FSM_STATE_IDLE;
end
else begin
+ reorder_valid <= 0;
case (state)
IDLE: begin
// Initialize states
@@ -113,55 +209,69 @@ always @ (posedge clk) begin
ll_start <= 0;
ll_load_head <= 0;
ll_store_head <= 0;
+ ll_is_newlist <= 0;
// Default values
+ scheduler_rdch_rden <= 0;
+ heap_out_deque_min_en <= 0;
+ scheduler_rdch_ooo_flow_id <= 0;
+
+ scheduler_wrch_wren <= 0;
+ scheduler_wrch_is_delete <= 0;
+ scheduler_wrch_update_fce <= 0;
+ scheduler_wrch_rel_pkt_cnt <= 0;
+
reorder_valid <= 0;
- reorder_meta <= meta_data;
- ch3_wren <= 0;
- ch3_wr_temp <= 0;
- ch2_rden <= 0;
- ch3_opcode <= FT_UPDATE;
- ch3_rel_pkt_cnt <= 0;
-
- if (meta_valid & fce_valid & !reorder_almost_full) begin
+ reorder_meta <= 0;
+
+ if (heap_out_deque_min_ready & !reorder_almost_full) begin
state <= GET_FCE;
- meta_data_r <= meta_data;
- meta_ready <= 1;
- fce_ready <= 1;
- start_p <= meta_data.seq;
- end_p <= meta_data.seq + meta_data.len;
-
- ch2_rden <= 1;
- ch2_meta.addr0 <= fce_data.addr0;
- ch2_meta.addr1 <= fce_data.addr1;
- ch2_meta.addr2 <= fce_data.addr2;
- ch2_meta.addr3 <= fce_data.addr3;
- ch2_meta.tuple <= fce_data.tuple;
- ch2_meta.opcode <= 0;
+ scheduler_rdch_rden <= 1;
+ heap_out_deque_min_en <= 1;
+ scheduler_rdch_ooo_flow_id <= (
+ heap_out_deque_min_ooo_flow_id);
end
end
- // Wait until ch2 data is ready
+ // Wait until rdch data is ready
GET_FCE: begin
- meta_ready <= 0;
- fce_ready <= 0;
+ heap_out_deque_min_en <= 0;
+
// Read is success
- if (ch2_ready) begin
- ch2_rden <= 0;
+ if (scheduler_rdch_ready) begin
+ scheduler_rdch_rden <= 0;
end
- if (ch2_rd_valid) begin
- fce_data_r <= ch2_q;
- state <= COMPARE;
+ if (scheduler_rdch_rd_valid) begin
+ if (scheduler_rdch_rd_ooo_flow_invalid) begin
+ state <= IDLE;
+ `ifdef DEBUG
+ $display("[FR] Got OOO flow ID %0d from heap, flow invalid",
+ scheduler_rdch_ooo_flow_id);
+ `endif
+ end
+ else begin
+ state <= COMPARE;
+
+ `ifdef DEBUG
+ $display("[FR] Got OOO flow ID %0d from heap with LL size %0d, starting processing",
+ scheduler_rdch_ooo_flow_id, scheduler_rdch_fce.ll_size);
+ `endif
- // Default value is the fce with latest
- // seq (the slow_cnt may not be latest).
- ch3_data <= ch2_q;
+ fce_data_r <= scheduler_rdch_fce;
+ meta_data_r <= scheduler_rdch_meta;
+ start_p <= scheduler_rdch_meta.seq;
+ end_p <= (scheduler_rdch_meta.seq +
+ scheduler_rdch_meta.len);
+
+ // Default value is the fce with latest seq
+ scheduler_wrch_update_fce <= scheduler_rdch_fce;
+ end
end
end
COMPARE: begin
reorder_valid <= 0;
- ch3_wren <= 0;
- ch3_opcode <= FT_UPDATE;
+ scheduler_wrch_wren <= 0;
+ scheduler_wrch_is_delete <= 0;
// Seq is expected, check whether or not it fills the hole
if (meta_data_r.seq == fce_data_r.seq) begin
@@ -169,7 +279,7 @@ always @ (posedge clk) begin
case (cycle)
8'd0: begin
`ifdef DEBUG
- $display("[FR] Match seq: pkt %d, seq %x, length %d, expected_seq %x",
+ $display("[FR] Match seq: pkt %0d, seq %x, length %0d, expected_seq %x",
meta_data_r.pktID, meta_data_r.seq, meta_data_r.len,
fce_data_r.seq);
`endif
@@ -180,24 +290,25 @@ always @ (posedge clk) begin
// order. This can happen due to stale state.
if (!fce_data_r.ll_valid) begin
state <= WAIT_WR_READY;
- ch3_wren <= 1;
- ch3_data.seq <= end_p;
- ch3_data.pointer <= 0;
- ch3_data.ll_valid <= 0;
+ scheduler_wrch_wren <= 1;
+ scheduler_wrch_update_fce.seq <= end_p;
+ scheduler_wrch_update_fce.pointer <= 0;
+ scheduler_wrch_update_fce.ll_valid <= 0;
// Store current pkt's last_7_bytes
- ch3_data.last_7_bytes <= meta_data_r.last_7_bytes;
+ scheduler_wrch_update_fce.last_7_bytes <= meta_data_r.last_7_bytes;
// Release current pkt
- ch3_rel_pkt_cnt <= 1;
+ scheduler_wrch_rel_pkt_cnt <= 1;
// Delete the fce
if (meta_data_r.tcp_flags[TCP_FIN] |
meta_data_r.tcp_flags[TCP_RST]) begin
- $display("FIN/RST : pkt %d, seq %x, length %d, expect %x",
+ `ifdef DEBUG
+ $display("[FR] FIN/RST : pkt %0d, seq %x, length %0d, expect %x",
meta_data_r.pktID, meta_data_r.seq, meta_data_r.len,
fce_data_r.seq);
+ `endif
- ch3_data.valid <= 0;
- ch3_opcode <= FT_DELETE;
+ scheduler_wrch_is_delete <= 1;
reorder_meta.pkt_flags <= PKT_FORWARD;
end
end
@@ -206,7 +317,7 @@ always @ (posedge clk) begin
ll_load_head <= 1;
ll_pointer <= fce_data_r.pointer;
last_7_bytes <= meta_data_r.last_7_bytes;
- ch3_rel_pkt_cnt <= 1; // Release packet count
+ scheduler_wrch_rel_pkt_cnt <= 1; // Release packet count
end
end
8'd1: begin
@@ -221,7 +332,7 @@ always @ (posedge clk) begin
8'd2: begin
if (next_seq == end_p) begin
`ifdef DEBUG
- $display("[FR] Reordered: pkt %d, seq %x, length %d",
+ $display("[FR] Reordered: pkt %0d, seq %x, length %0d",
ll_head_out.pktID, ll_head_out.seq, ll_head_out.len);
`endif
reorder_valid <= 1;
@@ -229,7 +340,7 @@ always @ (posedge clk) begin
reorder_meta.pktID <= ll_head_out.pktID;
reorder_meta.flits <= ll_head_out.flits;
// Release packet count
- ch3_rel_pkt_cnt <= ch3_rel_pkt_cnt + 1;
+ scheduler_wrch_rel_pkt_cnt <= scheduler_wrch_rel_pkt_cnt + 1;
// Previous pkt's last_7_bytes
reorder_meta.last_7_bytes <= last_7_bytes;
last_7_bytes <= ll_head_out.last_7_bytes;
@@ -245,22 +356,23 @@ always @ (posedge clk) begin
state <= WAIT_WR_READY;
// Set ll_valid to 1'b0 after fetch the last node
- ch3_wren <= 1;
- ch3_data.pointer <= 0;
- ch3_data.ll_valid <= 0;
+ scheduler_wrch_wren <= 1;
+ scheduler_wrch_update_fce.pointer <= 0;
+ scheduler_wrch_update_fce.ll_valid <= 0;
// Store current pkt's last 7 bytes
- ch3_data.last_7_bytes <= ll_head_out.last_7_bytes;
- ch3_data.seq <= ll_head_out.seq + ll_head_out.len;
+ scheduler_wrch_update_fce.last_7_bytes <= ll_head_out.last_7_bytes;
+ scheduler_wrch_update_fce.seq <= ll_head_out.seq + ll_head_out.len;
// Delete the fce
if (meta_data_r.tcp_flags[TCP_FIN] |
meta_data_r.tcp_flags[TCP_RST]) begin
- $display("FIN/RST : pkt %d, seq %x, length %d, expect %x",
+ `ifdef DEBUG
+ $display("[FR] FIN/RST : pkt %0d, seq %x, length %0d, expect %x",
meta_data_r.pktID, meta_data_r.seq,
meta_data_r.len, fce_data_r.seq);
+ `endif
- ch3_data.valid <= 0;
- ch3_opcode <= FT_DELETE;
+ scheduler_wrch_is_delete <= 1;
reorder_meta.pkt_flags <= PKT_FORWARD;
end
@@ -283,11 +395,11 @@ always @ (posedge clk) begin
// Just update the seq number and
// ll_pointer, have LL node left.
else begin
- ch3_wren <= 1;
- ch3_data.seq <= end_p;
- ch3_data.pointer <= ll_pointer;
+ scheduler_wrch_wren <= 1;
+ scheduler_wrch_update_fce.seq <= end_p;
+ scheduler_wrch_update_fce.pointer <= ll_pointer;
//Store current pkt's last_7_bytes
- ch3_data.last_7_bytes <= last_7_bytes;
+ scheduler_wrch_update_fce.last_7_bytes <= last_7_bytes;
cycle <= 0;
state <= WAIT_WR_READY;
@@ -308,22 +420,24 @@ always @ (posedge clk) begin
// OOO pkts show up
else if (meta_data_r.seq > fce_data_r.seq) begin
// Won't release any pkt
- ch3_rel_pkt_cnt <= 0;
+ scheduler_wrch_rel_pkt_cnt <= 0;
case (cycle)
// Check if the pointer is valid or not
8'd0: begin
// Invalid pointer, load a valid pointer from linked-list
`ifdef DEBUG
- $display("[FR] OOO pkts: pkt %d, seq %x, length %d",
+ $display("[FR] OOO pkts: pkt %0d, seq %x, length %0d",
meta_data_r.pktID, meta_data_r.seq, meta_data_r.len);
`endif
if (fce_data_r.ll_valid == 0) begin
ll_load_empty_pointer <= 1;
+ ll_is_newlist <= 1;
cycle <= 1;
end
// Otherwise directly start an insert
else begin
cycle <= 2;
+ ll_is_newlist <= 0;
ll_pointer <= fce_data_r.pointer;
end
end
@@ -332,9 +446,8 @@ always @ (posedge clk) begin
ll_load_empty_pointer <= 0;
if (ll_empty_pointer_valid) begin
// Set the ll_valid
- ch3_data.pointer <= ll_empty_pointer;
- ch3_data.ll_valid <= 1'b1;
- ch3_wr_temp <= 1;
+ scheduler_wrch_update_fce.pointer <= ll_empty_pointer;
+ scheduler_wrch_update_fce.ll_valid <= 1'b1;
ll_pointer <= ll_empty_pointer;
cycle <= 2;
@@ -342,7 +455,6 @@ always @ (posedge clk) begin
end
// Start an insert
8'd2: begin
- ch3_wren <= 0;
ll_last_7_bytes <= meta_data_r.last_7_bytes;
ll_pkt_id <= meta_data_r.pktID;
ll_flits <= meta_data_r.flits;
@@ -353,31 +465,28 @@ always @ (posedge clk) begin
end
8'd3: begin
ll_start <= 0;
- ch3_wren <= 0;
+ scheduler_wrch_wren <= 0;
// "ll_update_p_valid" is raised at the same cycle with "ll_done"
if (ll_update_p_valid) begin
- ch3_data.pointer <= ll_update_p;
+ scheduler_wrch_update_fce.pointer <= ll_update_p;
end
if (ll_done) begin
cycle <= 0;
done <= 1;
- if (ch3_wr_temp | ll_update_p_valid) begin
- ch3_wren <= 1;
- ch3_wr_temp <= 0;
- state <= WAIT_WR_READY;
- end
- else begin
- state <= IDLE;
- end
+ // Always write-back updates
+ scheduler_wrch_wren <= 1;
+ state <= WAIT_WR_READY;
+
if (ll_discard) begin
`ifdef DEBUG
- $display("[FR] discard pkts: pkt %d, seq %x, length %d",
+ $display("[FR] discard pkts: pkt %0d, seq %x, length %0d",
meta_data_r.pktID, meta_data_r.seq, meta_data_r.len);
`endif
reorder_valid <= 1;
reorder_meta <= meta_data_r;
+ scheduler_wrch_rel_pkt_cnt <= 1;
reorder_meta.pkt_flags <= PKT_DROP;
end
end
@@ -388,15 +497,17 @@ always @ (posedge clk) begin
// Packet with overlapping sequence range.
// Simply drop it without changing the FT.
else begin
- state <= IDLE;
done <= 1;
- ch3_wren <= 1;
- ch3_rel_pkt_cnt <= 1;
+ state <= WAIT_WR_READY;
+ scheduler_wrch_wren <= 1;
+ scheduler_wrch_rel_pkt_cnt <= 1;
+
reorder_valid <= 1;
reorder_meta <= meta_data_r;
reorder_meta.pkt_flags <= PKT_DROP;
+
`ifdef DEBUG
- $display("[FR] Overlap pkts, pkt %d, seq %x, length %d",
+ $display("[FR] Overlap pkts, pkt %0d, seq %x, length %0d",
meta_data_r.pktID, meta_data_r.seq, meta_data_r.len);
`endif
end
@@ -408,18 +519,106 @@ always @ (posedge clk) begin
ll_load_head <= 0;
ll_store_head <= 0;
reorder_valid <= 0;
- reorder_meta <= meta_data;
- ch3_wr_temp <= 0;
- ch2_rden <= 0;
+ scheduler_rdch_rden <= 0;
+ reorder_meta <= meta_data_r;
- // Keep the ch3_wr and ch3_data, until it is accepted
- if (ch3_ready) begin
- ch3_wren <= 0;
+ // Keep wren and the data until it is accepted
+ if (scheduler_wrch_ready) begin
+ scheduler_wrch_wren <= 0;
state <= IDLE;
end
end
default: state <= IDLE;
endcase
+
+ /**
+ * Garbage-collection (GC) FSM.
+ */
+ gc_req_fifo_rdreq <= 0;
+ gc_rsp_fifo_wrreq <= 0;
+ case (gc_fsm_state)
+ GC_FSM_STATE_IDLE: begin
+ gc_req <= 0;
+ ll_gc_pointer <= 0;
+ ll_gc_entry_r <= 0;
+ gc_cur_ll_release_cnt_r <= 0;
+ gc_fsm_state <= GC_FSM_STATE_IDLE;
+
+ if (!gc_req_fifo_empty &&
+ !reorder_almost_full) begin
+ gc_req_fifo_rdreq <= 1;
+ gc_req <= gc_req_fifo_q;
+
+ if (gc_req_fifo_q.ll_valid) begin
+ gc_fsm_state <= GC_FSM_STATE_RD;
+ ll_gc_pointer <= gc_req_fifo_q.pointer;
+
+ `ifdef DEBUG
+ $display("[FR, GC] Started dropping OOO flow ID %0d, valid LL",
+ gc_req_fifo_q.meta.ooo_flow_id);
+ `endif
+ end
+ else begin
+ gc_fsm_state <= GC_FSM_STATE_DONE;
+ `ifdef DEBUG
+ $display("[FR, GC] Dropping OOO flow ID %0d, invalid LL",
+ gc_req_fifo_q.meta.ooo_flow_id);
+ `endif
+ end
+ end
+ end
+ GC_FSM_STATE_RD: begin
+ gc_fsm_state <= GC_FSM_STATE_RD;
+
+ if (ll_gc_ready) begin
+ gc_fsm_state <= GC_FSM_STATE_RD_WAIT;
+ end
+ end
+ GC_FSM_STATE_RD_WAIT: begin
+ gc_fsm_state <= GC_FSM_STATE_RD_WAIT;
+ if (ll_gc_done) begin
+ ll_gc_entry_r <= ll_gc_entry;
+ gc_fsm_state <= GC_FSM_STATE_RELEASE;
+ end
+ end
+ GC_FSM_STATE_RELEASE: begin
+ gc_fsm_state <= GC_FSM_STATE_RELEASE;
+ if (!reorder_enque_busy) begin
+ reorder_valid <= 1;
+ reorder_meta.pkt_flags <= PKT_DROP;
+ reorder_meta.seq <= ll_gc_entry_r.seq;
+ reorder_meta.len <= ll_gc_entry_r.len;
+ reorder_meta.pktID <= ll_gc_entry_r.pktID;
+ reorder_meta.flits <= ll_gc_entry_r.flits;
+ reorder_meta.last_7_bytes <= ll_gc_entry_r.last_7_bytes;
+ gc_cur_ll_release_cnt_r <= (gc_cur_ll_release_cnt_r + 1);
+
+ if (!ll_gc_entry_r.last) begin
+ gc_fsm_state <= GC_FSM_STATE_RD;
+ ll_gc_pointer <= ll_gc_entry_r.next;
+ end
+ else begin
+ gc_fsm_state <= GC_FSM_STATE_DONE;
+ end
+ end
+ end
+ GC_FSM_STATE_DONE: begin
+ gc_fsm_state <= GC_FSM_STATE_DONE;
+ if (!gc_rsp_fifo_full) begin
+ gc_rsp_fifo_wrreq <= 1;
+ gc_rsp_fifo_data <= gc_req.meta;
+ gc_fsm_state <= GC_FSM_STATE_IDLE;
+
+ `ifdef DEBUG
+ $display("[FR, GC] Done dropping OOO flow ID %0d with %0d LL packets",
+ gc_req.meta.ooo_flow_id, gc_cur_ll_release_cnt_r);
+ `endif
+ end
+ end
+ default: begin
+ gc_fsm_state <= GC_FSM_STATE_IDLE;
+ end
+ endcase
end
end
@@ -429,6 +628,7 @@ linked_list ll_inst(
.start (ll_start),
.done (ll_done),
.discard (ll_discard),
+ .is_newlist (ll_is_newlist),
.pointer (ll_pointer),
.pktID (ll_pkt_id),
.flits (ll_flits),
@@ -445,7 +645,13 @@ linked_list ll_inst(
.head_wr_addr (ll_head_wr_addr),
.load_empty_pointer (ll_load_empty_pointer),
.empty_pointer (ll_empty_pointer),
- .empty_pointer_valid(ll_empty_pointer_valid)
+ .empty_pointer_valid(ll_empty_pointer_valid),
+ .gc_ready (ll_gc_ready),
+ .gc_start (ll_gc_start),
+ .gc_pointer (ll_gc_pointer),
+ .gc_entry (ll_gc_entry),
+ .gc_done (ll_gc_done),
+ .gc_fl_fill_level (gc_fl_fill_level)
);
endmodule
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/flow_table.sv b/pigasus/hardware/rtl_sim/src/reassembly/flow_table.sv
index 148d1fe..c9738b2 100644
--- a/pigasus/hardware/rtl_sim/src/reassembly/flow_table.sv
+++ b/pigasus/hardware/rtl_sim/src/reassembly/flow_table.sv
@@ -1,10 +1,11 @@
`include "./src/struct_s.sv"
+// `define DEBUG
module flow_table(
input logic clk,
input logic rst,
- // Read channel 0
+ // Read channel 0 (from FTW)
input fce_meta_t ch0_meta,
input logic ch0_rden,
output fce_t ch0_q,
@@ -12,26 +13,21 @@ module flow_table(
output logic [FT_SUBTABLE:0] ch0_bit_map,
output logic ch0_rd_stall,
- // Write channel 1
+ // Write channel 1 (from FTW)
input logic [2:0] ch1_opcode,
input logic [FT_SUBTABLE:0] ch1_bit_map,
input logic ch1_wren,
input fce_t ch1_data,
output logic ch1_insert_stall,
- // Read channel 2
- input fce_meta_t ch2_meta,
- input logic ch2_rden,
+ // Update channel (from Scheduler)
+ input logic ch2_wren,
+ input ft_update_t ch2_data,
output logic ch2_ready,
- output fce_t ch2_q,
- output logic ch2_rd_valid,
-
- // Write channel 3
- input logic [2:0] ch3_opcode,
- input logic ch3_wren,
- output logic ch3_ready,
- input fce_t ch3_data,
- input logic [PKT_AWIDTH-1:0] ch3_rel_pkt_cnt
+
+ // OOO flow ID release (to FTW)
+ output ooo_flow_id_t ooo_flow_id_release_data,
+ output logic ooo_flow_id_release_valid
);
logic [FT_AWIDTH-1:0] ft0_addr_a;
@@ -193,15 +189,15 @@ logic rdwq_b_r2;
logic q_empty_r;
logic q_rden_a_r1;
+fce_t ch2_q;
+
typedef enum {
P_ARB,
P_LOOKUP,
P_FILL,
P_EVIC,
SLOW_UPDATE,
- SLOW_UPDATE_WAIT,
- SLOW_LOOKUP_WAIT,
- SLOW_LOOKUP
+ SLOW_UPDATE_WAIT
} place_t;
place_t p_state;
@@ -234,8 +230,8 @@ logic ch0_rden_r3;
fce_t ch1_data_r1;
fce_t ch1_data_r2;
logic [FT_SUBTABLE:0] ch2_bit_map;
-fce_t ch3_data_r;
-logic [PKT_AWIDTH-1:0] ch3_rel_pkt_cnt_r;
+logic [PKT_AWIDTH-1:0] rel_pkt_cnt_r;
+ft_update_t ch2_data_r;
///////// Lookup operation /////////////////////////
assign ft0_rden_a = ch0_rden & !ch0_rd_stall;
@@ -436,13 +432,10 @@ assign ft_empty[3] = !ft3_q_b.valid;
///////// Arbiration for port_B /////////////////////////
// Only generate a request during the ARB state.
assign req[0] = (p_state == P_ARB) & !q_empty_r;
-assign req[1] = (p_state == P_ARB) & (ch3_wren || ch2_rden);
+assign req[1] = (p_state == P_ARB) & ch2_wren;
assign req[2] = 0;
assign req[3] = 0;
-assign ch3_ready = grant[1];
-assign ch2_ready = grant[1];
-
assign ft_hit_b[0] = (lookup_tuple_b_r2 == ft0_q_b.tuple) & ft0_q_b.valid;
assign ft_hit_b[1] = (lookup_tuple_b_r2 == ft1_q_b.tuple) & ft1_q_b.valid;
assign ft_hit_b[2] = (lookup_tuple_b_r2 == ft2_q_b.tuple) & ft2_q_b.valid;
@@ -470,13 +463,8 @@ always @(*) begin
ft2_odata_b = ft2_q_b;
ft3_odata_b = ft3_q_b;
q_odata_b = q_q_b;
- ch2_rd_valid = 0;
ch2_bit_map = ft_hit_b;
- if (p_state == SLOW_LOOKUP) begin
- ch2_rd_valid = rd_valid_b;
- end
-
if (rdw0_b_r2) begin
ft0_odata_b = ft0_data_a_r2;
ch2_bit_map = 5'b0_0001;
@@ -509,7 +497,8 @@ always @(posedge clk) begin
ch0_tuple_latch_r <= ch0_meta.tuple;
end
end
-assign ch0_tuple_latch = ft0_rden_a ? ch0_meta.tuple : ch0_tuple_latch_r;
+assign ch0_tuple_latch = ft0_rden_a ? lookup_tuple :
+ ch0_tuple_latch_r;
// Random number used for eviction
always @(posedge clk) begin
@@ -527,26 +516,36 @@ assign head_busy = (q_rden_a & (ch0_meta.tuple == q_deque_data.tuple) |
// Start from the rden, until the update is done
assign fast_busy = ch0_rden | ch0_rd_stall;
-// When slow path is updating the FT assign p_state == SLOW_UPDATE
-assign slow_conflict = slow_busy & ch0_rden & (ch3_data_r.tuple == ch0_meta.tuple);
+// R/W conflict on any of the FT entries
+assign slow_conflict = (slow_busy & ch0_rden & (
+ (ch2_data_r.tuple == ch0_meta.tuple) |
+ (ch2_data_r.addr0 == ch0_meta.addr0) |
+ (ch2_data_r.addr1 == ch0_meta.addr1) |
+ (ch2_data_r.addr2 == ch0_meta.addr2) |
+ (ch2_data_r.addr3 == ch0_meta.addr3)));
// One state machine that arbirates all the read/writes using port_b.
always @(posedge clk) begin
+ ch2_ready <= 0;
+ ooo_flow_id_release_valid <= 0;
+
if (rst) begin
- p_state <= P_ARB;
- ft0_rden_b <= 0;
- ft1_rden_b <= 0;
- ft2_rden_b <= 0;
- ft3_rden_b <= 0;
- ft0_wren_b <= 0;
- ft1_wren_b <= 0;
- ft2_wren_b <= 0;
- ft3_wren_b <= 0;
- q_rden_b <= 0;
- q_wren_b <= 0;
- q_deque_en <= 0;
- evict <= 0;
- slow_busy <= 0;
+ p_state <= P_ARB;
+ ft0_rden_b <= 0;
+ ft1_rden_b <= 0;
+ ft2_rden_b <= 0;
+ ft3_rden_b <= 0;
+ ft0_wren_b <= 0;
+ ft1_wren_b <= 0;
+ ft2_wren_b <= 0;
+ ft3_wren_b <= 0;
+ q_rden_b <= 0;
+ q_wren_b <= 0;
+ q_deque_en <= 0;
+ evict <= 0;
+ slow_busy <= 0;
+ ooo_flow_id_release_data <= 0;
+ ch2_data_r <= 0;
end
else begin
case (p_state)
@@ -580,50 +579,41 @@ always @(posedge clk) begin
end
// Slow path lookup and update
4'b0010: begin
- // Read and Write won't happen on the same cycle
- if (ch3_wren) begin
- // Same entry is busy
- if (fast_busy & (ch3_data.tuple == ch0_tuple_latch)) begin
- p_state <= SLOW_UPDATE_WAIT;
- end
- else begin
- slow_busy <= 1;
- p_state <= SLOW_UPDATE;
- ft0_rden_b <= 1;
- ft1_rden_b <= 1;
- ft2_rden_b <= 1;
- ft3_rden_b <= 1;
- q_rden_b <= 1;
- end
- ft0_addr_b <= ch3_data.addr0;
- ft1_addr_b <= ch3_data.addr1;
- ft2_addr_b <= ch3_data.addr2;
- ft3_addr_b <= ch3_data.addr3;
- q_addr_b <= ch3_data.tuple;
- ch3_data_r <= ch3_data;
- lookup_tuple_b <= ch3_data.tuple;
- ch3_rel_pkt_cnt_r <= ch3_rel_pkt_cnt;
+ // R/W conflict on any of the FT entries
+ if (ch1_wren & (
+ (ch1_data.addr0 == ch2_data.addr0) |
+ (ch1_data.addr1 == ch2_data.addr1) |
+ (ch1_data.addr2 == ch2_data.addr2) |
+ (ch1_data.addr3 == ch2_data.addr3))) begin
+ p_state <= SLOW_UPDATE_WAIT;
+ end
+ // The same FT entry is busy
+ else if (fast_busy & (ch2_data.tuple == ch0_tuple_latch)) begin
+ p_state <= SLOW_UPDATE_WAIT;
end
else begin
- // Avoid rd/wr conflicts
- if (ch0_rd_valid & (ch2_meta.tuple == ch0_tuple_latch)) begin
- p_state <= SLOW_LOOKUP_WAIT;
- end
- else begin
- p_state <= SLOW_LOOKUP;
- ft0_rden_b <= 1;
- ft1_rden_b <= 1;
- ft2_rden_b <= 1;
- ft3_rden_b <= 1;
- q_rden_b <= 1;
- end
- ft0_addr_b <= ch2_meta.addr0;
- ft1_addr_b <= ch2_meta.addr1;
- ft2_addr_b <= ch2_meta.addr2;
- ft3_addr_b <= ch2_meta.addr3;
- q_addr_b <= ch2_meta.tuple;
- lookup_tuple_b <= ch2_meta.tuple;
+ slow_busy <= 1;
+ p_state <= SLOW_UPDATE;
+ ft0_rden_b <= 1;
+ ft1_rden_b <= 1;
+ ft2_rden_b <= 1;
+ ft3_rden_b <= 1;
+ q_rden_b <= 1;
end
+ ch2_data_r <= ch2_data;
+ ft0_addr_b <= ch2_data.addr0;
+ ft1_addr_b <= ch2_data.addr1;
+ ft2_addr_b <= ch2_data.addr2;
+ ft3_addr_b <= ch2_data.addr3;
+ q_addr_b <= ch2_data.tuple;
+ lookup_tuple_b <= ch2_data.tuple;
+ rel_pkt_cnt_r <= ch2_data.rel_pkt_cnt;
+
+ ch2_ready <= 1;
+
+ `ifdef DEBUG
+ $display("[FT]: Arbiter granted FC update");
+ `endif
end
default: p_state <= P_ARB;
endcase
@@ -692,7 +682,7 @@ always @(posedge clk) begin
q_deque_en <= 0;
if (q_deque_done) begin
p_state <= P_ARB;
- $display("Evict!");
+ $display("[FT] Evict!");
// The queue cannot be full during eviction,
// so don't need to check the full signal.
evict <= 1;
@@ -719,7 +709,16 @@ always @(posedge clk) begin
end
end
SLOW_UPDATE_WAIT: begin
- if (!(fast_busy & (ch3_data.tuple == ch0_tuple_latch))) begin
+ // R/W conflict on any of the FT entries
+ if (ch1_wren & (
+ (ch1_data.addr0 == ch2_data_r.addr0) |
+ (ch1_data.addr1 == ch2_data_r.addr1) |
+ (ch1_data.addr2 == ch2_data_r.addr2) |
+ (ch1_data.addr3 == ch2_data_r.addr3))) begin
+ p_state <= SLOW_UPDATE_WAIT;
+ end
+ // The same FT entry is busy
+ else if (!(fast_busy & (ch2_data_r.tuple == ch0_tuple_latch))) begin
p_state <= SLOW_UPDATE;
ft0_rden_b <= 1;
ft1_rden_b <= 1;
@@ -741,31 +740,82 @@ always @(posedge clk) begin
q_rden_b <= 0;
q_wren_b <= 0;
- ft0_data_b <= ch3_data_r;
- ft1_data_b <= ch3_data_r;
- ft2_data_b <= ch3_data_r;
- ft3_data_b <= ch3_data_r;
- q_data_b <= ch3_data_r;
+ ft0_data_b <= ch2_q;
+ ft1_data_b <= ch2_q;
+ ft2_data_b <= ch2_q;
+ ft3_data_b <= ch2_q;
+ q_data_b <= ch2_q;
- ft0_data_b.slow_cnt <= ch2_q.slow_cnt - ch3_rel_pkt_cnt_r;
- ft1_data_b.slow_cnt <= ch2_q.slow_cnt - ch3_rel_pkt_cnt_r;
- ft2_data_b.slow_cnt <= ch2_q.slow_cnt - ch3_rel_pkt_cnt_r;
- ft3_data_b.slow_cnt <= ch2_q.slow_cnt - ch3_rel_pkt_cnt_r;
- q_data_b.slow_cnt <= ch2_q.slow_cnt - ch3_rel_pkt_cnt_r;
+ ft0_data_b.seq <= ch2_data_r.seq;
+ ft1_data_b.seq <= ch2_data_r.seq;
+ ft2_data_b.seq <= ch2_data_r.seq;
+ ft3_data_b.seq <= ch2_data_r.seq;
+ q_data_b.seq <= ch2_data_r.seq;
- `ifdef DEBUG
- if (rd_valid_b & ch2_bit_map != 0) begin
- $display("Slow_cnt current %d, release %d, updated %d",
- (ch2_q.slow_cnt, ch3_rel_pkt_cnt_r),
- (ch2_q.slow_cnt - ch3_rel_pkt_cnt_r));
+ ft0_data_b.slow_cnt <= ch2_q.slow_cnt - rel_pkt_cnt_r;
+ ft1_data_b.slow_cnt <= ch2_q.slow_cnt - rel_pkt_cnt_r;
+ ft2_data_b.slow_cnt <= ch2_q.slow_cnt - rel_pkt_cnt_r;
+ ft3_data_b.slow_cnt <= ch2_q.slow_cnt - rel_pkt_cnt_r;
+ q_data_b.slow_cnt <= ch2_q.slow_cnt - rel_pkt_cnt_r;
- assert(!(ch2_q.slow_cnt < ch3_rel_pkt_cnt_r))
+ if (rd_valid_b & ch2_bit_map != 0) begin
+ `ifdef DEBUG
+ $display("[FT] Slow_cnt current %0d, release %0d, updated %0d",
+ ch2_q.slow_cnt, rel_pkt_cnt_r,
+ (ch2_q.slow_cnt - rel_pkt_cnt_r));
+ `endif
+
+ // Sanity checks
+ assert(!(ch2_q.slow_cnt < rel_pkt_cnt_r))
+ else begin
+ $error("[FT] Slow_cnt error");
+ $finish;
+ end
+ assert (ch2_q.ooo_flow_id_valid)
else begin
- $error("slow_cnt error");
+ $error("[FT] OOO flow ID was invalid on SLOW UPDATE path");
$finish;
end
+
+ // Flow becomes in-order, slow path is now inactive
+ if ((ch2_q.slow_cnt == rel_pkt_cnt_r) &&
+ (rel_pkt_cnt_r != 0)) begin
+ `ifdef DEBUG
+ $display("[FT] Flow with OOO flow ID %0d becomes in-order",
+ ch2_q.ooo_flow_id);
+ `endif
+
+ // Release the OOO flow ID
+ ooo_flow_id_release_valid <= 1;
+ ooo_flow_id_release_data <= ch2_q.ooo_flow_id;
+
+ // Update the flow context
+ q_data_b.ooo_flow_id_valid <= 0;
+ ft0_data_b.ooo_flow_id_valid <= 0;
+ ft1_data_b.ooo_flow_id_valid <= 0;
+ ft2_data_b.ooo_flow_id_valid <= 0;
+ ft3_data_b.ooo_flow_id_valid <= 0;
+ end
+
+ // Flow was GC'd or finished/reset
+ if (ch2_data_r.is_delete) begin
+ `ifdef DEBUG
+ $display("[FT] Flow with OOO flow ID %0d is dropped",
+ ch2_q.ooo_flow_id);
+ `endif
+
+ // Release the OOO flow ID
+ ooo_flow_id_release_valid <= 1;
+ ooo_flow_id_release_data <= ch2_q.ooo_flow_id;
+
+ // Deallocate the flow context
+ ft0_data_b.valid <= 0;
+ ft1_data_b.valid <= 0;
+ ft2_data_b.valid <= 0;
+ ft3_data_b.valid <= 0;
+ q_data_b.valid <= 0;
+ end
end
- `endif
// Update data in the flow table. Address is not changed.
if (rd_valid_b) begin
@@ -779,27 +829,6 @@ always @(posedge clk) begin
p_state <= P_ARB;
end
end
- SLOW_LOOKUP_WAIT: begin
- if (!(ch0_rd_valid & (lookup_tuple_b == ch0_tuple_latch))) begin
- p_state <= SLOW_LOOKUP;
- ft0_rden_b <= 1;
- ft1_rden_b <= 1;
- ft2_rden_b <= 1;
- ft3_rden_b <= 1;
- q_rden_b <= 1;
- end
- end
- SLOW_LOOKUP: begin
- ft0_rden_b <= 0;
- ft1_rden_b <= 0;
- ft2_rden_b <= 0;
- ft3_rden_b <= 0;
- q_rden_b <= 0;
- // Address is not changed
- if (rd_valid_b) begin
- p_state <= P_ARB;
- end
- end
default: begin
$display("Error state!");
$finish;
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/flow_table_wrapper.sv b/pigasus/hardware/rtl_sim/src/reassembly/flow_table_wrapper.sv
index f91c6af..154fdc5 100644
--- a/pigasus/hardware/rtl_sim/src/reassembly/flow_table_wrapper.sv
+++ b/pigasus/hardware/rtl_sim/src/reassembly/flow_table_wrapper.sv
@@ -1,5 +1,5 @@
`include "./src/struct_s.sv"
-//`define DEBUG
+// `define DEBUG
module flow_table_wrapper(
input logic clk,
input logic rst,
@@ -16,7 +16,11 @@ module flow_table_wrapper(
output metadata_t reorder_meta_data,
output logic reorder_meta_valid,
input logic reorder_meta_ready,
- input logic reorder_meta_almost_full
+ input logic reorder_meta_almost_full,
+ output metadata_t scheduler_meta_data,
+ output logic scheduler_meta_valid,
+ input logic scheduler_meta_ready,
+ input logic scheduler_meta_almost_full
);
tuple_t h0_tuple_in;
@@ -58,19 +62,10 @@ logic ch1_wren;
fce_t ch1_data;
logic ch1_insert_stall;
-// Read channel 2
-fce_meta_t ch2_meta;
-logic ch2_rden;
-logic ch2_ready;
-fce_t ch2_q;
-logic ch2_rd_valid;
-
-// Write channel 3
-logic [2:0] ch3_opcode;
-logic ch3_wren;
-logic ch3_ready;
-fce_t ch3_data;
-logic [PKT_AWIDTH-1:0] ch3_rel_pkt_cnt;
+// Update channel 2 (FT update FIFO, driven by the scheduler)
+logic ft_update_fifo_empty;
+logic ft_update_fifo_rdreq;
+ft_update_t ft_update_fifo_q;
fce_meta_t ch0_meta_r1;
fce_meta_t ch0_meta_r2;
@@ -98,21 +93,61 @@ logic udp_pkt; // Not-forward pkt;
logic stall;
// Slow path
-metadata_t ooo_meta_data;
-logic ooo_meta_valid;
-logic ooo_meta_ready;
-metadata_t r_meta_data;
-logic r_meta_valid;
-logic r_meta_ready;
-fce_t ooo_fce_data;
-logic ooo_fce_valid;
-logic ooo_fce_ready;
-fce_t r_fce_data;
-logic r_fce_valid;
-logic r_fce_ready;
-logic [31:0] meta_csr_readdata;
-logic [31:0] ooo_csr_readdata;
-logic ooo_almost_full;
+metadata_t ooo_meta_data;
+logic ooo_meta_valid;
+logic ooo_meta_ready;
+metadata_t r_meta_data;
+logic r_meta_valid;
+logic r_meta_ready;
+fce_t ooo_fce_data;
+logic ooo_fce_valid;
+logic ooo_fce_ready;
+fce_t r_fce_data;
+logic r_fce_valid;
+logic r_fce_ready;
+logic [31:0] meta_csr_readdata;
+logic [31:0] ooo_csr_readdata;
+logic ooo_almost_full;
+scheduler_token_t out_sched_token_data;
+logic out_sched_token_valid;
+logic out_sched_token_ready;
+scheduler_token_t r_sched_token_data;
+logic r_sched_token_valid;
+logic r_sched_token_ready;
+logic [31:0] sched_token_csr_readdata;
+logic sched_token_fifo_almost_full;
+
+// OOO flow ID management
+logic ooo_flow_ids_fl_rdreq;
+logic ooo_flow_ids_fl_wrreq;
+logic ooo_flow_ids_fl_empty;
+ooo_flow_id_t ooo_flow_ids_fl_q;
+ooo_flow_id_t ooo_flow_ids_fl_data;
+
+logic ooo_flow_id_release_valid;
+ooo_flow_id_t ooo_flow_id_release_data;
+ooo_flow_id_t ooo_flow_ids_fl_data_init;
+
+typedef enum {
+ OOO_FLOW_IDS_FL_IDLE,
+ OOO_FLOW_IDS_FL_INIT,
+ OOO_FLOW_IDS_FL_INIT_DONE
+} ooo_flow_ids_fl_state_t;
+ooo_flow_ids_fl_state_t ooo_flow_ids_fl_state;
+
+initial begin
+ ooo_flow_ids_fl_state = OOO_FLOW_IDS_FL_IDLE;
+end
+
+assign ooo_flow_ids_fl_wrreq = (!rst && (
+ (ooo_flow_ids_fl_state == OOO_FLOW_IDS_FL_INIT) ||
+ ooo_flow_id_release_valid
+));
+
+assign ooo_flow_ids_fl_data = (
+ (ooo_flow_ids_fl_state == OOO_FLOW_IDS_FL_INIT) ?
+ ooo_flow_ids_fl_data_init : ooo_flow_id_release_data
+);
///////// Forward pkts /////////////////////////
// Packets which are either ACK (0 length) OR
@@ -154,14 +189,34 @@ always @(posedge clk) begin
end
always @(posedge clk) begin
+ ooo_flow_ids_fl_rdreq <= 0;
+ if (sched_token_fifo_almost_full) begin
+ $error("[FTW] Scheduler token FIFO is almost full");
+ $finish;
+ end
+
if (rst) begin
- ch1_opcode <= 0;
- ch1_bit_map <= 0;
- ch1_wren <= 0;
- ch1_data <= 0;
- out_meta_valid <= 0;
- ooo_meta_valid <= 0;
- ooo_fce_valid <= 0;
+ ch1_opcode <= 0;
+ ch1_bit_map <= 0;
+ ch1_wren <= 0;
+ ch1_data <= 0;
+ out_meta_valid <= 0;
+ ooo_meta_valid <= 0;
+ ooo_fce_valid <= 0;
+ ooo_flow_ids_fl_data_init <= 0;
+ out_sched_token_data <= 0;
+ out_sched_token_valid <= 0;
+ ooo_flow_ids_fl_state <= OOO_FLOW_IDS_FL_INIT;
+ end
+ // Initialize the OOO flow IDs free-list
+ else if (ooo_flow_ids_fl_state == OOO_FLOW_IDS_FL_INIT) begin
+ ooo_flow_ids_fl_data_init <= ooo_flow_ids_fl_data_init + 1;
+
+ if (ooo_flow_ids_fl_data_init ==
+ {OOO_FLOW_ID_AWIDTH{1'b1}}) begin
+ ooo_flow_ids_fl_state <= OOO_FLOW_IDS_FL_INIT_DONE;
+ $display("[FTW] Finish OOO flow ID free-list init");
+ end
end
// Stall the pipeline if inserting to full para_q
else if (!ch1_insert_stall) begin
@@ -183,6 +238,9 @@ always @(posedge clk) begin
ooo_meta_data <= m10;
ooo_meta_data.pkt_flags <= PKT_CHECK;
+ out_sched_token_data <= 0;
+ out_sched_token_valid <= 0;
+
ooo_fce_valid <= 0;
ooo_fce_data <= ch0_q;
if (ch0_rd_valid) begin
@@ -194,15 +252,17 @@ always @(posedge clk) begin
// Inorder and no LL_node, forward the pkt
if (m10.seq == ch0_q.seq) begin
`ifdef DEBUG
- $display("Inorder : pkt %d, seq %x, length %d, expect %x, slow_cnt %d",
- m10.pktID, m10.seq, m10.len, ch0_q.seq, ch0_q.slow_cnt);
+ $display("[FTW] Inorder: pkt %d, seq %x, length %d, expect %x, slow_cnt %d, ",
+ m10.pktID, m10.seq, m10.len, ch0_q.seq, ch0_q.slow_cnt,
+ "ooo_flow_id_valid %d, ooo_flow_id %d",
+ ch0_q.ooo_flow_id_valid, ch0_q.ooo_flow_id);
`endif
ch1_wren <= 1;
// Slow Path has packets, update cnt
if (ch0_q.slow_cnt > 0) begin
`ifdef DEBUG
- $display("Check LL");
+ $display("[FTW] Check LL");
`endif
ch1_data.slow_cnt <= ch0_q.slow_cnt + 1;
@@ -218,7 +278,7 @@ always @(posedge clk) begin
// Delete the fce, forward the pkt
if (m10.tcp_flags[TCP_FIN] | m10.tcp_flags[TCP_RST]) begin
`ifdef DEBUG
- $display("FIN/RST : pkt %d, seq %x, length %d, expect %x",
+ $display("[FTW] FIN/RST : pkt %d, seq %x, length %d, expect %x",
m10.pktID, m10.seq, m10.len, ch0_q.seq);
`endif
@@ -230,8 +290,10 @@ always @(posedge clk) begin
// If incoming seq is bigger than expected, push to slow path
else if (m10.seq > ch0_q.seq) begin
`ifdef DEBUG
- $display("OOO : pkt %d, seq %x, length %d, expect %x, slow_cnt %d",
- m10.pktID, m10.seq, m10.len, ch0_q.seq, ch0_q.slow_cnt);
+ $display("[FTW] OOO: pkt %d, seq %x, length %d, expect %x, slow_cnt %d, ",
+ m10.pktID, m10.seq, m10.len, ch0_q.seq, ch0_q.slow_cnt,
+ "ooo_flow_id_valid %d, ooo_flow_id %d",
+ ch0_q.ooo_flow_id_valid, ch0_q.ooo_flow_id);
`endif
// Update the slow_cnt
@@ -241,6 +303,33 @@ always @(posedge clk) begin
ch1_data.seq <= ch0_q.seq;
ooo_meta_valid <= 1;
ooo_fce_valid <= 1;
+
+ // If required, allocate a new OOO flow ID
+ if (!ch0_q.ooo_flow_id_valid) begin
+ ch1_data.ooo_flow_id_valid <= 1;
+ ch1_data.ooo_flow_id <= ooo_flow_ids_fl_q;
+ ooo_fce_data.ooo_flow_id <= ooo_flow_ids_fl_q;
+
+ ooo_flow_ids_fl_rdreq <= 1;
+ if (ooo_flow_ids_fl_empty) begin
+ $error("[FTW] OOO flow IDs free-list is empty");
+ $finish;
+ end
+
+ // Insert op into the scheduler token FIFO
+ out_sched_token_valid <= 1;
+ out_sched_token_data.tuple <= ch0_q.tuple;
+ out_sched_token_data.ooo_flow_id <= ooo_flow_ids_fl_q;
+
+ if (!out_sched_token_ready) begin
+ $error("[FTW] Scheduler token FIFO is not ready");
+ $finish;
+ end
+
+ `ifdef DEBUG
+ $display("[FTW] Allocated OOO flowID %d", ooo_flow_ids_fl_q);
+ `endif
+ end
end
// The incoming seq is smaller than expected (overlapping bytes).
// Current policy drops these packet, without changing the FCE.
@@ -249,7 +338,7 @@ always @(posedge clk) begin
out_meta_data.pkt_flags <= PKT_DROP;
`ifdef DEBUG
- $display("Overlap : pkt %d, seq %x, length %d, expect %x",
+ $display("[FTW] Overlap: pkt %d, seq %x, length %d, expect %x",
m10.pktID, m10.seq, m10.len, ch0_q.seq);
`endif
end
@@ -257,17 +346,17 @@ always @(posedge clk) begin
// Miss, insert
else begin
// Insert to para_q, which is bit[4]
- ch1_opcode <= FT_INSERT;
- ch1_bit_map <= 5'b1_0000;
- ch1_data.valid <= 1;
- ch1_data.tuple <= m10.tuple;
- ch1_data.pointer <= 0;
- ch1_data.ll_valid <= 0;
- ch1_data.slow_cnt <= 0;
- ch1_data.addr0 <= ch0_meta_r3.addr0;
- ch1_data.addr1 <= ch0_meta_r3.addr1;
- ch1_data.addr2 <= ch0_meta_r3.addr2;
- ch1_data.addr3 <= ch0_meta_r3.addr3;
+ ch1_opcode <= FT_INSERT;
+ ch1_bit_map <= 5'b1_0000;
+ ch1_data.valid <= 1;
+ ch1_data.tuple <= m10.tuple;
+ ch1_data.slow_cnt <= 0;
+ ch1_data.addr0 <= ch0_meta_r3.addr0;
+ ch1_data.addr1 <= ch0_meta_r3.addr1;
+ ch1_data.addr2 <= ch0_meta_r3.addr2;
+ ch1_data.addr3 <= ch0_meta_r3.addr3;
+ ch1_data.ooo_flow_id <= 0;
+ ch1_data.ooo_flow_id_valid <= 0;
// SYN's expected seq is special
if (m10.tcp_flags[TCP_SYN]) begin
@@ -287,7 +376,7 @@ always @(posedge clk) begin
ch1_wren <= 1;
end
`ifdef DEBUG
- $display("Insert : pkt %d, seq %x, length %d, expect %x, slow_cnt %d",
+ $display("[FTW] Insert : pkt %d, seq %x, length %d, expect %x, slow_cnt %d",
m10.pktID, m10.seq, m10.len, ch0_q.seq, ch0_q.slow_cnt);
`endif
@@ -383,29 +472,24 @@ hash_func hash3 (
);
flow_table flow_table_inst (
- .clk (clk),
- .rst (rst),
- .ch0_meta (ch0_meta),
- .ch0_rden (ch0_rden),
- .ch0_q (ch0_q),
- .ch0_rd_valid (ch0_rd_valid),
- .ch0_bit_map (ch0_bit_map),
- .ch0_rd_stall (ch0_rd_stall),
- .ch1_opcode (ch1_opcode),
- .ch1_bit_map (ch1_bit_map),
- .ch1_wren (ch1_wren),
- .ch1_data (ch1_data),
- .ch1_insert_stall (ch1_insert_stall),
- .ch2_meta (ch2_meta),
- .ch2_rden (ch2_rden),
- .ch2_ready (ch2_ready),
- .ch2_q (ch2_q),
- .ch2_rd_valid (ch2_rd_valid),
- .ch3_opcode (ch3_opcode),
- .ch3_wren (ch3_wren),
- .ch3_ready (ch3_ready),
- .ch3_data (ch3_data),
- .ch3_rel_pkt_cnt (ch3_rel_pkt_cnt)
+ .clk (clk),
+ .rst (rst),
+ .ch0_meta (ch0_meta),
+ .ch0_rden (ch0_rden),
+ .ch0_q (ch0_q),
+ .ch0_rd_valid (ch0_rd_valid),
+ .ch0_bit_map (ch0_bit_map),
+ .ch0_rd_stall (ch0_rd_stall),
+ .ch1_opcode (ch1_opcode),
+ .ch1_bit_map (ch1_bit_map),
+ .ch1_wren (ch1_wren),
+ .ch1_data (ch1_data),
+ .ch1_insert_stall (ch1_insert_stall),
+ .ch2_wren (!ft_update_fifo_empty),
+ .ch2_data (ft_update_fifo_q),
+ .ch2_ready (ft_update_fifo_rdreq),
+ .ooo_flow_id_release_data (ooo_flow_id_release_data),
+ .ooo_flow_id_release_valid (ooo_flow_id_release_valid)
);
unified_fifo #(
@@ -458,29 +542,72 @@ unified_fifo #(
.overflow ()
);
-flow_reassembly flow_reassembly_inst (
- .clk (clk),
- .rst (rst),
- .meta_data (r_meta_data),
- .meta_valid (r_meta_valid),
- .meta_ready (r_meta_ready),
- .fce_data (r_fce_data),
- .fce_valid (r_fce_valid),
- .fce_ready (r_fce_ready),
- .ch2_meta (ch2_meta),
- .ch2_rden (ch2_rden),
- .ch2_ready (ch2_ready),
- .ch2_q (ch2_q),
- .ch2_rd_valid (ch2_rd_valid),
- .ch3_opcode (ch3_opcode),
- .ch3_wren (ch3_wren),
- .ch3_ready (ch3_ready),
- .ch3_data (ch3_data),
- .ch3_rel_pkt_cnt (ch3_rel_pkt_cnt),
- .reorder_meta (reorder_meta_data),
- .reorder_valid (reorder_meta_valid),
- .reorder_ready (reorder_meta_ready),
- .reorder_almost_full (reorder_meta_almost_full)
+scheduler_reassembly scheduler_inst(
+ .clk (clk),
+ .rst (rst),
+ .in_meta_data (r_meta_data),
+ .in_meta_valid (r_meta_valid),
+ .in_meta_ready (r_meta_ready),
+ .in_fce_data (r_fce_data),
+ .in_fce_valid (r_fce_valid),
+ .in_fce_ready (r_fce_ready),
+ .in_token_data (r_sched_token_data),
+ .in_token_valid (r_sched_token_valid),
+ .in_token_ready (r_sched_token_ready),
+ .ft_update_fifo_empty (ft_update_fifo_empty),
+ .ft_update_fifo_rdreq (ft_update_fifo_rdreq),
+ .ft_update_fifo_q (ft_update_fifo_q),
+ .out_sched_fifo_meta (scheduler_meta_data),
+ .out_sched_fifo_valid (scheduler_meta_valid),
+ .out_sched_fifo_ready (scheduler_meta_ready),
+ .out_sched_fifo_almost_full (scheduler_meta_almost_full),
+ .out_reassembly_fifo_meta (reorder_meta_data),
+ .out_reassembly_fifo_valid (reorder_meta_valid),
+ .out_reassembly_fifo_ready (reorder_meta_ready),
+ .out_reassembly_fifo_almost_full(reorder_meta_almost_full)
+);
+
+// OOO flow IDs free-list
+sc_fifo #(
+ .IS_SHOWAHEAD(1),
+ .IS_OUTDATA_REG(0),
+ .DEPTH(MAX_NUM_OOO_FLOWS),
+ .DWIDTH(OOO_FLOW_ID_AWIDTH)
+)
+ooo_flow_ids_fl_fifo (
+ .clock(clk),
+ .data(ooo_flow_ids_fl_data),
+ .rdreq(ooo_flow_ids_fl_rdreq),
+ .wrreq(ooo_flow_ids_fl_wrreq),
+ .empty(ooo_flow_ids_fl_empty),
+ .full(), // Unused
+ .q(ooo_flow_ids_fl_q),
+ .usedw() // Unused
+);
+
+unified_fifo #(
+ .FIFO_NAME ("[flow_table_wrapper] scheduler_token_fifo"),
+ .MEM_TYPE ("M20K"),
+ .DUAL_CLOCK (0),
+ .USE_ALMOST_FULL (1),
+ .FULL_LEVEL (112),
+ .SYMBOLS_PER_BEAT (1),
+ .BITS_PER_SYMBOL (SCHEDULER_TOKEN_T_WIDTH),
+ .FIFO_DEPTH (128)
+) scheduler_token_fifo (
+ .in_clk (clk),
+ .in_reset (rst),
+ .out_clk (),//not used
+ .out_reset (),
+ .in_data (out_sched_token_data),
+ .in_valid (out_sched_token_valid),
+ .in_ready (out_sched_token_ready),
+ .out_data (r_sched_token_data),
+ .out_valid (r_sched_token_valid),
+ .out_ready (r_sched_token_ready),
+ .fill_level (),
+ .almost_full (sched_token_fifo_almost_full),
+ .overflow ()
);
endmodule
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/linked_list.sv b/pigasus/hardware/rtl_sim/src/reassembly/linked_list.sv
index 9b90f45..874944e 100644
--- a/pigasus/hardware/rtl_sim/src/reassembly/linked_list.sv
+++ b/pigasus/hardware/rtl_sim/src/reassembly/linked_list.sv
@@ -1,4 +1,5 @@
`include "./src/struct_s.sv"
+// `define DEBUG
module linked_list(
input logic clk,
@@ -6,6 +7,7 @@ module linked_list(
input logic start,
output logic done,
output logic discard,
+ input logic is_newlist,
input logic [LL_AWIDTH-1:0] pointer,
input logic [PKT_AWIDTH-1:0] pktID,
input logic [4:0] flits,
@@ -22,7 +24,14 @@ module linked_list(
input logic [LL_AWIDTH-1:0] head_wr_addr,
input logic load_empty_pointer,
output logic [LL_AWIDTH-1:0] empty_pointer,
- output logic empty_pointer_valid
+ output logic empty_pointer_valid,
+ // Garbage collection
+ output logic gc_ready,
+ input logic gc_start,
+ input logic [LL_AWIDTH-1:0] gc_pointer,
+ output entry_t gc_entry,
+ output logic gc_done,
+ output logic [31:0] gc_fl_fill_level
);
typedef enum {
@@ -66,9 +75,29 @@ logic [LL_AWIDTH-1:0] emptylist_in_data;
logic emptylist_out_ready;
logic emptylist_out_valid;
logic [LL_AWIDTH-1:0] emptylist_out_data;
+logic emptylist_enque_busy;
assign empty_entry = 0;
+// Port A
+logic [LL_AWIDTH-1:0] ll_addr;
+assign ll_addr = ll_wr ? ll_wr_addr : ll_rd_addr;
+
+// Port B
+entry_t gc_rd_data;
+logic [LL_AWIDTH-1:0] gc_rd_addr;
+logic gc_rd;
+logic gc_rd_valid;
+logic gc_rd_r1;
+
+// GC logic
+assign emptylist_enque_busy = ((state == INIT) ||
+ ((state == IDLE) && store_head));
+
+assign gc_ready = (!emptylist_enque_busy && emptylist_in_ready);
+assign gc_entry = gc_rd_data;
+assign gc_done = gc_rd_valid;
+
// Assume no repeat, no overlapping, no merging
always @ (posedge clk) begin
if (rst) begin
@@ -88,6 +117,9 @@ always @ (posedge clk) begin
ll_rd <= 0;
discard <= 1'b0;
+ gc_rd_addr <= 0;
+ gc_rd <= 0;
+
emptylist_in_data <= 0;
emptylist_in_valid <= 0;
emptylist_out_ready <= 0;
@@ -104,6 +136,18 @@ always @ (posedge clk) begin
$error("Insert to full LL_emptylsit");
$finish;
end
+ assert (!(ll_wr && ll_rd))
+ else begin
+ $error("Cannot perform simultaneous LL read/write");
+ $finish;
+ end
+ assert (!((ll_wr || ll_rd) && gc_rd && (ll_addr == gc_rd_addr)))
+ else begin
+ $error("LL and GC accesses should never collide");
+ $finish;
+ end
+
+ emptylist_in_valid <= 1'b0;
case (state)
INIT: begin
emptylist_in_valid <= 1'b1;
@@ -112,13 +156,12 @@ always @ (posedge clk) begin
if (emptylist_in_data == {LL_AWIDTH{1'b1}}) begin
state <= IDLE;
emptylist_in_valid <= 1'b0;
- $display("Finish LL emptylist init");
+ $display("[LL] Finish LL emptylist init");
end
end
end
IDLE: begin
// Initialize states
- emptylist_in_valid <= 1'b0;
cycle <= 0;
done <= 0;
entry <= empty_entry;
@@ -144,16 +187,11 @@ always @ (posedge clk) begin
ll_rd <= 0;
end
+ ll_wr <= 0;
if (store_head) begin
- ll_wr <= 1;
- ll_wr_data <= head_in;
- ll_wr_addr <= head_wr_addr;
emptylist_in_valid <= 1'b1;
emptylist_in_data <= head_wr_addr;
end
- else begin
- ll_wr <= 0;
- end
if (ll_rd_valid) begin
head_out_valid <= 1;
@@ -186,14 +224,27 @@ always @ (posedge clk) begin
end
end
COMPARE: begin
- if (entry.valid) begin
+ if (is_newlist) begin
+ `ifdef DEBUG
+ $display("[LL] Insert to empty list");
+ `endif
+
+ ll_wr <= 1;
+ ll_wr_addr <= addr;
+ ll_wr_data <= '{1, seq, len, 0, pktID,
+ flits, 1, last_7_bytes};
+
+ done <= 1;
+ state <= IDLE;
+ end
+ else if (entry.valid) begin
// Check whether the new node should be
// inserted in front of the current one.
if (end_p < entry.seq) begin
// Insert in front of the first node of the list
if (first) begin
`ifdef DEBUG
- $display("Insert in front \n");
+ $display("[LL] Insert in front \n");
`endif
//list_table[next_empty] <= '{1,seq,len,addr,pktID,0};
@@ -240,7 +291,7 @@ always @ (posedge clk) begin
// Insert new node
8'd3: begin
`ifdef DEBUG
- $display("Insert in between\n");
+ $display("[LL] Insert in between\n");
`endif
ll_wr <= 1;
@@ -278,7 +329,7 @@ always @ (posedge clk) begin
// Insert new node to the end of the list
8'd1: begin
`ifdef DEBUG
- $display("Insert in the end \n");
+ $display("[LL] Insert in the end \n");
`endif
ll_wr <= 1;
@@ -297,7 +348,7 @@ always @ (posedge clk) begin
// Pass the check of current node, check the next one
else begin
`ifdef DEBUG
- $display("Pass this\n");
+ $display("[LL] Pass this\n");
`endif
addr <= entry.next;
@@ -308,7 +359,7 @@ always @ (posedge clk) begin
end
else begin
`ifdef DEBUG
- $display("Overlap with node in LL");
+ $display("[LL] Overlap with node in LL");
`endif
done <= 1;
@@ -317,26 +368,28 @@ always @ (posedge clk) begin
end
end
else begin
- // Point to an empty location.
- if (first) begin
- `ifdef DEBUG
- $display("Insert to empty list");
- `endif
-
- ll_wr <= 1;
- ll_wr_addr <= addr;
- ll_wr_data <= '{1, seq, len, 0, pktID,
- flits, 1, last_7_bytes};
- end
- done <= 1;
- state <= IDLE;
+ $display("[LL] Error: LL is not empty, but entry is invalid");
+ $finish;
end
end
default: begin
- $display("Error state!");
+ $display("[LL] Error state!");
$finish;
end
endcase
+
+ /**
+ * Garbage-collection (GC) logic.
+ */
+ gc_rd <= 0;
+ gc_rd_addr <= 0;
+ if (gc_start && gc_ready) begin
+ gc_rd_addr <= gc_pointer;
+ gc_rd <= 1;
+
+ emptylist_in_data <= gc_pointer;
+ emptylist_in_valid <= 1'b1;
+ end
end
end
@@ -344,21 +397,28 @@ end
always @(posedge clk) begin
ll_rd_r1 <= ll_rd;
ll_rd_valid <= ll_rd_r1;
+
+ gc_rd_r1 <= gc_rd;
+ gc_rd_valid <= gc_rd_r1;
end
-bram_simple2port #(
+bram_true2port #(
.AWIDTH(LL_AWIDTH),
.DWIDTH(LL_DWIDTH),
.DEPTH(LL_DEPTH)
)
linked_list_mem (
- .clock (clk),
- .data (ll_wr_data),
- .rdaddress (ll_rd_addr),
- .rden (ll_rd),
- .wraddress (ll_wr_addr),
- .wren (ll_wr),
- .q (ll_rd_data)
+ .address_a(ll_addr),
+ .address_b(gc_rd_addr),
+ .clock(clk),
+ .data_a(ll_wr_data),
+ .data_b({LL_DWIDTH{1'b0}}), // GC writes disabled
+ .rden_a(ll_rd),
+ .rden_b(gc_rd),
+ .wren_a(ll_wr),
+ .wren_b(1'b0), // GC writes disabled
+ .q_a(ll_rd_data),
+ .q_b(gc_rd_data)
);
unified_fifo #(
@@ -382,7 +442,7 @@ ll_emptylist (
.out_data (emptylist_out_data),
.out_valid (emptylist_out_valid),
.out_ready (emptylist_out_ready),
- .fill_level (),
+ .fill_level (gc_fl_fill_level),
.almost_full (),//not used
.overflow ()
);
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/scheduler_reassembly.sv b/pigasus/hardware/rtl_sim/src/reassembly/scheduler_reassembly.sv
new file mode 100644
index 0000000..bf6f56b
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/src/reassembly/scheduler_reassembly.sv
@@ -0,0 +1,1597 @@
+`include "./src/struct_s.sv"
+// `define DEBUG
+
+/**
+ * Implements a scheduler for the flow reassembly module, with three key
+ * responsibilities:
+ *
+ * 1. Maintaining TCP flow state for out-of-order (OOO) flows. Each OOO
+ * flow is assigned an OOO Flow ID in {0, ..., MAX_NUM_OOO_FLOWS-1},
+ * which is stored as part of the flow's context in the primary flow
+ * table. In turn, this serves as a pointer into the scheduler's OOO
+ * flow table (implemented using a simple hash-table in BRAM), where
+ * the complete TCP flow state (e.g., next expected sequence number,
+ * pointer to the reassembly linked list, etc.) resides. To allocate
+ * an OOO flow context, the primary FT issues a "token" containing
+ * the relevant flow metadata and a valid OOO flow ID.
+ *
+ * 2. Deciding which OOO flow should be served next by the reassembler.
+ * As part of the flow state, the scheduler maintains "flow queues":
+ * ordered, per-flow lists of packets that are yet to be serviced by
+ * the reassembler module. When a new packet arrives, it is inserted
+ * at the *tail* of the corresponding flow queue. The scheduler also
+ * maintains a global data-structure -- hereafter referred to as the
+ * scheduling queue -- to decide which flow to serve next. In making
+ * this decision, the scheduler pops the next "active" flow (an OOO
+ * flow that has >= 1 packet in its flow queue and isn't marked for
+ * garbage-collection) from the scheduling queue, deques the head
+ * element from its flow queue, and forwards this packet to the re-
+ * assembler. Updates from the reassembler (i.e. changes in the TCP
+ * flow state) are subsequently written back to the OOO flow table.
+ *
+ * 3. Flow garbage-collection (GC). Lastly, the scheduler is responsible
+ * for managing memory corresponding to: (a) the flow queues, and (b)
+ * the OOO linked lists in the reassembler. When the available memory
+ * in either module falls below some pre-determined watermark levels,
+ * the scheduler first marks a certain flow for GC (deactivating it),
+ * then issues a request to GC FSMs in the scheduler and reassembler.
+ * Note that any incoming packets corresponding to a marked flow are
+ * dropped by the scheduler. When the GC operations in both modules
+ * complete, the scheduler drops the flow by deallocating its context
+ * in the primary flow table.
+ */
+module scheduler_reassembly(
+ // General inputs
+ input logic clk,
+ input logic rst,
+ // Input FIFO
+ input metadata_t in_meta_data,
+ input logic in_meta_valid,
+ output logic in_meta_ready,
+ input fce_t in_fce_data,
+ input logic in_fce_valid,
+ output logic in_fce_ready,
+ // Incoming FT tokens
+ input scheduler_token_t in_token_data,
+ input logic in_token_valid,
+ output logic in_token_ready,
+ // Outgoing FT updates
+ output logic ft_update_fifo_empty,
+ input logic ft_update_fifo_rdreq,
+ output ft_update_t ft_update_fifo_q,
+ // Output FIFO (Scheduler)
+ output metadata_t out_sched_fifo_meta,
+ output logic out_sched_fifo_valid,
+ input logic out_sched_fifo_ready,
+ input logic out_sched_fifo_almost_full,
+ // Output FIFO (Reassembly)
+ output metadata_t out_reassembly_fifo_meta,
+ output logic out_reassembly_fifo_valid,
+ input logic out_reassembly_fifo_ready,
+ input logic out_reassembly_fifo_almost_full
+);
+
+integer i;
+
+/**
+ * Local parameters.
+ */
+// Free-list low watermarks: the GC FSM may request a heap deque-max when a free-list size is <= its watermark
+localparam OOO_FLOW_FL_LOW_WATERMARK = 32; // Compared against gc_ooo_flow_fl_size_r (flow-queue LL entries)
+localparam REASSEMBLY_FL_LOW_WATERMARK = 32; // Compared against gc_reassembly_fl_size (reassembler LL entries)
+// Miscellaneous
+localparam REASSEMBLY_GC_FIFO_DEPTH = 4;
+localparam FT_UPDATE_FIFO_DEPTH = 16;
+localparam HEAP_PRIORITY_PADDED_AWIDTH = (HEAP_PRIORITY_AWIDTH + 3); // 3 extra MSBs; non-zero MSBs saturate the enqueue priority
+localparam PRIORITY_SCALING_FACTOR = (HEAP_NUM_PRIORITIES / 128);
+localparam LOG_PRIORITY_SCALING_FACTOR = ($clog2(PRIORITY_SCALING_FACTOR)); // Used as the base shift amount in the priority computation
+localparam OOO_FLOW_LL_INVALID_PTR = {OOO_FLOW_LL_ENTRY_PTR_T_WIDTH{1'b1}}; // All-ones sentinel: empty head/tail pointer or end of a flow queue
+
+/**
+ * Local typedefs.
+ */
+typedef logic [HEAP_PRIORITY_PADDED_AWIDTH-1:0] heap_priority_padded_t;
+
+// Free list initialization FSM
+typedef enum logic [1:0] {
+ FL_FSM_STATE_IDLE,
+ FL_FSM_STATE_INIT,
+ FL_FSM_STATE_INIT_DONE
+} ooo_flow_ll_fl_fsm_state_t;
+
+// Flow table service FSM
+typedef enum logic [2:0] {
+ FT_SERVICE_FSM_STATE_IDLE,
+ FT_SERVICE_FSM_STATE_DATA_RD,
+ FT_SERVICE_FSM_STATE_DATA_WR,
+ FT_SERVICE_FSM_STATE_DATA_INSERT_HEAP,
+ FT_SERVICE_FSM_STATE_CTRL_RD,
+ FT_SERVICE_FSM_STATE_CTRL_WR
+} ft_service_fsm_state_t;
+
+// Reassembly service FSM
+typedef enum logic [2:0] {
+ REASSEMBLY_SERVICE_FSM_STATE_IDLE,
+ REASSEMBLY_SERVICE_FSM_STATE_FC_RD,
+ REASSEMBLY_SERVICE_FSM_STATE_LL_RD,
+ REASSEMBLY_SERVICE_FSM_STATE_RD_DONE,
+ REASSEMBLY_SERVICE_FSM_STATE_FC_WR,
+ REASSEMBLY_SERVICE_FSM_STATE_INSERT_HEAP
+} reassembly_service_fsm_state_t;
+
+// Garbage-collection FSM
+typedef enum logic [2:0] {
+ GC_FSM_STATE_IDLE,
+ GC_FSM_STATE_FC_RD,
+ GC_FSM_STATE_INIT_REASSEMBLY_GC,
+ GC_FSM_STATE_LL_RD,
+ GC_FSM_STATE_LL_RD_DONE
+} gc_fsm_state_t;
+
+// Garbage-collection response FSM
+typedef enum logic {
+ GC_RESPONSE_FSM_STATE_IDLE,
+ GC_RESPONSE_FSM_STATE_UPDATE_FT
+} gc_response_fsm_state_t;
+
+/**
+ * Submodule logic.
+ */
+// Free list
+logic fl_rdreq;
+logic fl_wrreq;
+logic fl_empty;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] fl_q;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] fl_data;
+
+// Flow table update FIFO
+logic ft_update_fifo_full;
+logic ft_update_fifo_wrreq;
+ft_update_t ft_update_fifo_data;
+
+// OOO flow context table
+logic fc_rden_a;
+logic fc_rden_b;
+logic fc_wren_a;
+logic fc_wren_b;
+ooo_flow_fc_entry_t fc_q_a;
+ooo_flow_fc_entry_t fc_q_b;
+ooo_flow_fc_entry_t fc_data_a;
+ooo_flow_fc_entry_t fc_data_b;
+logic [OOO_FLOW_ID_AWIDTH-1:0] fc_address_a;
+logic [OOO_FLOW_ID_AWIDTH-1:0] fc_address_b;
+
+// Linked-list (value) entries
+logic lle_meta_rden_b;
+logic lle_meta_wren_a;
+metadata_t lle_meta_q_a;
+metadata_t lle_meta_q_b;
+metadata_t lle_meta_data_a;
+metadata_t lle_meta_data_b;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] lle_meta_address_a;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] lle_meta_address_b;
+
+// Linked-list (next pointer) entries
+logic lle_nextptr_rden_b;
+logic lle_nextptr_wren_a;
+ooo_flow_ll_entry_ptr_t lle_nextptr_q_a;
+ooo_flow_ll_entry_ptr_t lle_nextptr_q_b;
+ooo_flow_ll_entry_ptr_t lle_nextptr_data_a;
+ooo_flow_ll_entry_ptr_t lle_nextptr_data_b;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] lle_nextptr_address_a;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] lle_nextptr_address_b;
+
+/**
+ * Flow reassembly.
+ */
+// Read channel
+logic reassembly_rdch_rden;
+logic reassembly_rdch_ready;
+logic reassembly_rdch_rd_valid;
+metadata_t reassembly_rdch_meta;
+ooo_flow_fc_entry_t reassembly_rdch_fce;
+ooo_flow_id_t reassembly_rdch_ooo_flow_id;
+logic reassembly_rdch_rd_ooo_flow_invalid;
+// Write channel
+logic reassembly_wrch_wren;
+logic reassembly_wrch_ready;
+logic reassembly_wrch_is_delete;
+ooo_flow_fc_entry_t reassembly_wrch_update_fce;
+logic [PKT_AWIDTH-1:0] reassembly_wrch_rel_pkt_cnt;
+// Debug
+logic reassembly_state_idle;
+
+// Reassembly service queue
+logic heap_in_enque_en;
+logic heap_in_enque_ready;
+heap_priority_t heap_in_enque_priority;
+ooo_flow_id_t heap_in_enque_ooo_flow_id;
+heap_priority_padded_t heap_in_enque_priority_padded;
+logic heap_out_deque_min_en;
+logic heap_out_deque_min_ready;
+heap_priority_t heap_out_deque_min_priority;
+ooo_flow_id_t heap_out_deque_min_ooo_flow_id;
+logic heap_in_deque_max_req_en;
+logic heap_in_deque_max_req_ready;
+logic heap_out_deque_max_en;
+logic heap_out_deque_max_ready;
+heap_priority_t heap_out_deque_max_priority;
+ooo_flow_id_t heap_out_deque_max_ooo_flow_id;
+
+/**
+ * Housekeeping.
+ */
+// Scheduler output FIFO
+logic out_sched_fifo_valid_int;
+metadata_t out_sched_fifo_meta_int;
+
+// Garbage-collection flow state
+logic gc_state[MAX_NUM_OOO_FLOWS-1:0];
+logic gc_rden[1:0];
+logic gc_wren[1:0];
+logic gc_rd_q[1:0];
+logic gc_wr_data[1:0];
+logic [OOO_FLOW_ID_AWIDTH-1:0] gc_rd_address[1:0];
+logic [OOO_FLOW_ID_AWIDTH-1:0] gc_wr_address[1:0];
+
+// Heap priority computation
+logic exponent_pos;
+logic [2:0] exponent;
+logic [4:0] num_flits;
+logic [2:0] log2_num_flits;
+
+// Free list management
+ooo_flow_ll_fl_fsm_state_t fl_fsm_state;
+ooo_flow_ll_fl_fsm_state_t fl_fsm_state_next;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] fl_data_init;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] fl_data_init_next;
+
+// FT service FSM
+logic ft_service_fsm_ooo_flow_valid;
+ft_service_fsm_state_t ft_service_fsm_state;
+ft_service_fsm_state_t ft_service_fsm_state_next;
+
+// Reassembly service FSM
+metadata_t reassembly_service_fsm_lle_meta_q;
+logic reassembly_service_fsm_fc_data_latch_en;
+logic reassembly_service_fsm_ooo_flow_ll_empty;
+ooo_flow_fc_entry_t reassembly_service_fsm_fc_q;
+reassembly_service_fsm_state_t reassembly_service_fsm_state;
+ooo_flow_ll_entry_ptr_t reassembly_service_fsm_lle_nextptr_q;
+reassembly_service_fsm_state_t reassembly_service_fsm_state_next;
+logic [OOO_FLOW_ID_AWIDTH-1:0] reassembly_service_fsm_fc_address;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] reassembly_service_fsm_lle_address;
+
+metadata_t reassembly_service_fsm_lle_meta_q_r;
+ooo_flow_fc_entry_t reassembly_service_fsm_fc_q_r;
+ooo_flow_fc_entry_t reassembly_service_fsm_fc_data_r;
+ooo_flow_ll_entry_ptr_t reassembly_service_fsm_lle_nextptr_q_r;
+logic [OOO_FLOW_ID_AWIDTH-1:0] reassembly_service_fsm_fc_address_r;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] reassembly_service_fsm_lle_address_r;
+
+// Garbage-collection FSM
+logic gc_lle_rden;
+metadata_t gc_lle_meta_q;
+ooo_flow_fc_entry_t gc_fc_q;
+gc_fsm_state_t gc_fsm_state;
+logic gc_ooo_flow_id_valid;
+ooo_flow_id_t gc_ooo_flow_id;
+logic gc_inc_ooo_flow_fl_size;
+logic gc_dec_ooo_flow_fl_size;
+gc_fsm_state_t gc_fsm_state_next;
+logic [31:0] gc_reassembly_fl_size;
+ooo_flow_ll_entry_ptr_t gc_lle_nextptr_q;
+ooo_flow_ll_entry_ptr_t gc_lle_currptr_q;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH:0] gc_ooo_flow_fl_size_r;
+
+logic gc_lle_rden_r;
+metadata_t gc_lle_meta_q_r;
+logic gc_ooo_flow_id_valid_r;
+ooo_flow_fc_entry_t gc_fc_q_r;
+ooo_flow_id_t gc_ooo_flow_id_r;
+ooo_flow_ll_entry_ptr_t gc_lle_nextptr_q_r;
+ooo_flow_ll_entry_ptr_t gc_lle_currptr_q_r;
+
+// Garbage-collection response FSM
+reassembly_gc_meta_t gc_response_meta;
+gc_response_fsm_state_t gc_response_fsm_state;
+gc_response_fsm_state_t gc_response_fsm_state_next;
+
+reassembly_gc_meta_t gc_response_meta_r;
+
+// Scheduler->Reassembly GC request FIFO
+logic reassembly_gc_req_fifo_full;
+logic reassembly_gc_req_fifo_rdreq;
+logic reassembly_gc_req_fifo_wrreq;
+logic reassembly_gc_req_fifo_empty;
+logic reassembly_gc_req_state_idle;
+reassembly_gc_req_t reassembly_gc_req_fifo_q;
+reassembly_gc_req_t reassembly_gc_req_fifo_data;
+
+// Reassembly->Scheduler GC response FIFO
+logic reassembly_gc_rsp_fifo_full;
+logic reassembly_gc_rsp_fifo_rdreq;
+logic reassembly_gc_rsp_fifo_wrreq;
+logic reassembly_gc_rsp_fifo_empty;
+reassembly_gc_meta_t reassembly_gc_rsp_fifo_q;
+reassembly_gc_meta_t reassembly_gc_rsp_fifo_data;
+
+// Miscellaneous signals
+logic fc_rden_b_r;
+fce_t in_fce_data_r1;
+fce_t in_fce_data_r2;
+heap_size_t heap_size;
+logic lle_nextptr_rden_b_r;
+metadata_t in_meta_data_r1;
+metadata_t in_meta_data_r2;
+ooo_flow_fc_entry_t fc_q_a_r;
+ooo_flow_fc_entry_t fc_data_a_r;
+scheduler_token_t in_token_data_r;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] fl_q_r;
+ooo_flow_ll_entry_ptr_t lle_nextptr_data_a_r;
+
+// Debug
+logic gc_inc_cur_ll_release_cnt;
+logic gc_reset_cur_ll_release_cnt;
+logic [OOO_FLOW_LL_ENTRY_AWIDTH-1:0] gc_cur_ll_release_cnt_r;
+
+initial begin // Time-zero values for latched (*_r) signals, the GC state bits, and all FSM state variables
+ fl_q_r = 0; // Latches declared under "Miscellaneous signals"
+ fc_q_a_r = 0;
+ fc_data_a_r = 0;
+ fc_rden_b_r = 0;
+ lle_nextptr_data_a_r = 0;
+ lle_nextptr_rden_b_r = 0;
+
+ in_fce_data_r1 = 0; // Input-side latches (FCE, metadata, token)
+ in_fce_data_r2 = 0;
+ in_meta_data_r1 = 0;
+ in_meta_data_r2 = 0;
+ in_token_data_r = 0;
+
+ reassembly_service_fsm_fc_q_r = 0; // Reassembly service FSM latches
+ reassembly_service_fsm_fc_data_r = 0;
+ reassembly_service_fsm_lle_meta_q_r = 0;
+ reassembly_service_fsm_fc_address_r = 0;
+ reassembly_service_fsm_lle_address_r = 0;
+ reassembly_service_fsm_lle_nextptr_q_r = 0;
+
+ gc_fc_q_r = 0; // Garbage-collection FSM latches
+ gc_lle_rden_r = 0;
+ gc_lle_meta_q_r = 0;
+ gc_ooo_flow_id_r = 0;
+ gc_lle_nextptr_q_r = 0;
+ gc_lle_currptr_q_r = 0;
+ gc_ooo_flow_fl_size_r = 0; // Free-list size counter starts at 0; the free-list init FSM raises the increment strobe per inserted entry
+ gc_ooo_flow_id_valid_r = 0;
+
+ gc_response_meta_r = 0; // GC response FSM latch
+
+ for (i = 0; i < MAX_NUM_OOO_FLOWS; i = i + 1) begin // No flow is marked for GC initially (a 1 marks the flow)
+ gc_state[i] = 0;
+ end
+
+ gc_cur_ll_release_cnt_r = 0; // Debug counter
+
+ fl_data_init = 0; // Free-list initialization starts from entry 0
+ fl_fsm_state = FL_FSM_STATE_IDLE; // Every FSM starts in its IDLE state
+ gc_fsm_state = GC_FSM_STATE_IDLE;
+ ft_service_fsm_state = FT_SERVICE_FSM_STATE_IDLE;
+ gc_response_fsm_state = GC_RESPONSE_FSM_STATE_IDLE;
+ reassembly_service_fsm_state = REASSEMBLY_SERVICE_FSM_STATE_IDLE;
+end
+
+// Heap priority, P, computed as follows:
+// P = (ll_size / num_flits) * PRIORITY_SCALING_FACTOR
+// P = ll_size * (PRIORITY_SCALING_FACTOR / num_flits)
+// P = ll_size * 2^(log2(PRIORITY_SCALING_FACTOR / num_flits))
+// P = ll_size * 2^(LOG_PRIORITY_SCALING_FACTOR - log2(num_flits))
+// Then, P ~ ll_size << (LOG_PRIORITY_SCALING_FACTOR - log2(num_flits))
+assign log2_num_flits = (num_flits <= 1) ? 0 :
+ (num_flits == 2) ? 1 :
+ (num_flits <= 5) ? 2 :
+ (num_flits <= 11) ? 3 :
+ (num_flits <= 22) ? 4 : 5; // Threshold lookup approximating log2(num_flits); result is in 0..5
+
+assign exponent_pos = (LOG_PRIORITY_SCALING_FACTOR >= log2_num_flits); // 1: enqueue sites shift left by exponent; 0: they shift right
+assign exponent = (exponent_pos ? (LOG_PRIORITY_SCALING_FACTOR - log2_num_flits) :
+ (log2_num_flits - LOG_PRIORITY_SCALING_FACTOR)); // Magnitude of the shift; direction is carried by exponent_pos
+// Reassembler read channel: expose the reassembly service FSM's current flow context and packet metadata
+assign reassembly_rdch_fce = reassembly_service_fsm_fc_q;
+assign reassembly_rdch_meta = reassembly_service_fsm_lle_meta_q;
+
+// Dual-port GC register file: one "marked for GC" bit per OOO flow (gc_state), with two write and two registered read ports
+always @(posedge clk) begin
+ if (rst) begin // Reset clears only the read outputs; gc_state itself is not modified here
+ gc_rd_q[0] <= 1'b0;
+ gc_rd_q[1] <= 1'b0;
+ end
+ else begin
+ // Write logic (if both ports write the same address, port 1 is assigned last and wins)
+ for (i = 0; i < 2; i = i + 1) begin
+ if (gc_wren[i]) begin
+ gc_state[gc_wr_address[i]] <= gc_wr_data[i];
+ end
+ end
+ // Read logic (with write forwarding); read data appears on gc_rd_q one cycle after gc_rden is raised
+ for (i = 0; i < 2; i = i + 1) begin
+ if (gc_rden[i]) begin
+ if (gc_wren[1] && (gc_rd_address[i] == gc_wr_address[1])) begin // Port 1 is checked first, matching the write priority above
+ gc_rd_q[i] <= gc_wr_data[1];
+ end
+ else if (gc_wren[0] && (gc_rd_address[i] == gc_wr_address[0])) begin
+ gc_rd_q[i] <= gc_wr_data[0];
+ end
+ else begin // No same-cycle write to this address: return the stored bit
+ gc_rd_q[i] <= gc_state[gc_rd_address[i]];
+ end
+ end // When gc_rden[i] is low, gc_rd_q[i] holds its previous value
+ end
+ end
+end
+
+always @(*) begin
+ ft_service_fsm_state_next = ft_service_fsm_state;
+ ft_service_fsm_ooo_flow_valid = fc_q_a.valid && !gc_rd_q[0];
+
+ reassembly_service_fsm_state_next = (
+ reassembly_service_fsm_state);
+
+ reassembly_service_fsm_fc_data_latch_en = 0;
+ reassembly_service_fsm_ooo_flow_ll_empty = 0;
+ reassembly_service_fsm_fc_q = reassembly_service_fsm_fc_q_r;
+ reassembly_service_fsm_lle_meta_q = reassembly_service_fsm_lle_meta_q_r;
+ reassembly_service_fsm_fc_address = reassembly_service_fsm_fc_address_r;
+ reassembly_service_fsm_lle_address = reassembly_service_fsm_lle_address_r;
+ reassembly_service_fsm_lle_nextptr_q = reassembly_service_fsm_lle_nextptr_q_r;
+
+ in_fce_ready = 0;
+ in_meta_ready = 0;
+ in_token_ready = 0;
+
+ out_sched_fifo_meta_int = 0;
+ out_sched_fifo_valid_int = 0;
+
+ num_flits = 1;
+ heap_in_enque_en = 0;
+ heap_out_deque_max_en = 0;
+ heap_in_enque_priority = 0;
+ heap_in_deque_max_req_en = 0;
+ heap_in_enque_ooo_flow_id = 0;
+ heap_in_enque_priority_padded = 0;
+
+ gc_rden[0] = 0;
+ gc_rden[1] = 0;
+ gc_wren[0] = 0;
+ gc_wren[1] = 0;
+ gc_wr_data[0] = 0;
+ gc_wr_data[1] = 0;
+ gc_rd_address[0] = 0;
+ gc_rd_address[1] = 0;
+ gc_wr_address[0] = 0;
+ gc_wr_address[1] = 0;
+
+ fc_rden_a = 0;
+ fc_rden_b = 0;
+ fc_wren_a = 0;
+ fc_wren_b = 0;
+ fc_data_a = 0;
+ fc_data_b = 0;
+ fc_address_a = 0;
+ fc_address_b = 0;
+
+ fl_data = 0;
+ fl_rdreq = 0;
+ fl_wrreq = 0;
+
+ fl_data_init_next = 0;
+ fl_fsm_state_next = fl_fsm_state;
+
+ ft_update_fifo_data = 0;
+ ft_update_fifo_wrreq = 0;
+
+ lle_meta_wren_a = 0;
+ lle_meta_data_a = 0;
+ lle_meta_address_a = 0;
+ lle_nextptr_wren_a = 0;
+ lle_nextptr_data_a = 0;
+ lle_nextptr_address_a = 0;
+
+ lle_meta_rden_b = 0;
+ lle_meta_data_b = 0;
+ lle_meta_address_b = 0;
+ lle_nextptr_rden_b = 0;
+ lle_nextptr_data_b = 0;
+ lle_nextptr_address_b = 0;
+
+ reassembly_wrch_ready = 0;
+ reassembly_rdch_ready = 0;
+ reassembly_rdch_rd_valid = 0;
+ reassembly_rdch_rd_ooo_flow_invalid = 0;
+
+ reassembly_gc_req_fifo_data = 0;
+ reassembly_gc_req_fifo_wrreq = 0;
+ reassembly_gc_rsp_fifo_rdreq = 0;
+
+ gc_lle_rden = 0;
+ gc_fc_q = gc_fc_q_r;
+ gc_inc_ooo_flow_fl_size = 0;
+ gc_dec_ooo_flow_fl_size = 0;
+ gc_lle_meta_q = gc_lle_meta_q_r;
+ gc_fsm_state_next = gc_fsm_state;
+ gc_ooo_flow_id = gc_ooo_flow_id_r;
+ gc_lle_nextptr_q = gc_lle_nextptr_q_r;
+ gc_lle_currptr_q = gc_lle_currptr_q_r;
+ gc_ooo_flow_id_valid = gc_ooo_flow_id_valid_r;
+
+ gc_response_meta = gc_response_meta_r;
+ gc_response_fsm_state_next = gc_response_fsm_state;
+
+ gc_inc_cur_ll_release_cnt = 0;
+ gc_reset_cur_ll_release_cnt = 0;
+
+ /**
+ * Free-list initialization FSM.
+ */
+ case (fl_fsm_state)
+ // Idle state: Wait for rst to fall,
+ // then proceed to the init state.
+ FL_FSM_STATE_IDLE: begin
+ fl_fsm_state_next = FL_FSM_STATE_INIT;
+ end
+ // Insert into the free-list FIFO
+ FL_FSM_STATE_INIT: begin
+ fl_wrreq = 1;
+ fl_data = fl_data_init;
+ gc_inc_ooo_flow_fl_size = 1;
+
+ if (fl_data_init ==
+ {OOO_FLOW_LL_ENTRY_AWIDTH{1'b1}}) begin
+ fl_fsm_state_next = FL_FSM_STATE_INIT_DONE;
+ end
+ else begin
+ fl_fsm_state_next = FL_FSM_STATE_INIT;
+ fl_data_init_next = fl_data_init + 1;
+ end
+ end
+ // Initialization complete
+ FL_FSM_STATE_INIT_DONE: begin
+ fl_fsm_state_next = FL_FSM_STATE_INIT_DONE;
+ end
+ endcase
+
+ /**
+ * FT Service FSM.
+ */
+ case (ft_service_fsm_state)
+ // Idle state.
+ FT_SERVICE_FSM_STATE_IDLE: begin
+ // High priority: If the token channel (from FT)
+ // is not empty, perform the scheduler operation.
+ // Since this is not predicated on any resource
+ // availability (free LL entries, etc.), it can
+ // always proceed without stalling.
+ if (in_token_valid) begin
+ ft_service_fsm_state_next = FT_SERVICE_FSM_STATE_CTRL_RD;
+ end
+ // Low priority: If the input (metadata and FCE)
+ // FIFOs contain valid entries, the LL free-list
+ // is not empty, and the scheduler's output FIFO
+ // is not full, proceed to read state.
+ else if (in_meta_valid && in_fce_valid &&
+ !fl_empty && !out_sched_fifo_almost_full) begin
+ ft_service_fsm_state_next = FT_SERVICE_FSM_STATE_DATA_RD;
+ end
+ end
+ // (Data) read state: Read the entry in the context
+ // table corresponding to the given flow ID, deque
+ // both input FIFOs, and finally allocate a new LL
+ // node from the free-list. Also update its data.
+ FT_SERVICE_FSM_STATE_DATA_RD: begin
+ // Proceed to write
+ ft_service_fsm_state_next = FT_SERVICE_FSM_STATE_DATA_WR;
+
+ // Initiate FC read
+ fc_rden_a = 1;
+ fc_address_a = in_fce_data.ooo_flow_id;
+
+ // Read the GC register file
+ gc_rden[0] = 1;
+ gc_rd_address[0] = in_fce_data.ooo_flow_id;
+
+ // Update the LL node
+ lle_meta_wren_a = 1;
+ lle_nextptr_wren_a = 1;
+ lle_meta_address_a = fl_q;
+ lle_nextptr_address_a = fl_q;
+ lle_meta_data_a = in_meta_data;
+ lle_nextptr_data_a = OOO_FLOW_LL_INVALID_PTR;
+
+ // Deque input FIFOs
+ in_fce_ready = 1;
+ in_meta_ready = 1;
+ end
+ // (Data) write state: Update the tail LL entry
+ // to point to the newly-allocated LL node, and
+ // update the tail (and head, if required) ptrs.
+ FT_SERVICE_FSM_STATE_DATA_WR: begin
+ // Return to idle
+ ft_service_fsm_state_next = FT_SERVICE_FSM_STATE_IDLE;
+
+ // If the corresponding OOO flow is valid and
+ // is not marked for GC, commit the update.
+ if (ft_service_fsm_ooo_flow_valid) begin
+ // Write back the FC
+ fc_wren_a = 1;
+ fc_data_a = fc_q_a;
+ fc_address_a = in_fce_data_r1.ooo_flow_id;
+
+ // The flow's LL is empty, update the head pointer. Also
+ // implies that the flow ID is not currently in the heap;
+ // As such, proceed to the heap insertion state.
+ if (fc_q_a.ooo_flow_ll.head == OOO_FLOW_LL_INVALID_PTR) begin
+ fc_data_a.ooo_flow_ll.head = fl_q_r;
+
+ // Move to the heap insertion state
+ ft_service_fsm_state_next = (
+ FT_SERVICE_FSM_STATE_DATA_INSERT_HEAP);
+ end
+ // Else, update the tail LL entry's next pointer
+ else begin
+ lle_nextptr_wren_a = 1;
+ lle_nextptr_data_a = fl_q_r;
+ lle_nextptr_address_a = fc_q_a.ooo_flow_ll.tail;
+ end
+ // Update the tail pointer
+ fc_data_a.ooo_flow_ll.tail = fl_q_r;
+
+ // If this is the first out-of-order packet for
+ // this flow (indicated by a slow count of 0),
+ // update the sequence number and FT address.
+ if (in_fce_data_r1.slow_cnt == 0) begin
+ fc_data_a.seq = in_fce_data_r1.seq;
+ fc_data_a.addr0 = in_fce_data_r1.addr0;
+ fc_data_a.addr1 = in_fce_data_r1.addr1;
+ fc_data_a.addr2 = in_fce_data_r1.addr2;
+ fc_data_a.addr3 = in_fce_data_r1.addr3;
+ end
+
+ // Deque the free-list FIFO
+ fl_rdreq = 1;
+ gc_dec_ooo_flow_fl_size = 1;
+ end
+ // Else, either the OOO flow ID received from the FTW is
+ // invalid, the packet corresponds to stale state, or
+ // the flow has been marked for GC. Drop the packet.
+ else begin
+ out_sched_fifo_valid_int = 1;
+ out_sched_fifo_meta_int = in_meta_data_r1;
+ out_sched_fifo_meta_int.pkt_flags = PKT_DROP;
+ end
+ end
+ // (Data) heap insertion state: Insert the
+ // newly-allocated flow ID into the heap.
+ FT_SERVICE_FSM_STATE_DATA_INSERT_HEAP: begin
+ // Return to idle
+ ft_service_fsm_state_next = FT_SERVICE_FSM_STATE_IDLE;
+
+ // TODO(natre): Assuming that the heap is guaranteed
+ // to be ready. This is currently true (since it can
+ // hold exactly MAX_NUM_OOO_FLOWS entries); however,
+ // this should ideally handle enque stalls.
+ heap_in_enque_en = 1;
+ num_flits = in_meta_data_r2.flits;
+ heap_in_enque_ooo_flow_id = in_fce_data_r2.ooo_flow_id;
+
+ if (fc_q_a_r.ll_size == 0) begin
+ heap_in_enque_priority = (
+ exponent_pos ? (1 << exponent) :
+ (1 >> exponent));
+ end
+ else begin
+ heap_in_enque_priority_padded = (
+ exponent_pos ? (fc_q_a_r.ll_size << exponent) :
+ (fc_q_a_r.ll_size >> exponent));
+
+ heap_in_enque_priority = (
+ heap_in_enque_priority_padded[
+ HEAP_PRIORITY_PADDED_AWIDTH-1:
+ HEAP_PRIORITY_AWIDTH] != 0) ?
+ {HEAP_PRIORITY_AWIDTH{1'b1}} :
+ heap_in_enque_priority_padded;
+ end
+ end
+ // (Ctrl) read state: Read the entry in the
+ // context table corresponding to the given
+ // OOO flow ID and deque the token channel.
+ FT_SERVICE_FSM_STATE_CTRL_RD: begin
+ // Proceed to write
+ ft_service_fsm_state_next = FT_SERVICE_FSM_STATE_CTRL_WR;
+
+ // Initiate FC read
+ fc_rden_a = 1;
+ fc_address_a = in_token_data.ooo_flow_id;
+
+ // Read the GC register file
+ gc_rden[0] = 1;
+ gc_rd_address[0] = in_token_data.ooo_flow_id;
+
+ // Deque the token channel
+ in_token_ready = 1;
+ end
+ // (Ctrl) write state: Update the flow
+ // context table and GC register file.
+ FT_SERVICE_FSM_STATE_CTRL_WR: begin
+ // Return to idle
+ ft_service_fsm_state_next = FT_SERVICE_FSM_STATE_IDLE;
+
+ // Write back the FC
+ fc_wren_a = 1;
+ fc_data_a = 0;
+ fc_address_a = in_token_data_r.ooo_flow_id;
+
+ // Write back the GC entry
+ gc_wren[0] = 1;
+ gc_wr_data[0] = 0;
+ gc_wr_address[0] = in_token_data_r.ooo_flow_id;
+
+ // Allocate the flow context
+ fc_data_a.valid = 1;
+ fc_data_a.tuple = in_token_data_r.tuple;
+ fc_data_a.ooo_flow_ll.head = OOO_FLOW_LL_INVALID_PTR;
+ fc_data_a.ooo_flow_ll.tail = OOO_FLOW_LL_INVALID_PTR;
+ end
+ default: begin
+ ft_service_fsm_state_next = FT_SERVICE_FSM_STATE_IDLE;
+ end
+ endcase
+
+ /**
+ * Flow Reassembly service FSM.
+ */
+ case (reassembly_service_fsm_state)
+ // Idle state. If enable for flow reassembly's read/write
+ // channels is raised, proceed to the corresponding state.
+ REASSEMBLY_SERVICE_FSM_STATE_IDLE: begin
+ if (reassembly_rdch_rden) begin
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_FC_RD);
+ end
+ else if (reassembly_wrch_wren &&
+ !ft_update_fifo_full) begin
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_FC_WR);
+ end
+ end
+ // FC read state. Read the entry in the context table
+ // corresponding to the given flow ID (using port B).
+ // If there is already a write in progress for the
+ // same flow ID on port A, forward the write.
+ REASSEMBLY_SERVICE_FSM_STATE_FC_RD: begin
+ // Read the GC register file
+ gc_rden[1] = 1;
+ gc_rd_address[1] = reassembly_rdch_ooo_flow_id;
+
+ // FC read address
+ fc_address_b = reassembly_rdch_ooo_flow_id;
+ reassembly_service_fsm_fc_address = (
+ reassembly_rdch_ooo_flow_id);
+
+ // If there isn't a contending write happening
+ // on this cycle, raise rden. Else, we simply
+ // use the forwarded write.
+ if (!(fc_wren_a && (fc_address_a ==
+ reassembly_rdch_ooo_flow_id))) begin
+ fc_rden_b = 1;
+ end
+
+ // Acknowledge the reassembler's read channel
+ // request, and proceed to the LL read state.
+ reassembly_rdch_ready = 1;
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_LL_RD);
+ end
+ // LL read state. Read the LL entry corresponding to the
+ // flow's head pointer. If the flow is either: invalid,
+ // marked for GC, or has an invalid head pointer (error
+ // state), skip this lookup and return to idle.
+ REASSEMBLY_SERVICE_FSM_STATE_LL_RD: begin
+ reassembly_service_fsm_fc_q = (
+ fc_rden_b_r ? fc_q_b : fc_data_a_r);
+
+ // The flow state is invalid. Raise read invalid
+ // (indicating to the reassembler that it should
+ // discard the current flow ID and draw another
+ // from the heap), and return to idle.
+ if (!reassembly_service_fsm_fc_q.valid || gc_rd_q[1] ||
+ (reassembly_service_fsm_fc_q.ooo_flow_ll.head ==
+ OOO_FLOW_LL_INVALID_PTR)) begin
+ reassembly_rdch_rd_valid = 1;
+ reassembly_rdch_rd_ooo_flow_invalid = 1;
+
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_IDLE);
+ end
+ // Read the LL entry on port B. If there is an
+ // ongoing write to the corresponding LL entry
+ // (via port A), forward the write data.
+ else begin
+ // Read the LL entry. Note that there should
+ // never be a contending write for the meta-
+ // data entry, so we read it unconditionally.
+ reassembly_service_fsm_lle_address = (
+ reassembly_service_fsm_fc_q.ooo_flow_ll.head);
+
+ lle_meta_rden_b = 1;
+ lle_meta_address_b = reassembly_service_fsm_lle_address;
+ lle_nextptr_address_b = reassembly_service_fsm_lle_address;
+
+ if (!(lle_nextptr_wren_a && (lle_nextptr_address_a ==
+ reassembly_service_fsm_lle_address))) begin
+ // Read the LL entry
+ lle_nextptr_rden_b = 1;
+ end
+ // Proceed to the read done state
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_RD_DONE);
+ end
+ end
+ // Read done state. Respond to the reassembler with the
+ // read FCE and packet metadata. After this point, any
+ // writes to the previously-read FT context and/or LLE
+ // are forwarded to the respective registers.
+ REASSEMBLY_SERVICE_FSM_STATE_RD_DONE: begin
+ reassembly_service_fsm_lle_meta_q = lle_meta_q_b;
+ reassembly_service_fsm_lle_nextptr_q = (
+ lle_nextptr_rden_b_r ? lle_nextptr_q_b :
+ lle_nextptr_data_a_r);
+ // Indicate read completion
+ reassembly_rdch_rd_valid = 1;
+
+ // Return to idle
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_IDLE);
+ end
+ // FC write state. Update the latched tuple with the
+ // reassembler's wr channel data, and commit it back
+ // to the context table.
+ REASSEMBLY_SERVICE_FSM_STATE_FC_WR: begin
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_FC_WR);
+
+ // Prepare the updated context
+ fc_data_b = reassembly_service_fsm_fc_q_r;
+ fc_data_b.seq = reassembly_wrch_update_fce.seq;
+ fc_data_b.pointer = reassembly_wrch_update_fce.pointer;
+ fc_data_b.ll_valid = reassembly_wrch_update_fce.ll_valid;
+ fc_data_b.last_7_bytes = reassembly_wrch_update_fce.last_7_bytes;
+
+ // The flow LL becomes empty (previously had a single entry)
+ if (reassembly_service_fsm_fc_q_r.ooo_flow_ll.head ==
+ reassembly_service_fsm_fc_q_r.ooo_flow_ll.tail) begin
+ reassembly_service_fsm_ooo_flow_ll_empty = 1;
+ fc_data_b.ooo_flow_ll.head = OOO_FLOW_LL_INVALID_PTR;
+ fc_data_b.ooo_flow_ll.tail = OOO_FLOW_LL_INVALID_PTR;
+ end
+ // Else, move the head pointer
+ else begin
+ fc_data_b.ooo_flow_ll.head = (
+ reassembly_service_fsm_lle_nextptr_q_r);
+ end
+
+ // Compute the new LL size
+ fc_data_b.ll_size = (
+ // LL size including this packet
+ (reassembly_service_fsm_fc_q_r.ll_size + 1)
+ - reassembly_wrch_rel_pkt_cnt // Released packet count
+ );
+
+ // Latch the FC data
+ reassembly_service_fsm_fc_data_latch_en = 1;
+
+ // Set the write address
+ fc_address_b = reassembly_service_fsm_fc_address_r;
+
+ // (Preemptively) set the FT update data
+ ft_update_fifo_data.seq = reassembly_wrch_update_fce.seq;
+ ft_update_fifo_data.is_delete = reassembly_wrch_is_delete;
+ ft_update_fifo_data.rel_pkt_cnt = reassembly_wrch_rel_pkt_cnt;
+ ft_update_fifo_data.tuple = reassembly_service_fsm_fc_q_r.tuple;
+ ft_update_fifo_data.addr0 = reassembly_service_fsm_fc_q_r.addr0;
+ ft_update_fifo_data.addr1 = reassembly_service_fsm_fc_q_r.addr1;
+ ft_update_fifo_data.addr2 = reassembly_service_fsm_fc_q_r.addr2;
+ ft_update_fifo_data.addr3 = reassembly_service_fsm_fc_q_r.addr3;
+
+ // If there is an ongoing read or write for the same flow context
+ // on this cycle (via port A), remain in this state and defer the
+ // write; otherwise, commit the data. If required (i.e., non-zero
+ // packets are released), push an update to the FT. Also reinsert
+ // the flow ID back into the heap if required.
+ if (!((fc_rden_a | fc_wren_a) && (
+ fc_address_a == fc_address_b))) begin
+ // Perform the FT update
+ fc_wren_b = 1;
+
+ // Release the LLE back to the free-list
+ fl_wrreq = 1;
+ gc_inc_ooo_flow_fl_size = 1;
+ fl_data = reassembly_service_fsm_lle_address_r;
+
+ // Insert into the FT update FIFO if required
+ if (reassembly_wrch_rel_pkt_cnt != 0 ||
+ reassembly_wrch_is_delete) begin
+ ft_update_fifo_wrreq = 1;
+ end
+
+ // If the flow's LL is not empty and is not marked for
+ // deletion, also re-insert the flow ID into the heap.
+ if (!(reassembly_service_fsm_ooo_flow_ll_empty ||
+ reassembly_wrch_is_delete)) begin
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_INSERT_HEAP);
+ end
+ // Else, return to idle
+ else begin
+ // Indicate completion, return to idle
+ reassembly_wrch_ready = 1;
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_IDLE);
+ end
+ end
+ end
+ // Heap insertion state. Re-insert the latched
+ // flow ID into the heap and return to idle.
+ REASSEMBLY_SERVICE_FSM_STATE_INSERT_HEAP: begin
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_INSERT_HEAP);
+
+ if (heap_in_enque_ready && !heap_in_enque_en) begin
+ heap_in_enque_en = 1;
+ heap_in_enque_ooo_flow_id = reassembly_rdch_ooo_flow_id;
+
+ num_flits = reassembly_service_fsm_lle_meta_q_r.flits;
+ heap_in_enque_priority_padded = (
+ exponent_pos ?
+ (reassembly_service_fsm_fc_data_r.ll_size << exponent) :
+ (reassembly_service_fsm_fc_data_r.ll_size >> exponent));
+
+ heap_in_enque_priority = (
+ heap_in_enque_priority_padded[
+ HEAP_PRIORITY_PADDED_AWIDTH-1:
+ HEAP_PRIORITY_AWIDTH] != 0) ?
+ {HEAP_PRIORITY_AWIDTH{1'b1}} :
+ heap_in_enque_priority_padded;
+
+ // Indicate completion, return to idle
+ reassembly_wrch_ready = 1;
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_IDLE);
+ end
+ end
+ // Invalid state, return to idle
+ default: begin
+ reassembly_service_fsm_state_next = (
+ REASSEMBLY_SERVICE_FSM_STATE_IDLE);
+ end
+ endcase
+
+ /**
+ * Garbage-collection (GC) FSM.
+ */
+ case (gc_fsm_state)
+ // Idle state. If either: a) the heap's deque-max output
+ // FIFO contains at least one entry, or b) the flow LL/
+ // reassembly free list sizes fall below their watermark
+ // levels, start garbage collection.
+ GC_FSM_STATE_IDLE: begin
+ gc_fsm_state_next = GC_FSM_STATE_IDLE;
+
+ gc_ooo_flow_id_valid = 0;
+ gc_reset_cur_ll_release_cnt = 1;
+ if (!reassembly_gc_req_fifo_full) begin
+ // The deque-max output FIFO has an entry.
+ // Deque it, and commence the GC process.
+ if (heap_out_deque_max_ready) begin
+ heap_out_deque_max_en = 1;
+ gc_fsm_state_next = GC_FSM_STATE_FC_RD;
+
+ gc_ooo_flow_id_valid = 1;
+ gc_ooo_flow_id = heap_out_deque_max_ooo_flow_id;
+ end
+ // Else, if the free lists corresponding to either
+ // flow or reassembly LL entries fall below their
+ // watermark levels, issue a heap deque-max op
+ // and continue waiting in this state.
+ else if (
+ (fl_fsm_state == FL_FSM_STATE_INIT_DONE) &&
+ (heap_size != 0) && heap_in_deque_max_req_ready &&
+ ((gc_ooo_flow_fl_size_r <= OOO_FLOW_FL_LOW_WATERMARK) ||
+ ((gc_reassembly_fl_size <= REASSEMBLY_FL_LOW_WATERMARK) &&
+ reassembly_gc_req_state_idle))) begin
+ heap_in_deque_max_req_en = 1;
+ end
+ end
+ end
+ // FC read state. Mark the flow for GC and
+ // read the corresponding flow context.
+ GC_FSM_STATE_FC_RD: begin
+ gc_fsm_state_next = GC_FSM_STATE_FC_RD;
+
+ // Update the GC register file
+ gc_wren[1] = 1;
+ gc_wr_data[1] = 1;
+ gc_wr_address[1] = gc_ooo_flow_id_r;
+
+ // Perform a context table read using port B, deferring to
+ // the reassembly service FSM if required. The only source
+ // of write contention is the FT service FSM's accesses on
+ // port A to the same address.
+ if (!(fc_wren_a && (fc_address_a == gc_ooo_flow_id_r)) &&
+ (reassembly_service_fsm_state !=
+ REASSEMBLY_SERVICE_FSM_STATE_FC_RD) &&
+ (reassembly_service_fsm_state !=
+ REASSEMBLY_SERVICE_FSM_STATE_FC_WR)) begin
+
+ fc_rden_b = 1;
+ fc_address_b = gc_ooo_flow_id_r;
+ gc_fsm_state_next = GC_FSM_STATE_INIT_REASSEMBLY_GC;
+ end
+ end
+ // Init Reassembly GC state. Create and
+ // send a GC request to the reassembler.
+ GC_FSM_STATE_INIT_REASSEMBLY_GC: begin
+ // Latch the FC read context
+ gc_fc_q = fc_q_b;
+
+ // Send a GC request
+ reassembly_gc_req_fifo_wrreq = 1;
+ reassembly_gc_req_fifo_data.pointer = gc_fc_q.pointer;
+ reassembly_gc_req_fifo_data.ll_valid = gc_fc_q.ll_valid;
+ reassembly_gc_req_fifo_data.meta.ooo_flow_id = gc_ooo_flow_id_r;
+
+ reassembly_gc_req_fifo_data.meta.tuple = gc_fc_q.tuple;
+ reassembly_gc_req_fifo_data.meta.addr0 = gc_fc_q.addr0;
+ reassembly_gc_req_fifo_data.meta.addr1 = gc_fc_q.addr1;
+ reassembly_gc_req_fifo_data.meta.addr2 = gc_fc_q.addr2;
+ reassembly_gc_req_fifo_data.meta.addr3 = gc_fc_q.addr3;
+
+ // The OOO flow's head LL entry is invalid. Error state, should
+ // not reach here (for the current design, a OOO flow without a
+ // pending packet in the flow queue should not have an entry
+ // in the heap).
+ if (gc_fc_q.ooo_flow_ll.head == OOO_FLOW_LL_INVALID_PTR) begin
+ `ifdef DEBUG
+ $error("[SC, GC] GC'd flow %0d has invalid head",
+ gc_ooo_flow_id_r);
+ $finish;
+ `endif
+ gc_fsm_state_next = GC_FSM_STATE_IDLE;
+ end
+ else begin
+ // Else, latch currptr and proceed to LL read
+ gc_lle_currptr_q = gc_fc_q.ooo_flow_ll.head;
+ gc_fsm_state_next = GC_FSM_STATE_LL_RD;
+ end
+ end
+ // LL read state. Read this flow's head LL entry.
+ GC_FSM_STATE_LL_RD: begin
+ gc_fsm_state_next = GC_FSM_STATE_LL_RD;
+
+ if (!(lle_nextptr_wren_a && (lle_nextptr_address_a ==
+ gc_lle_currptr_q_r)) &&
+ (reassembly_service_fsm_state !=
+ REASSEMBLY_SERVICE_FSM_STATE_LL_RD)) begin
+ // Read the LLE
+ gc_lle_rden = 1;
+ lle_meta_rden_b = 1;
+ lle_nextptr_rden_b = 1;
+ lle_meta_address_b = gc_lle_currptr_q_r;
+ lle_nextptr_address_b = gc_lle_currptr_q_r;
+
+ // Proceed to read done
+ gc_fsm_state_next = GC_FSM_STATE_LL_RD_DONE;
+ end
+ end
+ // LL read done state. Release the packet metadata (enque
+ // into the scheduler's output FIFO), add the current LLE
+ // to the free-list, and proceed to the next LLE.
+ GC_FSM_STATE_LL_RD_DONE: begin
+ gc_fsm_state_next = GC_FSM_STATE_LL_RD_DONE;
+
+ // If the corresponding read completed on this
+ // cycle, latch the next pointer and metadata.
+ if (gc_lle_rden_r) begin
+ gc_lle_meta_q = lle_meta_q_b;
+ gc_lle_nextptr_q = lle_nextptr_q_b;
+ end
+ if (!out_sched_fifo_almost_full &&
+ !out_sched_fifo_valid_int && !fl_wrreq) begin
+ gc_inc_cur_ll_release_cnt = 1;
+
+ // Release the current LLE
+ fl_wrreq = 1;
+ gc_inc_ooo_flow_fl_size = 1;
+ fl_data = gc_lle_currptr_q_r;
+
+ // Release the packet metadata
+ out_sched_fifo_valid_int = 1;
+ out_sched_fifo_meta_int = gc_lle_meta_q;
+ out_sched_fifo_meta_int.pkt_flags = PKT_DROP;
+
+ // If the next pointer is valid, update (latch) the
+ // current pointer and proceed to the next LL entry.
+ if (gc_lle_nextptr_q != OOO_FLOW_LL_INVALID_PTR) begin
+ gc_lle_currptr_q = gc_lle_nextptr_q;
+ gc_fsm_state_next = GC_FSM_STATE_LL_RD;
+ end
+ // Else, return to idle
+ else begin
+ gc_fsm_state_next = GC_FSM_STATE_IDLE;
+ end
+ end
+ end
+ // Invalid state, return to idle
+ default: begin
+ gc_fsm_state_next = GC_FSM_STATE_IDLE;
+ end
+ endcase
+
+ /**
+ * Garbage-collection (GC) Response FSM.
+ */
+ case (gc_response_fsm_state)
+ // Idle state. Wait until the Reassembly->
+ // Scheduler GC response FIFO is non-empty.
+ GC_RESPONSE_FSM_STATE_IDLE: begin
+ gc_response_fsm_state_next = (
+ GC_RESPONSE_FSM_STATE_IDLE);
+
+ // If the response FIFO has an entry, and it's not the
+ // flow that's currently being served by the GC FSM,
+ // move to the update FT state.
+ if (!reassembly_gc_rsp_fifo_empty &&
+ !(gc_ooo_flow_id_valid && (gc_ooo_flow_id ==
+ reassembly_gc_rsp_fifo_q.ooo_flow_id))) begin
+ // Latch the response metadata and deque the FIFO
+ gc_response_meta = reassembly_gc_rsp_fifo_q;
+ reassembly_gc_rsp_fifo_rdreq = 1;
+
+ gc_response_fsm_state_next = (
+ GC_RESPONSE_FSM_STATE_UPDATE_FT);
+ end
+ end
+ // Update FT state. If the FT update FIFO
+ // is not busy, insert a delete operation
+ // corresponding to this OOO flow.
+ GC_RESPONSE_FSM_STATE_UPDATE_FT: begin
+ gc_response_fsm_state_next = (
+ GC_RESPONSE_FSM_STATE_UPDATE_FT);
+
+ if (reassembly_service_fsm_state !=
+ REASSEMBLY_SERVICE_FSM_STATE_FC_WR) begin
+ // Set the update data
+ ft_update_fifo_wrreq = 1;
+ ft_update_fifo_data.is_delete = 1;
+ ft_update_fifo_data.rel_pkt_cnt = 0;
+ ft_update_fifo_data.tuple = gc_response_meta_r.tuple;
+ ft_update_fifo_data.addr0 = gc_response_meta_r.addr0;
+ ft_update_fifo_data.addr1 = gc_response_meta_r.addr1;
+ ft_update_fifo_data.addr2 = gc_response_meta_r.addr2;
+ ft_update_fifo_data.addr3 = gc_response_meta_r.addr3;
+
+ // Return to idle
+ gc_response_fsm_state_next = (
+ GC_RESPONSE_FSM_STATE_IDLE);
+ end
+ end
+ // Invalid state, return to idle
+ default: begin
+ gc_response_fsm_state_next = (
+ GC_RESPONSE_FSM_STATE_IDLE);
+ end
+ endcase
+end
+
+always @(posedge clk) begin // Registers the FSM states, latched read data and output FIFO signals
+    if (rst) begin
+        fl_q_r <= 0;
+        fc_q_a_r <= 0;
+        fc_data_a_r <= 0;
+        fc_rden_b_r <= 0;
+        in_fce_data_r1 <= 0;
+        in_meta_data_r1 <= 0;
+        in_token_data_r <= 0;
+        lle_nextptr_rden_b_r <= 0;
+        lle_nextptr_data_a_r <= 0;
+        // Reassembly service FSM latches (non-blocking, like every other register in this block)
+        reassembly_service_fsm_fc_q_r <= 0;
+        reassembly_service_fsm_fc_data_r <= 0;
+        reassembly_service_fsm_lle_meta_q_r <= 0;
+        reassembly_service_fsm_fc_address_r <= 0;
+        reassembly_service_fsm_lle_address_r <= 0;
+        reassembly_service_fsm_lle_nextptr_q_r <= 0;
+        // GC FSM latches
+        gc_fc_q_r <= 0;
+        gc_lle_rden_r <= 0;
+        gc_lle_meta_q_r <= 0;
+        gc_ooo_flow_id_r <= 0;
+        gc_lle_nextptr_q_r <= 0;
+        gc_lle_currptr_q_r <= 0;
+        gc_ooo_flow_fl_size_r <= 0;
+        gc_ooo_flow_id_valid_r <= 0;
+        // GC response FSM latch
+        gc_response_meta_r <= 0;
+        // All FSMs start in their idle state
+        gc_fsm_state <= GC_FSM_STATE_IDLE;
+        ft_service_fsm_state <= FT_SERVICE_FSM_STATE_IDLE;
+        gc_response_fsm_state <= GC_RESPONSE_FSM_STATE_IDLE;
+        reassembly_service_fsm_state <= REASSEMBLY_SERVICE_FSM_STATE_IDLE;
+
+        // Scheduler output FIFO
+        out_sched_fifo_meta <= 0;
+        out_sched_fifo_valid <= 0;
+
+        // Free-list initialization
+        fl_data_init <= 0;
+        fl_fsm_state <= FL_FSM_STATE_IDLE;
+
+        // Debug signals
+        gc_cur_ll_release_cnt_r <= 0;
+    end
+    else begin
+        fl_q_r <= fl_q;
+        fc_q_a_r <= fc_q_a;
+        in_fce_data_r1 <= in_fce_data;
+        in_fce_data_r2 <= in_fce_data_r1;
+
+        fc_data_a_r <= fc_data_a;
+        fc_rden_b_r <= fc_rden_b;
+        lle_nextptr_rden_b_r <= lle_nextptr_rden_b;
+        lle_nextptr_data_a_r <= lle_nextptr_data_a;
+        // GC latches; the free-list size counter applies this cycle's inc/dec strobes
+        gc_fc_q_r <= gc_fc_q;
+        gc_lle_rden_r <= gc_lle_rden;
+        gc_lle_meta_q_r <= gc_lle_meta_q;
+        gc_ooo_flow_id_r <= gc_ooo_flow_id;
+        gc_lle_nextptr_q_r <= gc_lle_nextptr_q;
+        gc_lle_currptr_q_r <= gc_lle_currptr_q;
+        gc_ooo_flow_id_valid_r <= gc_ooo_flow_id_valid;
+        gc_ooo_flow_fl_size_r <= ((gc_ooo_flow_fl_size_r +
+                                   gc_inc_ooo_flow_fl_size) -
+                                  gc_dec_ooo_flow_fl_size);
+
+        gc_response_meta_r <= gc_response_meta;
+
+        /**
+         * Reassembly latches, forwarding logic.
+         */
+        reassembly_service_fsm_lle_meta_q_r <= (
+            reassembly_service_fsm_lle_meta_q);
+
+        reassembly_service_fsm_fc_address_r <= (
+            reassembly_service_fsm_fc_address);
+
+        reassembly_service_fsm_lle_address_r <= (
+            reassembly_service_fsm_lle_address);
+
+        // If port A writes the FC address that the reassembly FSM has
+        // latched, forward (register) the port A write data instead.
+        if (fc_wren_a && (fc_address_a ==
+                          reassembly_service_fsm_fc_address)) begin
+            reassembly_service_fsm_fc_q_r <= fc_data_a;
+        end
+        // Else, keep the latched value
+        else begin
+            reassembly_service_fsm_fc_q_r <= (
+                reassembly_service_fsm_fc_q);
+        end
+
+        // If port A writes the LLE address that the reassembly FSM has
+        // latched, forward (register) the port A next-pointer data instead.
+        if (lle_nextptr_wren_a && (lle_nextptr_address_a ==
+                                   reassembly_service_fsm_lle_address)) begin
+            reassembly_service_fsm_lle_nextptr_q_r <= (
+                lle_nextptr_data_a);
+        end
+        // Else, keep the latched value
+        else begin
+            reassembly_service_fsm_lle_nextptr_q_r <= (
+                reassembly_service_fsm_lle_nextptr_q);
+        end
+
+        // Register the FCE data (holds its value while the latch enable is low)
+        if (reassembly_service_fsm_fc_data_latch_en) begin
+            reassembly_service_fsm_fc_data_r <= fc_data_b;
+        end
+
+        in_meta_data_r1 <= in_meta_data;
+        in_meta_data_r2 <= in_meta_data_r1;
+
+        in_token_data_r <= in_token_data;
+        gc_fsm_state <= gc_fsm_state_next;
+        ft_service_fsm_state <= ft_service_fsm_state_next;
+        gc_response_fsm_state <= gc_response_fsm_state_next;
+        reassembly_service_fsm_state <= reassembly_service_fsm_state_next;
+
+        // Scheduler output FIFO
+        out_sched_fifo_meta <= out_sched_fifo_meta_int;
+        out_sched_fifo_valid <= out_sched_fifo_valid_int;
+
+        // Free-list initialization
+        fl_fsm_state <= fl_fsm_state_next;
+        fl_data_init <= fl_data_init_next;
+
+        // Debug signals: count of LL entries released for the flow being GC'd
+        if (gc_reset_cur_ll_release_cnt) begin
+            gc_cur_ll_release_cnt_r <= 0;
+        end
+        else if (gc_inc_cur_ll_release_cnt) begin
+            gc_cur_ll_release_cnt_r <= gc_cur_ll_release_cnt_r + 1;
+
+            if (gc_fsm_state_next == GC_FSM_STATE_IDLE) begin
+                `ifdef DEBUG
+                $display("[SC, GC] Dropped OOO flow ID %0d with %0d LL packets",
+                         gc_ooo_flow_id, (gc_cur_ll_release_cnt_r + 1));
+                `endif
+            end
+        end
+
+        if ((ft_service_fsm_state == FT_SERVICE_FSM_STATE_DATA_WR) &&
+            ft_service_fsm_ooo_flow_valid && (in_fce_data_r1.slow_cnt != 0)) begin
+            // Sanity check: The OOO flow state should be consistent
+            // with the corresponding flow context in the primary FT.
+            assert(fc_q_a.tuple == in_fce_data_r1.tuple)
+            else begin
+                $error("[SC] OOO flow state is inconsistent with FT");
+                $finish;
+            end
+        end
+    end
+    // DEBUG tracing; sits outside the reset if/else, so it is evaluated on every clock edge
+    `ifdef DEBUG
+    if (heap_in_enque_en) begin
+        $display("[SC] Inserting OOO flow ID %0d into the heap with priority %0d",
+                 heap_in_enque_ooo_flow_id, heap_in_enque_priority);
+    end
+    if ((gc_fsm_state == GC_FSM_STATE_IDLE) && gc_ooo_flow_id_valid) begin
+        $display("[SC, GC] Started dropping OOO flow ID %0d", gc_ooo_flow_id);
+    end
+    if (reassembly_state_idle && heap_size != 0) begin
+        $display("[SC] Performance warning: Heap has entries but reassembly is idle!");
+    end
+    `endif
+end
+
+/**
+ * Module instantiations.
+ */
+// FIFO of free OOO-flow linked-list entry pointers (the free list)
+sc_fifo #(
+    .DWIDTH(OOO_FLOW_LL_ENTRY_AWIDTH),
+    .DEPTH(OOO_FLOW_LL_MAX_NUM_ENTRIES),
+    .IS_SHOWAHEAD(1),
+    .IS_OUTDATA_REG(0)
+)
+ooo_flow_ll_fl_fifo (
+    .clock(clk),
+    .wrreq(fl_wrreq),
+    .data(fl_data),
+    .rdreq(fl_rdreq),
+    .q(fl_q),
+    .empty(fl_empty),
+    .full(),  // Unused
+    .usedw()  // Unused
+);
+
+// FIFO queueing flow-table (FT) update operations
+sc_fifo #(
+    .DWIDTH(FT_UPDATE_T_WIDTH),
+    .DEPTH(FT_UPDATE_FIFO_DEPTH),
+    .IS_SHOWAHEAD(1),
+    .IS_OUTDATA_REG(0)
+)
+ft_update_fifo (
+    .clock(clk),
+    .wrreq(ft_update_fifo_wrreq),
+    .data(ft_update_fifo_data),
+    .full(ft_update_fifo_full),
+    .rdreq(ft_update_fifo_rdreq),
+    .q(ft_update_fifo_q),
+    .empty(ft_update_fifo_empty),
+    .usedw()  // Unused
+);
+
+// FIFO of GC requests sent from the scheduler to the reassembler
+sc_fifo #(
+    .DWIDTH(REASSEMBLY_GC_REQ_T_WIDTH),
+    .DEPTH(REASSEMBLY_GC_FIFO_DEPTH),
+    .IS_SHOWAHEAD(1),
+    .IS_OUTDATA_REG(0)
+)
+reassembly_gc_req_fifo (
+    .clock(clk),
+    .wrreq(reassembly_gc_req_fifo_wrreq),
+    .data(reassembly_gc_req_fifo_data),
+    .full(reassembly_gc_req_fifo_full),
+    .rdreq(reassembly_gc_req_fifo_rdreq),
+    .q(reassembly_gc_req_fifo_q),
+    .empty(reassembly_gc_req_fifo_empty),
+    .usedw()  // Unused
+);
+
+// FIFO of GC responses returned by the reassembler to the scheduler
+sc_fifo #(
+    .DWIDTH(REASSEMBLY_GC_META_T_WIDTH),
+    .DEPTH(REASSEMBLY_GC_FIFO_DEPTH),
+    .IS_SHOWAHEAD(1),
+    .IS_OUTDATA_REG(0)
+)
+reassembly_gc_response_fifo (
+    .clock(clk),
+    .wrreq(reassembly_gc_rsp_fifo_wrreq),
+    .data(reassembly_gc_rsp_fifo_data),
+    .full(reassembly_gc_rsp_fifo_full),
+    .rdreq(reassembly_gc_rsp_fifo_rdreq),
+    .q(reassembly_gc_rsp_fifo_q),
+    .empty(reassembly_gc_rsp_fifo_empty),
+    .usedw()  // Unused
+);
+
+// Context table for OOO flows (true dual-port BRAM, both ports read/write)
+bram_true2port #(
+    .AWIDTH(OOO_FLOW_ID_AWIDTH),
+    .DWIDTH(OOO_FLOW_FC_ENTRY_T_WIDTH),
+    .DEPTH(MAX_NUM_OOO_FLOWS),
+    .IS_OUTDATA_REG(0)
+)
+ooo_flow_context_table (
+    .clock(clk),
+    .address_a(fc_address_a),
+    .rden_a(fc_rden_a),
+    .wren_a(fc_wren_a),
+    .data_a(fc_data_a),
+    .q_a(fc_q_a),
+    .address_b(fc_address_b),
+    .rden_b(fc_rden_b),
+    .wren_b(fc_wren_b),
+    .data_b(fc_data_b),
+    .q_b(fc_q_b)
+);
+
+// Linked-list entry payload (packet metadata): port A only writes, port B only reads
+bram_true2port #(
+    .AWIDTH(OOO_FLOW_LL_ENTRY_AWIDTH),
+    .DWIDTH(META_WIDTH),
+    .DEPTH(OOO_FLOW_LL_MAX_NUM_ENTRIES),
+    .IS_OUTDATA_REG(0)
+)
+ooo_flow_ll_entries_meta (
+    .clock(clk),
+    .address_a(lle_meta_address_a),
+    .rden_a(1'b0),  // Unused
+    .wren_a(lle_meta_wren_a),
+    .data_a(lle_meta_data_a),
+    .q_a(lle_meta_q_a),
+    .address_b(lle_meta_address_b),
+    .rden_b(lle_meta_rden_b),
+    .wren_b(1'b0),  // Unused
+    .data_b(lle_meta_data_b),
+    .q_b(lle_meta_q_b)
+);
+
+// Linked-list entry next pointers: port A only writes, port B only reads
+bram_true2port #(
+    .AWIDTH(OOO_FLOW_LL_ENTRY_AWIDTH),
+    .DWIDTH(OOO_FLOW_LL_ENTRY_PTR_T_WIDTH),
+    .DEPTH(OOO_FLOW_LL_MAX_NUM_ENTRIES),
+    .IS_OUTDATA_REG(0)
+)
+ooo_flow_ll_entries_nextptr (
+    .clock(clk),
+    .address_a(lle_nextptr_address_a),
+    .rden_a(1'b0),  // Unused
+    .wren_a(lle_nextptr_wren_a),
+    .data_a(lle_nextptr_data_a),
+    .q_a(lle_nextptr_q_a),
+    .address_b(lle_nextptr_address_b),
+    .rden_b(lle_nextptr_rden_b),
+    .wren_b(1'b0),  // Unused
+    .data_b(lle_nextptr_data_b),
+    .q_b(lle_nextptr_q_b)
+);
+
+// Bounded queue, selected at elaboration time by SCHEDULER_REASSEMBLY_POLICY
+if (SCHEDULER_REASSEMBLY_POLICY == "WSJF") begin
+bounded_wsjf_queue wsjf_queue (
+    // Clock/reset, then enque, deque-min, deque-max and feedback ports
+    .clk(clk),
+    .rst(rst),
+    .in_enque_ready(heap_in_enque_ready),
+    .in_enque_en(heap_in_enque_en),
+    .in_enque_ooo_flow_id(heap_in_enque_ooo_flow_id),
+    .in_enque_priority(heap_in_enque_priority),
+    .out_deque_min_ready(heap_out_deque_min_ready),
+    .out_deque_min_en(heap_out_deque_min_en),
+    .out_deque_min_ooo_flow_id(heap_out_deque_min_ooo_flow_id),
+    .out_deque_min_priority(heap_out_deque_min_priority),
+    .in_deque_max_req_ready(heap_in_deque_max_req_ready),
+    .in_deque_max_req_en(heap_in_deque_max_req_en),
+    .out_deque_max_ready(heap_out_deque_max_ready),
+    .out_deque_max_en(heap_out_deque_max_en),
+    .out_deque_max_ooo_flow_id(heap_out_deque_max_ooo_flow_id),
+    .out_deque_max_priority(heap_out_deque_max_priority),
+    .queue_size(heap_size),
+    .queue_ready()  // Unused
+);
+end
+else if (SCHEDULER_REASSEMBLY_POLICY == "FCFS") begin
+bounded_fcfs_queue fcfs_queue (
+    // Same port map as the WSJF variant above
+    .clk(clk),
+    .rst(rst),
+    .in_enque_ready(heap_in_enque_ready),
+    .in_enque_en(heap_in_enque_en),
+    .in_enque_ooo_flow_id(heap_in_enque_ooo_flow_id),
+    .in_enque_priority(heap_in_enque_priority),
+    .out_deque_min_ready(heap_out_deque_min_ready),
+    .out_deque_min_en(heap_out_deque_min_en),
+    .out_deque_min_ooo_flow_id(heap_out_deque_min_ooo_flow_id),
+    .out_deque_min_priority(heap_out_deque_min_priority),
+    .in_deque_max_req_ready(heap_in_deque_max_req_ready),
+    .in_deque_max_req_en(heap_in_deque_max_req_en),
+    .out_deque_max_ready(heap_out_deque_max_ready),
+    .out_deque_max_en(heap_out_deque_max_en),
+    .out_deque_max_ooo_flow_id(heap_out_deque_max_ooo_flow_id),
+    .out_deque_max_priority(heap_out_deque_max_priority),
+    .queue_size(heap_size),
+    .queue_ready()  // Unused
+);
+end
+else begin
+    $error("[SC] Unimplemented scheduling policy.");
+end
+
+// Flow reassembly engine
+flow_reassembly flow_reassembly_inst (
+    // Clock and reset
+    .clk(clk),
+    .rst(rst),
+    // Heap deque-min interface
+    .heap_out_deque_min_ready(heap_out_deque_min_ready),
+    .heap_out_deque_min_en(heap_out_deque_min_en),
+    .heap_out_deque_min_ooo_flow_id(heap_out_deque_min_ooo_flow_id),
+    .heap_out_deque_min_priority(heap_out_deque_min_priority),
+    // Read channel into the scheduler
+    .scheduler_rdch_ready(reassembly_rdch_ready),
+    .scheduler_rdch_rden(reassembly_rdch_rden),
+    .scheduler_rdch_ooo_flow_id(reassembly_rdch_ooo_flow_id),
+    .scheduler_rdch_rd_valid(reassembly_rdch_rd_valid),
+    .scheduler_rdch_rd_ooo_flow_invalid(reassembly_rdch_rd_ooo_flow_invalid),
+    .scheduler_rdch_fce(reassembly_rdch_fce),
+    .scheduler_rdch_meta(reassembly_rdch_meta),
+    // Write channel into the scheduler
+    .scheduler_wrch_ready(reassembly_wrch_ready),
+    .scheduler_wrch_wren(reassembly_wrch_wren),
+    .scheduler_wrch_is_delete(reassembly_wrch_is_delete),
+    .scheduler_wrch_rel_pkt_cnt(reassembly_wrch_rel_pkt_cnt),
+    .scheduler_wrch_update_fce(reassembly_wrch_update_fce),
+    // GC request FIFO (read side) and GC status
+    .gc_req_fifo_empty(reassembly_gc_req_fifo_empty),
+    .gc_req_fifo_rdreq(reassembly_gc_req_fifo_rdreq),
+    .gc_req_fifo_q(reassembly_gc_req_fifo_q),
+    .gc_req_state_idle(reassembly_gc_req_state_idle),
+    .gc_fl_fill_level(gc_reassembly_fl_size),
+    // GC response FIFO (write side)
+    .gc_rsp_fifo_wrreq(reassembly_gc_rsp_fifo_wrreq),
+    .gc_rsp_fifo_data(reassembly_gc_rsp_fifo_data),
+    .gc_rsp_fifo_full(reassembly_gc_rsp_fifo_full),
+    // Reorder (reassembly output) FIFO
+    .reorder_valid(out_reassembly_fifo_valid),
+    .reorder_ready(out_reassembly_fifo_ready),
+    .reorder_meta(out_reassembly_fifo_meta),
+    .reorder_almost_full(out_reassembly_fifo_almost_full),
+    // Debug only
+    .reassembly_state_idle(reassembly_state_idle)
+);
+
+endmodule
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/README.md b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/README.md
new file mode 100644
index 0000000..d6816fe
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/README.md
@@ -0,0 +1,25 @@
+# SurgeProtector: Mitigating Temporal Algorithmic Complexity Attacks (ACAs) using Adversarial Scheduling
+
+## Algorithmic Complexity Attacks (ACAs)
+Many Network Functions (NFs) on the Internet are inherently vulnerable to temporal Algorithmic Complexity Attacks (ACAs), a highly potent class of Denial-of-Service (DoS) attacks targeting the algorithms and/or data-structures underlying the NFs. With an ACA, an attacker crafts carefully-designed inputs that require a _small amount of network and compute resources_ for the attacker to produce, and yet consume a _large amount of compute resources_ at the target system. Given a sufficient request rate, an attacker can drive the victim into overload, causing it to drop requests from the innocent, intended users of the service. ACAs are particularly dangerous because they allow an adversary with only modest resources to overload a much more powerful service (e.g., an attacker producing just 100Mbps of adversarial traffic may be able to overwhelm an NF provisioned to handle 10Gbps in the common case).
+
+## ACA Vulnerability in Pigasus
+In order to detect attack signatures spanning multiple TCP segments, Pigasus performs TCP reassembly (i.e., the process of reconstructing a TCP bytestream from a sequence of out-of-order packets) for each out-of-order flow. Given the limited BRAM available aboard FPGAs, the Pigasus reassembler prioritizes memory efficiency, and employs a linked list-based design to manage out-of-order flow state. While this achieves excellent memory utilization, the worst-case linear complexity of linked-list operations makes it susceptible to ACAs. An example of the vulnerability is illustrated below.
+
+
+
+
+
+When a new packet arrives (with PSN range \[35, 50) in the above figure), the reassembler linearly scans the list and inserts the node at the appropriate position. In order to exploit this, an attacker crafts _highly out-of-order flows_, linearly increasing the number of traversals required for each subsequent attack packet. Finally, they use _minimum-sized packets_ (with a 1-byte TCP payload) to inflate their packet arrival rate, maximizing the work injected into the system. We find that this is a highly effective DoS attack against Pigasus, allowing the attacker to displace ~9.5Gbps of innocent traffic using just 500Mbps of attack bandwidth.
+
+## SurgeProtector
+SurgeProtector is a scheduling framework developed at CMU that mitigates the impact of ACAs on NFs (such as Pigasus) using novel insights from adversarial scheduling theory. SurgeProtector interposes a _scheduler_ between the vulnerable component and its ingress link, which in turn decides the order in which packets are served by the NF. The key ingredient for ACA mitigation is the novel scheduling policy underlying the SurgeProtector scheduler: **Packet-Size Weighted Shortest-Job First (WSJF)**. While details are elided for the sake of brevity (please refer to the full research paper), WSJF imposes a theoretical upper-bound on the "harm" an adversary can induce via ACAs. In particular, it guarantees that, _to displace 1 bit-per-second (bps) of innocent traffic, the adversary must inject **at least** 1 bps of their own bandwidth into the attack_, significantly reducing the potency of ACAs. Further, this bound is independent of the load on the server, the packet and job size distributions for innocent traffic, and the underlying application itself. Thus, SurgeProtector represents a _general_ ACA mitigation strategy that is not tied to any specific NF.
+
+This directory contains part of the source code used to implement SurgeProtector in the context of the Pigasus Reassembler (the scheduler itself is implemented in `scheduler_reassembly.sv`, which can be found in the parent directory). A brief description of each file is provided below:
+- ffs.sv: Implements the _Find First Set_ operation to find the index of the least- and most-significant set bit in a given bitvector
+- pipelined_heap.sv: Implements a fully pipelined min+max heap using a two-level Hierarchical Find-First Set (HFFS) queue; the heap supports `Insert`, `ExtractMin`, and `ExtractMax` operations, all with constant worst-case time complexity and a throughput of 1 op/cycle
+- pipelined_heap_wrapper.sv: A wrapper for the pipelined_heap module that provides limited support for multiple ops/cycle and a more convenient heap interface
+- heap_ops_pkg.sv: Package defining the available heap operations (`Insert`, `ExtractMin`, `ExtractMax`)
+- bounded_wsjf_queue.sv: Implements the policy underlying SurgeProtector (Packet-Size Weighted Shortest-Job First) using the pipelined_heap data-structure
+
+**IMPORTANT**: SurgeProtector is provided as an optional feature that is disabled by default. To enable it, uncomment ``// `define ENABLE_SURGEPROTECTOR`` in `pigasus.py`.
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/bounded_wsjf_queue.sv b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/bounded_wsjf_queue.sv
new file mode 100644
index 0000000..4747b62
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/bounded_wsjf_queue.sv
@@ -0,0 +1,72 @@
+`include "./src/struct_s.sv"
+
+import heap_ops_pkg::*;
+
+/**
+ * Bounded WSJF queue: wraps a pipelined heap whose primary deque op is deque-min.
+ */
+module bounded_wsjf_queue(
+    // Clock and reset
+    input  logic            clk,
+    input  logic            rst,
+
+    // Enque port
+    input  logic            in_enque_en,
+    input  ooo_flow_id_t    in_enque_ooo_flow_id,
+    input  heap_priority_t  in_enque_priority,
+    output logic            in_enque_ready,
+
+    // Deque-min port
+    input  logic            out_deque_min_en,
+    output ooo_flow_id_t    out_deque_min_ooo_flow_id,
+    output heap_priority_t  out_deque_min_priority,
+    output logic            out_deque_min_ready,
+
+    // Deque-max port: request side
+    input  logic            in_deque_max_req_en,
+    output logic            in_deque_max_req_ready,
+    // Deque-max port: output side
+    input  logic            out_deque_max_en,
+    output ooo_flow_id_t    out_deque_max_ooo_flow_id,
+    output heap_priority_t  out_deque_max_priority,
+    output logic            out_deque_max_ready,
+
+    // Status feedback
+    output heap_size_t      queue_size,
+    output logic            queue_ready
+);
+
+// Heap instance: entries carry OOO flow IDs; min is the primary deque, max the secondary
+pipelined_heap_wrapper #(
+    .HEAP_OP_DEQUE_PRIMARY_TYPE(HEAP_OP_DEQUE_MIN),
+    .HEAP_ENTRY_VALUE_WIDTH(OOO_FLOW_ID_AWIDTH),
+    .HEAP_MAX_NUM_ENTRIES(HEAP_MAX_NUM_ENTRIES),
+    .HEAP_BITMAP_WIDTH(HEAP_BITMAP_WIDTH)
+) heap_wrapper (
+    // Clock and reset
+    .clk(clk),
+    .rst(rst),
+    // Enque (flow ID is the stored value)
+    .in_enque_ready(in_enque_ready),
+    .in_enque_en(in_enque_en),
+    .in_enque_priority(in_enque_priority),
+    .in_enque_value(in_enque_ooo_flow_id),
+    // Primary deque = deque-min
+    .out_deque_primary_ready(out_deque_min_ready),
+    .out_deque_primary_en(out_deque_min_en),
+    .out_deque_primary_priority(out_deque_min_priority),
+    .out_deque_primary_value(out_deque_min_ooo_flow_id),
+    // Secondary deque = deque-max (request)
+    .in_deque_secondary_req_ready(in_deque_max_req_ready),
+    .in_deque_secondary_req_en(in_deque_max_req_en),
+    // Secondary deque = deque-max (response)
+    .out_deque_secondary_ready(out_deque_max_ready),
+    .out_deque_secondary_en(out_deque_max_en),
+    .out_deque_secondary_priority(out_deque_max_priority),
+    .out_deque_secondary_value(out_deque_max_ooo_flow_id),
+    // Status feedback
+    .heap_ready(queue_ready),
+    .heap_size(queue_size)
+);
+
+endmodule
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/ffs.sv b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/ffs.sv
new file mode 100644
index 0000000..4c0c1c2
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/ffs.sv
@@ -0,0 +1,59 @@
+/*
+ * Find first-set bits: index of the most- (msb) and least-significant (lsb) set bit of x.
+ */
+module ffs #(
+    parameter WIDTH_LOG = 4,
+    localparam WIDTH = (1 << WIDTH_LOG)
+) (
+    input logic [WIDTH-1:0] x,
+    output logic [WIDTH_LOG-1:0] msb,
+    output logic [WIDTH_LOG-1:0] lsb,
+    output logic zero
+);
+
+// Loop indices are local to each block so that no variable is shared between processes
+logic [WIDTH-1:0] y;
+logic [WIDTH-1:0] part_msb;
+logic [WIDTH-1:0] part_lsb;
+
+// Zero input? (msb reads 0 and lsb reads all-ones in that case, so qualify them with this flag)
+assign zero = (x == 0);
+
+// Leading one (MSB) detector
+always @(*) begin
+    msb = 0;
+    part_msb = x;
+    for (int i = (WIDTH_LOG-1); i >= 0; i = i - 1) begin
+        // Binary search: (1 << i) is the size of the upper half of the remaining window
+        if (|(part_msb >> (1 << i))) begin
+            msb[i] = 1;
+        end
+        part_msb = msb[i] ? (part_msb >> (1 << i)) :
+                            (part_msb & ((1'd1 << (1 << i)) - 1'd1));
+    end
+end
+
+// Reverse bit order for LSB detection
+always @(*) begin
+    for (int j = (WIDTH-1); j >= 0; j = j - 1) begin
+        y[j] = x[(WIDTH-1) - j];
+    end
+end
+
+// Trailing one (LSB) detector: MSB search on the bit-reversed input, index inverted at the end
+// TODO(natre): Optimize impl.
+always @(*) begin
+    lsb = 0;
+    part_lsb = y;
+    for (int k = (WIDTH_LOG-1); k >= 0; k = k - 1) begin
+        // Same binary search as the MSB detector, applied to y
+        if (|(part_lsb >> (1 << k))) begin
+            lsb[k] = 1;
+        end
+        part_lsb = lsb[k] ? (part_lsb >> (1 << k)) :
+                            (part_lsb & ((1'd1 << (1 << k)) - 1'd1));
+    end
+    lsb = ~lsb;
+end
+
+endmodule
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/heap_ops_pkg.sv b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/heap_ops_pkg.sv
new file mode 100644
index 0000000..4f36af6
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/heap_ops_pkg.sv
@@ -0,0 +1,9 @@
+package heap_ops_pkg;
+// Heap operation codes, encoded as 0, 1 and 2 in a 2-bit field.
+typedef enum logic [1:0] {
+    HEAP_OP_ENQUE     = 2'd0,
+    HEAP_OP_DEQUE_MIN = 2'd1,
+    HEAP_OP_DEQUE_MAX = 2'd2
+} heap_op_t;
+
+endpackage
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/pipelined_heap.sv b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/pipelined_heap.sv
new file mode 100644
index 0000000..8b4296e
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/pipelined_heap.sv
@@ -0,0 +1,890 @@
+import heap_ops_pkg::*;
+// `define DEBUG
+
+/**
+ * Implements a min+max heap using a two-level Hierarchical
+ * Find First Set (HFFS) Queue. The heap is fully pipelined,
+ * capable of performing one operation (enque, deque-min, or
+ * deque-max) every cycle.
+ *
+ * The heap structure (for HEAP_BITMAP_WIDTH=2) is as follows:
+ *
+ * L1 Bitmap: 1 0
+ * ________| |________
+ * | |
+ * --- ---
+ * L2 Bitmaps: 1 0 0 0
+ * |
+ * ------------
+ * Priority bucket: (Head, Tail)
+ * ______| |_______
+ * | |
+ * Heap Entries: A -> B -> C -> D -> E
+ *
+ * Free List: (Head, Tail)
+ * __| |__
+ * | |
+ * F -> ... -> Z
+ *
+ * A "1" in any bitmap indicates a non-empty priority bucket in the
+ * corresponding subtree rooted at that node. In order to find the
+ * min (or max) heap entry, we recursively follow the leftmost (or
+ * rightmost) set bit in each bitmap starting from the root.
+ *
+ * ------------------------------------------------------------------
+ * Glossary
+ * ------------------------------------------------------------------
+ * L1 bitmap is the root-level bitmap, where each bit corresponds
+ * to a unique L2 bitmap. The heap contains a single L1 bitmap
+ * and HEAP_BITMAP_WIDTH number of L2 bitmaps.
+ *
+ * L2 bitmaps are the leaf-level bitmaps, where each bit maps to
+ * a unique priority bucket. In all, the heap can represent
+ * HEAP_BITMAP_WIDTH^2 number of unique integer priorities.
+ *
+ * Priority Buckets (PBs) represent unique priorities. Each PB stores
+ * a 2-tuple of (head, tail) pointers (to heap entries); these are
+ * in turn used to implement a singly-linked list. If the priority
+ * bucket is empty, both pointers are invalid.
+ *
+ * Heap Entries (HEs) serve two purposes: they hold the value of the
+ * corresponding object, and they store a pointer to another heap
+ * entry. When an object is enqueued into the heap, it is first
+ * allocated a heap entry (from the Free List), which is then
+ * inserted into the corresponding priority bucket.
+ *
+ * Free List (FL) implements a singly-linked list of pointers to
+ * unused HEs which can be allocated to new objects.
+ *
+ * ------------------------------------------------------------------
+ * Semantics
+ * ------------------------------------------------------------------
+ * Both enque and deque-* operations are implemented using a 5-stage
+ * pipeline with the following steps:
+ *
+ * Stage 0: Register inputs
+ * Stage 1: Compute the index of the L2 bitmap which this operation
+ * maps to (for enque, this is the given priority divided
+ * by HEAP_BITMAP_WIDTH; for deque-*, this is computed by
+ * performing FFS/FLS on the L1 bitmap), and issue a read
+ * for corresponding L2 bitmap.
+ *
+ * Stage 2: Compute the index of the PB which this operation maps to
+ * (for enque, this is just the given priority; for deque-*,
+ * this is computed by performing FFS/FLS on the L2 bitmap),
+ * and issue a read for the corresponding priority bucket.
+ *
+ * Stage 3: For enque, read the free list head pointer (i.e. allocate
+ * a new heap entry). For deque, read the PB's head pointer.
+ *
+ * Stage 4: Commit changes. For enque, updates: (a) the FL to commit
+ * the "pop" in the previous stage, (b) the allocated heap
+ * entry with the object data and next pointer, (c) the PB,
+ * if required, and (d) the {L1, L2} bitmaps, if required.
+ * The process is similar for deque-*, except it involves
+ * a "push" to the FL, and no HEs need to be updated. All
+ * output op signals are registered.
+ *
+ * Note that there may be data hazards in the pipeline (e.g., an L2
+ * bitmap that is being read in Stage 2 may be concurrently updated
+ * in Stage 4). As such, the heap implements write forwarding logic
+ * across pipeline stages to ensure stall-free operation.
+ *
+ * Notes:
+ * 1. Operations that would result in invalid state (e.g., enques into
+ * a full heap, or deques from an empty heap) are ignored. As such,
+ * it is the overlying module's responsibility to ensure these
+ * situations don't arise.
+ *
+ * 2. Once rst is pulsed, existing ops in the pipeline are discarded.
+ * Must also wait until "ready" is re-asserted to issue a new op.
+ */
+module pipelined_heap #(
+    parameter HEAP_BITMAP_WIDTH,        // Bits per bitmap (the L1 bitmap and each L2 bitmap)
+    parameter HEAP_MAX_NUM_ENTRIES,     // Depth of the heap-entry and free-list memories
+    parameter HEAP_ENTRY_VALUE_WIDTH,   // Bits per stored value
+    localparam HEAP_NUM_PRIORITIES = (HEAP_BITMAP_WIDTH ** 2),
+    localparam HEAP_ENTRY_AWIDTH = ($clog2(HEAP_MAX_NUM_ENTRIES)),
+    localparam HEAP_PRIORITY_AWIDTH = ($clog2(HEAP_NUM_PRIORITIES))
+) (
+    // General I/O
+    input   logic                               clk,
+    input   logic                               rst,    // Synchronous; restarts initialization
+    output  logic                               ready,  // High in the READY state while rst is low
+
+    // Operation input
+    input   logic                               in_en,  // Only accepted while ready is high
+    input   heap_op_t                           in_op_type,
+    input   logic [HEAP_ENTRY_VALUE_WIDTH-1:0]  in_he_value,
+    input   logic [HEAP_PRIORITY_AWIDTH-1:0]    in_he_priority,
+
+    // Operation output
+    output  logic                               out_valid,
+    output  heap_op_t                           out_op_type,
+    output  logic [HEAP_ENTRY_VALUE_WIDTH-1:0]  out_he_value,
+    output  logic [HEAP_PRIORITY_AWIDTH-1:0]    out_he_priority,
+
+    // Feedback signals
+    output  logic [HEAP_ENTRY_AWIDTH:0]         size,               // Committed entry count
+    output  logic [2:0]                         num_ops_enque,      // In flight, stages 0-3
+    output  logic [2:0]                         num_ops_deque_min,  // In flight, stages 0-3
+    output  logic [2:0]                         num_ops_deque_max   // In flight, stages 0-3
+);
+
+// Loop indices are declared locally (int i) inside each procedural block.
+
+/**
+ * Derived parameters.
+ */
+localparam NUM_BITMAPS_L1 = 1;
+localparam NUM_BITMAPS_L2 = (HEAP_BITMAP_WIDTH);
+localparam L2_BITMAP_IDX_MASK = (HEAP_BITMAP_WIDTH - 1);
+localparam HEAP_ENTRY_PTR_T_WIDTH = (HEAP_ENTRY_AWIDTH + 1); // Address bits plus one extra bit
+localparam FREE_LIST_ENTRY_T_WIDTH = (HEAP_ENTRY_PTR_T_WIDTH);
+localparam HEAP_LOG_BITMAP_WIDTH = ($clog2(HEAP_BITMAP_WIDTH));
+localparam LIST_T_WIDTH = (HEAP_ENTRY_PTR_T_WIDTH * 2);
+localparam INVALID_PTR = {HEAP_ENTRY_PTR_T_WIDTH{1'b1}}; // All-ones pointer marks "no entry"
+localparam HEAP_ENTRY_T_WIDTH = (HEAP_ENTRY_VALUE_WIDTH + HEAP_ENTRY_PTR_T_WIDTH);
+
+/**
+ * Local typedefs.
+ */
+typedef logic [HEAP_BITMAP_WIDTH-1:0] bitmap_t;
+typedef logic [HEAP_PRIORITY_AWIDTH-1:0] heap_priority_t;
+typedef logic [HEAP_ENTRY_PTR_T_WIDTH-1:0] heap_entry_ptr_t;
+typedef logic [HEAP_ENTRY_VALUE_WIDTH-1:0] heap_entry_value_t;
+typedef struct packed { heap_entry_ptr_t next; } free_list_entry_t;
+typedef struct packed { heap_entry_ptr_t head; heap_entry_ptr_t tail; } list_t;
+typedef struct packed { heap_entry_value_t value; heap_entry_ptr_t next; } heap_entry_t;
+
+typedef enum logic [1:0] {
+    FSM_STATE_IDLE,     // Power-up state; left only when rst is pulsed
+    FSM_STATE_INIT,     // Writing the free-list and priority-bucket memories
+    FSM_STATE_READY     // Accepting operations
+} fsm_state_t;
+
+// Heap state
+list_t free_list; // Free list
+bitmap_t l1_bitmap; // L1 bitmap
+bitmap_t l2_bitmaps[NUM_BITMAPS_L2-1:0]; // L2 bitmaps
+
+// Free list
+logic fl_rden;
+logic fl_wren;
+logic fl_rdwr_conflict;
+logic [FREE_LIST_ENTRY_T_WIDTH-1:0] fl_q;
+logic [HEAP_ENTRY_AWIDTH-1:0] fl_rdaddress;
+logic [HEAP_ENTRY_AWIDTH-1:0] fl_wraddress;
+logic [FREE_LIST_ENTRY_T_WIDTH-1:0] fl_data;
+logic [HEAP_ENTRY_AWIDTH-1:0] fl_wraddress_counter_r;
+
+// Heap entries
+logic he_rden;
+logic he_wren;
+logic he_rdwr_conflict;
+logic [HEAP_ENTRY_T_WIDTH-1:0] he_q;
+logic [HEAP_ENTRY_T_WIDTH-1:0] he_data;
+logic [HEAP_ENTRY_AWIDTH-1:0] he_rdaddress;
+logic [HEAP_ENTRY_AWIDTH-1:0] he_wraddress;
+
+// Priority buckets
+logic pb_rden;
+logic pb_wren;
+logic pb_rdwr_conflict;
+logic [LIST_T_WIDTH-1:0] pb_q;
+logic [LIST_T_WIDTH-1:0] pb_data;
+logic [HEAP_PRIORITY_AWIDTH-1:0] pb_rdaddress;
+logic [HEAP_PRIORITY_AWIDTH-1:0] pb_wraddress;
+logic [HEAP_PRIORITY_AWIDTH-1:0] pb_wraddress_counter_r;
+
+/**
+ * Housekeeping.
+ */
+// Common pipeline metadata (index N holds the output register of Stage N)
+logic reg_en_q[4:0];
+heap_op_t reg_op_type_q[4:0];
+logic reg_is_enque_q[4:0];
+heap_priority_t reg_priority_q[4:0];
+heap_entry_value_t reg_he_value_q[4:0];
+bitmap_t reg_l2_bitmap_q[4:0];
+logic reg_is_deque_min_q[4:0];
+logic reg_is_deque_max_q[4:0];
+logic [HEAP_LOG_BITMAP_WIDTH-1:0] reg_l1_bitmap_idx_q[4:0];
+logic [HEAP_BITMAP_WIDTH-1:0] reg_l2_bitmap_mask_q[4:0];
+logic [HEAP_BITMAP_WIDTH-1:0] reg_l1_bitmap_mask_q[4:0];
+
+// Stage 0 metadata
+bitmap_t reg_l1_bitmap_q0 = 0;
+
+// Stage 1 metadata
+logic en_q1;
+logic [HEAP_LOG_BITMAP_WIDTH-1:0] l1_bitmap_idx_q1;
+
+// Stage 2 metadata
+logic reg_pb_rdwr_conflict_q2 = 0;
+logic en_q2;
+logic [HEAP_LOG_BITMAP_WIDTH-1:0] l2_bitmap_idx_q2;
+
+// Stage 3 metadata
+logic reg_he_rdwr_conflict_q3 = 0;
+logic reg_fl_rdwr_conflict_q3 = 0;
+list_t reg_pb_q3 = 0;
+logic en_q3;
+logic is_enque_q3;
+logic pb_empty_q3;
+logic fl_empty_q3;
+
+// Stage 4 metadata
+free_list_entry_t reg_fl_data_q4 = 0;
+heap_entry_t reg_he_data_q4 = 0;
+list_t reg_pb_data_q4 = 0;
+logic en_q4;
+logic pb_empty_q4;
+heap_entry_value_t he_value_q4;
+bitmap_t l2_bitmap_data_q4;
+
+// Init signals
+fsm_state_t state = FSM_STATE_IDLE;
+logic pb_init_done_r = 0;
+logic fl_init_done_r = 0;
+logic pb_init_done;
+logic fl_init_done;
+fsm_state_t state_next;
+
+// Intermediate signals
+bitmap_t int_l1_bitmap;
+list_t int_free_list;
+free_list_entry_t int_fl_q;
+free_list_entry_t int_fl_data;
+heap_entry_t int_he_q;
+heap_entry_t int_he_data;
+list_t int_pb_q;
+list_t int_pb_data;
+logic [HEAP_ENTRY_AWIDTH:0] int_size;
+
+// Miscellaneous signals
+logic [HEAP_LOG_BITMAP_WIDTH-1:0] ffs_l1_inst_msb;
+logic [HEAP_LOG_BITMAP_WIDTH-1:0] ffs_l1_inst_lsb;
+logic ffs_l1_inst_zero;
+logic [HEAP_LOG_BITMAP_WIDTH-1:0] ffs_l2_inst_msb;
+logic [HEAP_LOG_BITMAP_WIDTH-1:0] ffs_l2_inst_lsb;
+logic ffs_l2_inst_zero;
+
+assign fl_data = int_fl_data;
+assign he_data = int_he_data;
+assign pb_data = int_pb_data;
+assign pb_empty_q4 = (reg_pb_q3.head == reg_pb_q3.tail); // A Stage 4 deque drains the bucket
+
+// Output assignments
+assign out_valid = reg_en_q[4];
+assign out_op_type = reg_op_type_q[4];
+assign out_he_value = reg_he_value_q[4];
+assign out_he_priority = reg_priority_q[4];
+assign ready = !rst & (state == FSM_STATE_READY);
+
+initial begin
+    // Initialize the free list
+    free_list.head = 0;
+    free_list.tail = (HEAP_MAX_NUM_ENTRIES - 1);
+
+    // Initialize the bitmaps
+    l1_bitmap = 0;
+    for (int i = 0; i < NUM_BITMAPS_L2; i = i + 1) begin
+        l2_bitmaps[i] = 0;
+    end
+
+    // Initialize pipeline stages
+    for (int i = 0; i <= 4; i = i + 1) begin
+        reg_en_q[i] = 0;
+        reg_op_type_q[i] = HEAP_OP_ENQUE;
+        reg_is_enque_q[i] = 0;
+        reg_priority_q[i] = 0;
+        reg_he_value_q[i] = 0;
+        reg_l2_bitmap_q[i] = 0;
+        reg_is_deque_min_q[i] = 0;
+        reg_is_deque_max_q[i] = 0;
+        reg_l1_bitmap_idx_q[i] = 0;
+        reg_l2_bitmap_mask_q[i] = 0;
+        reg_l1_bitmap_mask_q[i] = 0;
+    end
+
+    // Initialize outputs
+    size = 0;
+end
+
+always @(*) begin
+    num_ops_enque = 0;
+    num_ops_deque_min = 0;
+    num_ops_deque_max = 0;
+
+    // Compute the outstanding op counts. The final
+    // (output) pipeline stage is excluded since it
+    // represents an already-committed operation.
+    for (int i = 0; i <= 3; i = i + 1) begin
+        num_ops_enque = (
+            num_ops_enque +
+            (reg_en_q[i] & reg_is_enque_q[i]));
+
+        num_ops_deque_min = (
+            num_ops_deque_min +
+            (reg_en_q[i] & reg_is_deque_min_q[i]));
+
+        num_ops_deque_max = (
+            num_ops_deque_max +
+            (reg_en_q[i] & reg_is_deque_max_q[i]));
+    end
+end
+
+always @(*) begin
+    en_q1 = 0;
+    l1_bitmap_idx_q1 = 0;
+    en_q2 = 0;
+    l2_bitmap_idx_q2 = 0;
+    en_q3 = 0;
+    is_enque_q3 = 0;
+    pb_empty_q3 = 0;
+    fl_empty_q3 = 0;
+    en_q4 = 0;
+    he_value_q4 = 0;
+    l2_bitmap_data_q4 = reg_l2_bitmap_q[3];
+
+    int_l1_bitmap = l1_bitmap;
+    int_free_list = free_list;
+    int_fl_q = fl_q;
+    int_he_q = he_q;
+    int_pb_q = pb_q;
+    int_fl_data = 0;
+    int_he_data = 0;
+    int_pb_data = reg_pb_q3;
+    int_size = size;
+
+    fl_rden = 0;
+    fl_wren = 0;
+    fl_rdaddress = 0;
+    fl_wraddress = 0;
+    fl_rdwr_conflict = 0;
+
+    pb_rden = 0;
+    pb_wren = 0;
+    pb_rdaddress = 0;
+    pb_wraddress = 0;
+    pb_rdwr_conflict = 0;
+
+    he_rden = 0;
+    he_wren = 0;
+    he_rdaddress = 0;
+    he_wraddress = 0;
+    he_rdwr_conflict = 0;
+
+    state_next = state;
+    fl_init_done = fl_init_done_r;
+    pb_init_done = pb_init_done_r;
+
+    // Initialization state
+    if (state == FSM_STATE_INIT) begin
+        if (!fl_init_done_r) begin
+            fl_wren = 1;
+            fl_wraddress = fl_wraddress_counter_r;
+            int_fl_data.next = fl_wraddress_counter_r + 1;
+
+            fl_init_done = (fl_wraddress_counter_r ==
+                            (HEAP_MAX_NUM_ENTRIES - 1));
+        end
+        if (!pb_init_done_r) begin
+            pb_wren = 1;
+            int_pb_data.head = INVALID_PTR;
+            int_pb_data.tail = INVALID_PTR;
+            pb_wraddress = pb_wraddress_counter_r;
+
+            pb_init_done = (pb_wraddress_counter_r ==
+                            (HEAP_NUM_PRIORITIES - 1));
+        end
+        // Done initializing both the free list and
+        // priority buckets. Proceed to ready state.
+        if (fl_init_done & pb_init_done) begin
+            state_next = FSM_STATE_READY;
+        end
+    end
+    else begin
+        /**
+         * Stage 4: Perform writes: update the L1, L2 bitmaps, the
+         * priority bucket, the free list and free list entry, and
+         * the heap entry.
+         */
+        if (reg_en_q[3]) begin
+            en_q4 = 1;
+            // The heap entry was updated on the last cycle
+            if (reg_en_q[4] && reg_he_rdwr_conflict_q3) begin
+                int_he_q = reg_he_data_q4;
+            end
+            // The free list entry was updated on the last cycle
+            if (reg_en_q[4] && reg_fl_rdwr_conflict_q3) begin
+                int_fl_q = reg_fl_data_q4;
+            end
+
+            // Perform enque
+            if (reg_is_enque_q[3]) begin
+                // Compute updated bitmaps
+                l2_bitmap_data_q4 = (reg_l2_bitmap_q[3] | reg_l2_bitmap_mask_q[3]);
+                int_l1_bitmap = (l1_bitmap | reg_l1_bitmap_mask_q[3]);
+
+                // Update the heap entry (the new entry becomes the bucket's head)
+                he_wren = 1;
+                he_wraddress = free_list.head[HEAP_ENTRY_AWIDTH-1:0];
+                int_he_data.next = reg_pb_q3.head;
+                int_he_data.value = reg_he_value_q[3];
+
+                // Update the free list (head and tail)
+                if (free_list.head == free_list.tail) begin
+                    int_free_list.head = INVALID_PTR;
+                    int_free_list.tail = INVALID_PTR;
+                end
+                else begin
+                    // Update head, tail is unchanged
+                    int_free_list.head = int_fl_q.next;
+                end
+
+                // Update the priority bucket
+                pb_wren = 1;
+                pb_wraddress = reg_priority_q[3];
+                if (reg_pb_q3.head == INVALID_PTR) begin
+                    int_pb_data.tail = he_wraddress;
+                end
+                int_pb_data.head = he_wraddress;
+
+                // Update the value
+                he_value_q4 = reg_he_value_q[3];
+
+                // Update the heap size
+                int_size = size + 1'b1;
+            end
+            // Perform deque
+            else begin
+                // Update the priority bucket
+                pb_wren = 1;
+                pb_wraddress = reg_priority_q[3];
+                if (reg_pb_q3.head == reg_pb_q3.tail) begin
+                    int_pb_data.head = INVALID_PTR;
+                    int_pb_data.tail = INVALID_PTR;
+                end
+                else begin
+                    // Update head, tail is unchanged
+                    int_pb_data.head = int_he_q.next;
+                end
+
+                // Update the free list (the freed entry is appended at the tail)
+                int_free_list.tail = reg_pb_q3.head;
+                if (free_list.head == INVALID_PTR) begin
+                    int_free_list.head = reg_pb_q3.head;
+                end
+                else begin
+                    // Update tail free list entry
+                    fl_wren = 1;
+                    int_fl_data.next = reg_pb_q3.head;
+                    fl_wraddress = free_list.tail[HEAP_ENTRY_AWIDTH-1:0];
+                end
+
+                // Update the value
+                he_value_q4 = int_he_q.value;
+
+                // Compute updated bitmaps
+                if (pb_empty_q4) begin
+                    l2_bitmap_data_q4 = (reg_l2_bitmap_q[3] &
+                                         ~reg_l2_bitmap_mask_q[3]);
+
+                    if (l2_bitmap_data_q4 == 0) begin
+                        int_l1_bitmap = (l1_bitmap &
+                                         ~reg_l1_bitmap_mask_q[3]);
+                    end
+                end
+
+                // Update the heap size
+                int_size = size - 1'b1;
+            end
+        end
+
+        /**
+         * Stage 3: Read the free list and the corresponding heap entry.
+         */
+        if (reg_en_q[2]) begin
+            // The target priority bucket is being updated on this cycle
+            if (en_q4 && (reg_priority_q[3] == reg_priority_q[2])) begin
+                int_pb_q = int_pb_data;
+            end
+            // The PB was updated on the last cycle (and thus never read)
+            else if (reg_en_q[4] && reg_pb_rdwr_conflict_q2) begin
+                int_pb_q = reg_pb_data_q4;
+            end
+            // Fallthrough: default to pb_q
+
+            pb_empty_q3 = (int_pb_q.head == INVALID_PTR);
+            fl_empty_q3 = (int_free_list.head == INVALID_PTR);
+
+            // Disable pipeline stage op if:
+            // 1. Enqueuing and the free list is empty, OR
+            // 2. Dequeuing and the priority bucket is empty
+            en_q3 = !((reg_is_enque_q[2] && fl_empty_q3) ||
+                      (!reg_is_enque_q[2] && pb_empty_q3));
+
+            // Dequeuing, read the heap entry
+            if (en_q3 && !reg_is_enque_q[2]) begin
+                he_rden = 1;
+                he_rdaddress = int_pb_q.head[HEAP_ENTRY_AWIDTH-1:0];
+                if (he_wren && (he_wraddress == he_rdaddress)) begin
+                    he_rden = 0; // Disable reads during writes
+                    he_rdwr_conflict = 1;
+                end
+            end
+            // Enqueuing, read the free list entry
+            if (en_q3 && reg_is_enque_q[2]) begin
+                fl_rden = 1;
+                fl_rdaddress = int_free_list.head[HEAP_ENTRY_AWIDTH-1:0];
+                if (fl_wren && (fl_wraddress == fl_rdaddress)) begin
+                    fl_rden = 0; // Disable reads during writes
+                    fl_rdwr_conflict = 1;
+                end
+            end
+        end
+
+        /**
+         * Stage 2: Compute L2 bitmap index and read the
+         * corresponding priority bucket (head and tail).
+         */
+        if (reg_en_q[1]) begin
+            if (reg_is_enque_q[1]) begin
+                en_q2 = 1;
+                pb_rden = 1;
+                pb_rdaddress = reg_priority_q[1];
+                l2_bitmap_idx_q2 = (reg_priority_q[1] & L2_BITMAP_IDX_MASK);
+            end
+            else if (!ffs_l2_inst_zero) begin
+                en_q2 = 1;
+                pb_rden = 1;
+                l2_bitmap_idx_q2 = (
+                    (reg_op_type_q[1] == HEAP_OP_DEQUE_MAX) ?
+                    ffs_l2_inst_msb : ffs_l2_inst_lsb
+                );
+                pb_rdaddress = (l2_bitmap_idx_q2 +
+                    (reg_l1_bitmap_idx_q[1] << HEAP_LOG_BITMAP_WIDTH));
+            end
+            // Disable priority bucket reads during conflicting writes
+            if (en_q4 && (reg_priority_q[3] == pb_rdaddress)) begin
+                pb_rdwr_conflict = 1;
+                pb_rden = 0;
+            end
+        end
+
+        /**
+         * Stage 1: Compute L1 bitmap index
+         * and read corresponding L2 bitmap.
+         */
+        if (reg_en_q[0]) begin
+            if (reg_is_enque_q[0]) begin
+                en_q1 = 1;
+                l1_bitmap_idx_q1 = (
+                    reg_priority_q[0] >> HEAP_LOG_BITMAP_WIDTH);
+            end
+            else if (!ffs_l1_inst_zero) begin
+                en_q1 = 1;
+                l1_bitmap_idx_q1 = (
+                    (reg_op_type_q[0] == HEAP_OP_DEQUE_MAX) ?
+                    ffs_l1_inst_msb : ffs_l1_inst_lsb
+                );
+            end
+        end
+    end
+end
+
+always @(posedge clk) begin
+    if (rst) begin
+        // Reset free list
+        free_list.head <= 0;
+        free_list.tail <= (HEAP_MAX_NUM_ENTRIES - 1);
+
+        // Reset bitmaps (non-blocking, like every other clocked write to l1_bitmap)
+        l1_bitmap <= 0;
+        for (int i = 0; i < NUM_BITMAPS_L2; i = i + 1) begin
+            l2_bitmaps[i] <= 0;
+        end
+
+        // Reset pipeline stages
+        for (int i = 0; i <= 4; i = i + 1) begin
+            reg_en_q[i] <= 0;
+        end
+
+        // Reset init signals
+        fl_init_done_r <= 0;
+        pb_init_done_r <= 0;
+        fl_wraddress_counter_r <= 0;
+        pb_wraddress_counter_r <= 0;
+
+        // Reset FSM state and size
+        state <= FSM_STATE_INIT;
+        size <= 0;
+    end
+    else begin
+        /**
+         * Stage 4: Perform writes: update the L1, L2 bitmaps, the
+         * priority bucket, the free list and free list entry, and
+         * the heap entry.
+         */
+        reg_en_q[4] <= en_q4;
+        reg_he_value_q[4] <= he_value_q4;
+        reg_op_type_q[4] <= reg_op_type_q[3];
+        reg_is_enque_q[4] <= reg_is_enque_q[3];
+        reg_priority_q[4] <= reg_priority_q[3];
+        reg_is_deque_min_q[4] <= reg_is_deque_min_q[3];
+        reg_is_deque_max_q[4] <= reg_is_deque_max_q[3];
+
+        reg_pb_data_q4 <= int_pb_data;
+        reg_he_data_q4 <= int_he_data;
+        reg_fl_data_q4 <= int_fl_data;
+        reg_l2_bitmap_q[4] <= l2_bitmap_data_q4;
+        reg_l1_bitmap_idx_q[4] <= reg_l1_bitmap_idx_q[3];
+        reg_l2_bitmap_mask_q[4] <= reg_l2_bitmap_mask_q[3];
+        reg_l1_bitmap_mask_q[4] <= reg_l1_bitmap_mask_q[3];
+
+        // Update the heap size, free list and bitmaps
+        size <= int_size;
+        free_list <= int_free_list;
+        l1_bitmap <= int_l1_bitmap;
+        if (en_q4) begin
+            l2_bitmaps[reg_l1_bitmap_idx_q[3]] <= l2_bitmap_data_q4;
+        end
+
+        `ifdef DEBUG
+        if (reg_en_q[3] && !reg_is_enque_q[3] && en_q4) begin
+            $display(
+                "[PH] Deque is updating (L1_idx, priority) = (%0d, %0d)",
+                reg_l1_bitmap_idx_q[3], reg_priority_q[3],
+                " from %b to %b, pb contents %b",
+                reg_l2_bitmap_q[3], l2_bitmap_data_q4, int_pb_data);
+        end
+        `endif
+
+        /**
+         * Stage 3: Read the free list and corresponding heap entry.
+         */
+        reg_en_q[3] <= en_q3;
+        reg_op_type_q[3] <= reg_op_type_q[2];
+        reg_is_enque_q[3] <= reg_is_enque_q[2];
+        reg_priority_q[3] <= reg_priority_q[2];
+        reg_he_value_q[3] <= reg_he_value_q[2];
+        reg_is_deque_min_q[3] <= reg_is_deque_min_q[2];
+        reg_is_deque_max_q[3] <= reg_is_deque_max_q[2];
+
+        reg_pb_q3 <= int_pb_q;
+        reg_he_rdwr_conflict_q3 <= he_rdwr_conflict;
+        reg_fl_rdwr_conflict_q3 <= fl_rdwr_conflict;
+        reg_l1_bitmap_idx_q[3] <= reg_l1_bitmap_idx_q[2];
+        reg_l2_bitmap_mask_q[3] <= reg_l2_bitmap_mask_q[2];
+        reg_l1_bitmap_mask_q[3] <= reg_l1_bitmap_mask_q[2];
+
+        reg_l2_bitmap_q[3] <= ( // Forward the Stage 4 write if it targets the same L2 bitmap
+            (en_q4 && (reg_l1_bitmap_idx_q[3] == reg_l1_bitmap_idx_q[2])) ?
+            l2_bitmap_data_q4 : reg_l2_bitmap_q[2]);
+
+        `ifdef DEBUG
+        if (reg_en_q[2] && !en_q3) begin
+            if (reg_is_deque_min_q[2]) begin
+                $display("[PH] Deque-min rejected at Stage 2->3, priority %0d,",
+                         reg_priority_q[2], " pb contents %b", int_pb_q);
+            end
+            else if (reg_is_deque_max_q[2]) begin
+                $display("[PH] Deque-max rejected at Stage 2->3, priority %0d,",
+                         reg_priority_q[2], " pb contents %b", int_pb_q);
+            end
+            else begin
+                $display("[PH] Enque rejected at Stage 2->3, value %0d",
+                         reg_he_value_q[2]);
+            end
+        end
+        `endif
+
+        /**
+         * Stage 2: Compute L2 bitmap index and read the
+         * corresponding priority bucket (head and tail).
+         */
+        reg_en_q[2] <= en_q2;
+        reg_priority_q[2] <= pb_rdaddress;
+        reg_op_type_q[2] <= reg_op_type_q[1];
+        reg_is_enque_q[2] <= reg_is_enque_q[1];
+        reg_he_value_q[2] <= reg_he_value_q[1];
+        reg_is_deque_min_q[2] <= reg_is_deque_min_q[1];
+        reg_is_deque_max_q[2] <= reg_is_deque_max_q[1];
+
+        reg_pb_rdwr_conflict_q2 <= pb_rdwr_conflict;
+        reg_l1_bitmap_idx_q[2] <= reg_l1_bitmap_idx_q[1];
+        reg_l2_bitmap_mask_q[2] <= (1 << l2_bitmap_idx_q2);
+        reg_l1_bitmap_mask_q[2] <= reg_l1_bitmap_mask_q[1];
+
+        reg_l2_bitmap_q[2] <= ( // Forward the Stage 4 write if it targets the same L2 bitmap
+            (en_q4 && (reg_l1_bitmap_idx_q[3] == reg_l1_bitmap_idx_q[1])) ?
+            l2_bitmap_data_q4 : reg_l2_bitmap_q[1]);
+
+        `ifdef DEBUG
+        if (reg_en_q[1] && !en_q2) begin
+            if (reg_is_deque_min_q[1]) begin
+                $display("[PH] Deque-min rejected at Stage 1->2");
+            end
+            else if (reg_is_deque_max_q[1]) begin
+                $display("[PH] Deque-max rejected at Stage 1->2");
+            end
+        end
+        `endif
+
+        /**
+         * Stage 1: Compute L1 bitmap index
+         * and read corresponding L2 bitmap.
+         */
+        reg_en_q[1] <= en_q1;
+        reg_op_type_q[1] <= reg_op_type_q[0];
+        reg_is_enque_q[1] <= reg_is_enque_q[0];
+        reg_priority_q[1] <= reg_priority_q[0];
+        reg_he_value_q[1] <= reg_he_value_q[0];
+        reg_is_deque_min_q[1] <= reg_is_deque_min_q[0];
+        reg_is_deque_max_q[1] <= reg_is_deque_max_q[0];
+
+        reg_l1_bitmap_idx_q[1] <= l1_bitmap_idx_q1;
+        reg_l1_bitmap_mask_q[1] <= (1 << l1_bitmap_idx_q1);
+
+        reg_l2_bitmap_mask_q[1] <= 0;
+        reg_l2_bitmap_q[1] <= ( // Forward the Stage 4 write if it targets the same L2 bitmap
+            (en_q4 && (reg_l1_bitmap_idx_q[3] == l1_bitmap_idx_q1)) ?
+            l2_bitmap_data_q4 : l2_bitmaps[l1_bitmap_idx_q1]);
+
+        `ifdef DEBUG
+        if (reg_en_q[0] && reg_is_deque_min_q[0] && !en_q1) begin
+            $display("[PH] Deque-min rejected at Stage 0->1");
+        end
+        else if (reg_en_q[0] && reg_is_deque_max_q[0] && !en_q1) begin
+            $display("[PH] Deque-max rejected at Stage 0->1");
+        end
+        `endif
+
+        /**
+         * Stage 0: Register inputs.
+         */
+        reg_en_q[0] <= (ready & in_en);
+        reg_op_type_q[0] <= in_op_type;
+        reg_he_value_q[0] <= in_he_value;
+        reg_priority_q[0] <= in_he_priority;
+        reg_is_enque_q[0] <= (in_op_type == HEAP_OP_ENQUE);
+        reg_is_deque_min_q[0] <= (in_op_type == HEAP_OP_DEQUE_MIN);
+        reg_is_deque_max_q[0] <= (in_op_type == HEAP_OP_DEQUE_MAX);
+
+        reg_l2_bitmap_q[0] <= 0;
+        reg_l1_bitmap_idx_q[0] <= 0;
+        reg_l2_bitmap_mask_q[0] <= 0;
+        reg_l1_bitmap_mask_q[0] <= 0;
+        reg_l1_bitmap_q0 <= int_l1_bitmap; // Includes this cycle's Stage 4 bitmap update
+
+        `ifdef DEBUG
+        if (in_en) begin
+            if (in_op_type == HEAP_OP_ENQUE) begin
+                $display("[PH] Inserting value %0d with priority %0d",
+                         in_he_value, in_he_priority);
+            end
+            else if (in_op_type == HEAP_OP_DEQUE_MIN) begin
+                $display("[PH] Dequeing-min, heap size is: %0d, ",
+                         size, "L1 bitmap is: %b", int_l1_bitmap);
+            end
+            else begin
+                $display("[PH] Dequeing-max, heap size is: %0d, ",
+                         size, "L1 bitmap is: %b", int_l1_bitmap);
+            end
+        end
+        `endif
+
+        // Register init signals
+        fl_init_done_r <= fl_init_done;
+        pb_init_done_r <= pb_init_done;
+        fl_wraddress_counter_r <= fl_wraddress + 1;
+        pb_wraddress_counter_r <= pb_wraddress + 1;
+
+        `ifdef DEBUG
+        if ((state == FSM_STATE_INIT) &&
+            (state_next == FSM_STATE_READY)) begin
+            $display("[PH] Heap initialization complete");
+        end
+        `endif
+
+        // Update FSM state
+        state <= state_next;
+    end
+end
+
+// Free-list entries
+bram_simple2port #(
+    .DWIDTH(FREE_LIST_ENTRY_T_WIDTH),
+    .AWIDTH(HEAP_ENTRY_AWIDTH),
+    .DEPTH(HEAP_MAX_NUM_ENTRIES),
+    .IS_OUTDATA_REG(0)
+)
+free_list_entries (
+    .clock(clk),
+    .data(fl_data),
+    .rden(fl_rden),
+    .wren(fl_wren),
+    .rdaddress(fl_rdaddress),
+    .wraddress(fl_wraddress),
+    .q(fl_q)
+);
+
+// Heap entries
+bram_simple2port #(
+    .DWIDTH(HEAP_ENTRY_T_WIDTH),
+    .AWIDTH(HEAP_ENTRY_AWIDTH),
+    .DEPTH(HEAP_MAX_NUM_ENTRIES),
+    .IS_OUTDATA_REG(0)
+)
+heap_entries (
+    .clock(clk),
+    .data(he_data),
+    .rden(he_rden),
+    .wren(he_wren),
+    .rdaddress(he_rdaddress),
+    .wraddress(he_wraddress),
+    .q(he_q)
+);
+
+// Priority buckets
+bram_simple2port #(
+    .DWIDTH(LIST_T_WIDTH),
+    .AWIDTH(HEAP_PRIORITY_AWIDTH),
+    .DEPTH(HEAP_NUM_PRIORITIES),
+    .IS_OUTDATA_REG(0)
+)
+priority_buckets (
+    .clock(clk),
+    .data(pb_data),
+    .rden(pb_rden),
+    .wren(pb_wren),
+    .rdaddress(pb_rdaddress),
+    .wraddress(pb_wraddress),
+    .q(pb_q)
+);
+
+// L1 FFS (scans the L1 bitmap snapshot registered at Stage 0)
+ffs #(
+    .WIDTH_LOG(HEAP_LOG_BITMAP_WIDTH)
+)
+ffs_l1_inst (
+    .x(reg_l1_bitmap_q0),
+    .msb(ffs_l1_inst_msb),
+    .lsb(ffs_l1_inst_lsb),
+    .zero(ffs_l1_inst_zero)
+);
+
+// L2 FFS (scans the L2 bitmap registered at Stage 1)
+ffs #(
+    .WIDTH_LOG(HEAP_LOG_BITMAP_WIDTH)
+)
+ffs_l2_inst (
+    .x(reg_l2_bitmap_q[1]),
+    .msb(ffs_l2_inst_msb),
+    .lsb(ffs_l2_inst_lsb),
+    .zero(ffs_l2_inst_zero)
+);
+
+endmodule
diff --git a/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/pipelined_heap_wrapper.sv b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/pipelined_heap_wrapper.sv
new file mode 100644
index 0000000..3223605
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/src/reassembly/surge_protector/pipelined_heap_wrapper.sv
@@ -0,0 +1,367 @@
+import heap_ops_pkg::*;
+// `define DEBUG
+
+/**
+ * Implements a wrapper for pipelined_heap.
+ *
+ * Since the pipelined heap itself offers a rather simplistic inter-
+ * face, this module augments it with some additional functionality.
+ * In particular, the wrapper:
+ *
+ * 1. Allows multiple operations (enque, deque-min, or deque-max) to
+ * be performed every cycle. This is implemented by maintaining a
+ * small input FIFO for enque ops, and two small output FIFOs for
+ * deque ops (one for deque-PRIMARY, another for deque-SECONDARY).
+ * Performing an operation involves simply enqueing into/dequeing
+ * from the appropriate FIFO.
+ *
+ * 2. Reduces the common-case latency of deque-PRIMARY ops. The module
+ * opportunistically (i.e., automatically) issues deque-PRIMARY ops
+ * whenever possible, storing the result in the corresponding out-
+ * FIFO. This allows the result to be accessed in 1 cycle (instead
+ * of 5 cycles, the end-to-end latency cost of the heap pipeline).
+ * Note that deque-SECONDARY ops are not performed automatically,
+ * but can be issued through the request/response interface.
+ *
+ * 3. Ensures that ops are not "lost". Any failed deque-* operations
+ * (due to, e.g., two back-to-back deques landing on a priority
+ * bucket with just one heap entry) are automatically re-issued.
+ *
+ * TODO(natre): rst is implemented for the pipelined heap, but not
+ * for the wrapper itself. Fix this (OK for now since global reset
+ * is not implemented anywhere yet).
+ */
+module pipelined_heap_wrapper #(
+ parameter HEAP_BITMAP_WIDTH,
+ parameter HEAP_MAX_NUM_ENTRIES,
+ parameter HEAP_ENTRY_VALUE_WIDTH,
+ parameter HEAP_OP_DEQUE_PRIMARY_TYPE,
+ localparam HEAP_NUM_PRIORITIES = (HEAP_BITMAP_WIDTH ** 2),
+ localparam HEAP_ENTRY_AWIDTH = ($clog2(HEAP_MAX_NUM_ENTRIES)),
+ localparam HEAP_PRIORITY_AWIDTH = ($clog2(HEAP_NUM_PRIORITIES))
+) (
+ // General inputs
+ input logic clk,
+ input logic rst,
+
+ // Enque
+ input logic in_enque_en,
+ output logic in_enque_ready,
+ input logic [HEAP_ENTRY_VALUE_WIDTH-1:0] in_enque_value,
+ input logic [HEAP_PRIORITY_AWIDTH-1:0] in_enque_priority,
+
+ // Deque-PRIMARY
+ input logic out_deque_primary_en,
+ output logic out_deque_primary_ready,
+ output logic [HEAP_ENTRY_VALUE_WIDTH-1:0] out_deque_primary_value,
+ output logic [HEAP_PRIORITY_AWIDTH-1:0] out_deque_primary_priority,
+
+ // Deque-SECONDARY (request)
+ input logic in_deque_secondary_req_en,
+ output logic in_deque_secondary_req_ready,
+ // Deque-SECONDARY (response)
+ input logic out_deque_secondary_en,
+ output logic out_deque_secondary_ready,
+ output logic [HEAP_ENTRY_VALUE_WIDTH-1:0] out_deque_secondary_value,
+ output logic [HEAP_PRIORITY_AWIDTH-1:0] out_deque_secondary_priority,
+
+ // Feedback
+ output logic [HEAP_ENTRY_AWIDTH:0] heap_size,
+ output logic heap_ready
+);
+
+/**
+ * Local/derived parameters.
+ */
+localparam DEQUE_FIFO_DEPTH = 4;
+localparam ENQUE_FIFO_DEPTH = 64;
+localparam DEQUE_FIFO_LOG_DEPTH = $clog2(DEQUE_FIFO_DEPTH);
+localparam ENQUE_FIFO_LOG_DEPTH = $clog2(ENQUE_FIFO_DEPTH);
+localparam FIFO_WIDTH = (HEAP_ENTRY_VALUE_WIDTH + HEAP_PRIORITY_AWIDTH);
+
+localparam IS_PRIMARY_TYPE_MIN = (
+ HEAP_OP_DEQUE_PRIMARY_TYPE == HEAP_OP_DEQUE_MIN);
+
+localparam HEAP_OP_DEQUE_SECONDARY_TYPE = (
+ IS_PRIMARY_TYPE_MIN ? HEAP_OP_DEQUE_MAX :
+ HEAP_OP_DEQUE_MIN);
+
+localparam IS_SECONDARY_TYPE_MIN = (
+ HEAP_OP_DEQUE_SECONDARY_TYPE == HEAP_OP_DEQUE_MIN);
+
+/**
+ * Local typedefs.
+ */
+typedef logic [HEAP_PRIORITY_AWIDTH-1:0] heap_priority_t;
+typedef logic [HEAP_ENTRY_VALUE_WIDTH-1:0] heap_entry_value_t;
+
+typedef enum logic [1:0] {
+ DEQUE_SECONDARY_REQ_STATE_IDLE,
+ DEQUE_SECONDARY_REQ_STATE_INIT,
+ DEQUE_SECONDARY_REQ_STATE_WAIT
+} deque_secondary_req_state_t;
+
+/**
+ * Housekeeping.
+ */
+// Enque in-FIFO
+logic enque_fifo_full;
+logic enque_fifo_empty;
+logic enque_fifo_rdreq;
+logic enque_fifo_wrreq;
+heap_entry_value_t enque_fifo_q_value;
+heap_priority_t enque_fifo_q_priority;
+
+// Deque-PRIMARY out-FIFO
+logic deque_primary_fifo_empty;
+logic deque_primary_fifo_rdreq;
+logic deque_primary_fifo_wrreq;
+heap_entry_value_t deque_primary_fifo_data_value;
+heap_priority_t deque_primary_fifo_data_priority;
+
+// Deque-SECONDARY out-FIFO
+logic deque_secondary_fifo_full;
+logic deque_secondary_fifo_empty;
+logic deque_secondary_fifo_rdreq;
+logic deque_secondary_fifo_wrreq;
+heap_entry_value_t deque_secondary_fifo_data_value;
+heap_priority_t deque_secondary_fifo_data_priority;
+
+// Pipelined heap
+logic heap_in_en;
+logic heap_out_valid;
+heap_op_t heap_in_op_type;
+heap_op_t heap_out_op_type;
+heap_entry_value_t heap_in_value;
+heap_priority_t heap_in_priority;
+heap_entry_value_t heap_out_value;
+heap_priority_t heap_out_priority;
+logic [2:0] heap_num_ops_enque;
+logic [2:0] heap_num_ops_deque_min;
+logic [2:0] heap_num_ops_deque_max;
+
+// Miscellaneous
+logic [2:0] heap_num_ops_deque_primary;
+logic [2:0] heap_num_ops_deque_secondary;
+
+// Deque-SECONDARY FSM state
+deque_secondary_req_state_t deque_secondary_req_state;
+deque_secondary_req_state_t deque_secondary_req_state_next;
+
+// Miscellaneous
+logic [HEAP_ENTRY_AWIDTH:0] heap_size_plus_enques;
+
+// Enque in-FIFO logic
+assign in_enque_ready = !enque_fifo_full;
+assign enque_fifo_wrreq = in_enque_ready & in_enque_en;
+
+// Deque-PRIMARY out-FIFO logic
+assign deque_primary_fifo_data_value = heap_out_value;
+assign out_deque_primary_ready = !deque_primary_fifo_empty;
+assign deque_primary_fifo_data_priority = heap_out_priority;
+assign deque_primary_fifo_rdreq = (out_deque_primary_ready &
+ out_deque_primary_en);
+
+assign deque_primary_fifo_wrreq = (heap_out_valid & (heap_out_op_type ==
+ HEAP_OP_DEQUE_PRIMARY_TYPE));
+// Deque-SECONDARY out-FIFO logic
+assign deque_secondary_fifo_data_value = heap_out_value;
+assign deque_secondary_fifo_data_priority = heap_out_priority;
+assign out_deque_secondary_ready = !deque_secondary_fifo_empty;
+assign deque_secondary_fifo_rdreq = (out_deque_secondary_en &
+ out_deque_secondary_ready);
+
+assign deque_secondary_fifo_wrreq = (heap_out_valid & (heap_out_op_type ==
+ HEAP_OP_DEQUE_SECONDARY_TYPE));
+// Miscellaneous
+assign heap_num_ops_deque_primary = (
+ IS_PRIMARY_TYPE_MIN ? heap_num_ops_deque_min :
+ heap_num_ops_deque_max);
+
+assign heap_num_ops_deque_secondary = (
+ IS_SECONDARY_TYPE_MIN ? heap_num_ops_deque_min :
+ heap_num_ops_deque_max);
+
+initial begin
+ deque_secondary_req_state = DEQUE_SECONDARY_REQ_STATE_IDLE;
+end
+
+always @(*) begin // Combinational: deque-SECONDARY FSM next state + heap input arbitration
+ heap_in_en = 0; // Defaults: no heap op is issued unless a branch below sets one
+ heap_in_value = 0;
+ heap_in_priority = 0;
+ enque_fifo_rdreq = 0;
+ heap_in_op_type = HEAP_OP_ENQUE;
+ in_deque_secondary_req_ready = 0;
+ heap_size_plus_enques = (heap_size + heap_num_ops_enque);
+ deque_secondary_req_state_next = deque_secondary_req_state; // Hold state by default
+
+ // Deque-SECONDARY FSM
+ case (deque_secondary_req_state)
+ // Idle state: Raise request-ready when the heap is non-empty
+ // and the deque-SECONDARY output FIFO is empty. If request-
+ // enable is also high, go to the init state.
+ DEQUE_SECONDARY_REQ_STATE_IDLE: begin
+ in_deque_secondary_req_ready = ((heap_size != 0) &&
+ !out_deque_secondary_ready);
+
+ if (in_deque_secondary_req_ready & in_deque_secondary_req_en) begin
+ deque_secondary_req_state_next = DEQUE_SECONDARY_REQ_STATE_INIT;
+ end
+ end
+ // Init state: Insert a deque-SECONDARY operation into the
+ // heap pipeline. Once the op is successful (it appears on
+ // the pipeline output register), proceed to wait.
+ DEQUE_SECONDARY_REQ_STATE_INIT: begin
+ // The op reached the heap output and is being written to the out FIFO
+ if (deque_secondary_fifo_wrreq) begin
+ deque_secondary_req_state_next = (
+ DEQUE_SECONDARY_REQ_STATE_WAIT);
+ end
+ // Otherwise, if no deque-SECONDARY op is in the pipeline
+ // (we just entered this state, or a prior op was discarded)
+ // and the heap is non-empty and ready, schedule a new op.
+ else if ((heap_num_ops_deque_secondary == 0) &&
+ (heap_size != 0) && heap_ready) begin
+ heap_in_en = 1;
+ heap_in_op_type = HEAP_OP_DEQUE_SECONDARY_TYPE;
+ end
+ end
+ // Wait state: The deque-SECONDARY result was written to the
+ // out FIFO. Wait for the FIFO to report non-empty, then
+ // return to the idle state.
+ DEQUE_SECONDARY_REQ_STATE_WAIT: begin
+ if (out_deque_secondary_ready) begin
+ deque_secondary_req_state_next = (
+ DEQUE_SECONDARY_REQ_STATE_IDLE);
+ end
+ end
+ endcase
+
+ // Arbitrates access to the pipelined heap input
+ // port. In decreasing order of priority:
+ //
+ // 1. Deque-SECONDARY operations issued by the
+ // request FSM above (it sets heap_in_en).
+ //
+ // 2. Deque-PRIMARY operations are issued if the
+ // heap has at least one entry, the primary
+ // out FIFO is empty and not being written,
+ // and no deque-PRIMARY ops are outstanding.
+ //
+ // 3. Enque operations are issued if the input
+ // FIFO has at least one entry and the heap
+ // size plus the number of outstanding enque
+ // operations is less than the hard limit
+ // HEAP_MAX_NUM_ENTRIES.
+ if (heap_ready && !heap_in_en) begin
+ if (heap_size != 0 && deque_primary_fifo_empty &&
+ (heap_num_ops_deque_primary == 0) &&
+ !deque_primary_fifo_wrreq) begin
+ heap_in_en = 1;
+ heap_in_op_type = HEAP_OP_DEQUE_PRIMARY_TYPE;
+ end
+ else if (!enque_fifo_empty &&
+ (heap_size_plus_enques <
+ HEAP_MAX_NUM_ENTRIES)) begin
+ heap_in_en = 1;
+ enque_fifo_rdreq = 1; // Pop the in-FIFO entry that is handed to the heap
+ heap_in_op_type = HEAP_OP_ENQUE;
+ heap_in_value = enque_fifo_q_value;
+ heap_in_priority = enque_fifo_q_priority;
+ end
+ end
+end
+
+always @(posedge clk) begin
+ // State register: rst returns the request FSM to idle alongside the heap.
+ if (rst) deque_secondary_req_state <= DEQUE_SECONDARY_REQ_STATE_IDLE;
+ else deque_secondary_req_state <= deque_secondary_req_state_next;
+ `ifdef DEBUG
+ if (enque_fifo_wrreq) begin
+ $display("[PHW] Inserting value %0d into in-FIFO",
+ in_enque_value);
+ end
+ `endif
+end
+
+
+// Enque in-FIFO: buffers (value, priority) pairs until they are handed to the heap
+sc_fifo #(
+ .IS_SHOWAHEAD(1),
+ .IS_OUTDATA_REG(0),
+ .DWIDTH(FIFO_WIDTH),
+ .DEPTH(ENQUE_FIFO_DEPTH)
+)
+enque_fifo (
+ .clock(clk),
+ .data({in_enque_value, in_enque_priority}), // Packed as {value, priority}
+ .rdreq(enque_fifo_rdreq),
+ .wrreq(enque_fifo_wrreq),
+ .empty(enque_fifo_empty),
+ .full(enque_fifo_full),
+ .q({enque_fifo_q_value, enque_fifo_q_priority}), // Unpacked in the same order
+ .usedw() // Unused
+);
+
+// Deque-PRIMARY out-FIFO: holds heap results of deque-PRIMARY operations
+sc_fifo #(
+ .IS_SHOWAHEAD(1),
+ .IS_OUTDATA_REG(0),
+ .DWIDTH(FIFO_WIDTH),
+ .DEPTH(DEQUE_FIFO_DEPTH)
+)
+deque_primary_fifo (
+ .clock(clk),
+ .data({deque_primary_fifo_data_value, deque_primary_fifo_data_priority}),
+ .rdreq(deque_primary_fifo_rdreq),
+ .wrreq(deque_primary_fifo_wrreq),
+ .empty(deque_primary_fifo_empty),
+ .full(), // Unused
+ .q({out_deque_primary_value, out_deque_primary_priority}),
+ .usedw() // Unused
+);
+
+// Deque-SECONDARY out-FIFO: holds heap results of deque-SECONDARY operations
+sc_fifo #(
+ .IS_SHOWAHEAD(1),
+ .IS_OUTDATA_REG(0),
+ .DWIDTH(FIFO_WIDTH),
+ .DEPTH(DEQUE_FIFO_DEPTH)
+)
+deque_secondary_fifo (
+ .clock(clk),
+ .data({deque_secondary_fifo_data_value, deque_secondary_fifo_data_priority}),
+ .rdreq(deque_secondary_fifo_rdreq),
+ .wrreq(deque_secondary_fifo_wrreq),
+ .empty(deque_secondary_fifo_empty),
+ .full(deque_secondary_fifo_full), // NOTE(review): not read in the logic visible here - confirm it is used
+ .q({out_deque_secondary_value, out_deque_secondary_priority}),
+ .usedw() // Unused
+);
+
+// Pipelined heap: the underlying heap that the FIFOs and arbitration logic above drive
+pipelined_heap #(
+ .HEAP_BITMAP_WIDTH(HEAP_BITMAP_WIDTH),
+ .HEAP_MAX_NUM_ENTRIES(HEAP_MAX_NUM_ENTRIES),
+ .HEAP_ENTRY_VALUE_WIDTH(HEAP_ENTRY_VALUE_WIDTH)
+)
+heap (
+ .clk(clk),
+ .rst(rst),
+ .ready(heap_ready), // Gates every op issued by the arbitration logic
+ .in_en(heap_in_en),
+ .in_op_type(heap_in_op_type),
+ .in_he_value(heap_in_value),
+ .in_he_priority(heap_in_priority),
+ .out_valid(heap_out_valid), // With out_op_type, selects which out FIFO is written
+ .out_op_type(heap_out_op_type),
+ .out_he_value(heap_out_value),
+ .out_he_priority(heap_out_priority),
+ .size(heap_size),
+ .num_ops_enque(heap_num_ops_enque), // Added to size to bound enques
+ .num_ops_deque_min(heap_num_ops_deque_min),
+ .num_ops_deque_max(heap_num_ops_deque_max)
+);
+
+endmodule
diff --git a/pigasus/hardware/rtl_sim/src/services/data_mover_service.sv b/pigasus/hardware/rtl_sim/src/services/data_mover_service.sv
index 9fbd040..8049177 100644
--- a/pigasus/hardware/rtl_sim/src/services/data_mover_service.sv
+++ b/pigasus/hardware/rtl_sim/src/services/data_mover_service.sv
@@ -12,6 +12,7 @@ module data_mover_service (
output logic [31:0] stats_in_check_meta,
output logic [31:0] stats_in_ooo_meta,
output logic [31:0] stats_in_forward_ooo_meta,
+ output logic [63:0] stats_out_bytes,
output logic [31:0] stats_nopayload_pkt,
output logic [31:0] stats_check_pkt,
@@ -120,8 +121,11 @@ module data_mover_service (
);
//stats
- metadata_t tmp_meta;
- assign tmp_meta = port_meta[0].data;
+ metadata_t port_in_meta;
+ assign port_in_meta = port_meta[0].data;
+
+ metadata_t port_out_meta;
+ assign port_out_meta = port_meta[1].data;
always @ (posedge Clk) begin
if (~Rst_n) begin
@@ -130,10 +134,11 @@ module data_mover_service (
stats_in_check_meta <= 0;
stats_in_ooo_meta <= 0;
stats_in_forward_ooo_meta <= 0;
+ stats_out_bytes <= 0;
end else begin
if (port_meta[0].ready & port_meta[0].valid) begin
- case (tmp_meta.pkt_flags)
+ case (port_in_meta.pkt_flags)
PKT_FORWARD: stats_in_forward_meta <= stats_in_forward_meta + 1;
PKT_DROP: stats_in_drop_meta <= stats_in_drop_meta + 1;
PKT_CHECK: stats_in_check_meta <= stats_in_check_meta + 1;
@@ -141,6 +146,12 @@ module data_mover_service (
PKT_FORWARD_OOO: stats_in_forward_ooo_meta <= stats_in_forward_ooo_meta + 1;
endcase
end
+
+ if (port_meta[1].ready & port_meta[1].valid) begin
+ stats_out_bytes <= (stats_out_bytes +
+ port_out_meta.len +
+ port_out_meta.hdr_len);
+ end
end
end
diff --git a/pigasus/hardware/rtl_sim/src/services/flow_table_client.sv b/pigasus/hardware/rtl_sim/src/services/flow_table_client.sv
index a02298c..d09603e 100644
--- a/pigasus/hardware/rtl_sim/src/services/flow_table_client.sv
+++ b/pigasus/hardware/rtl_sim/src/services/flow_table_client.sv
@@ -13,22 +13,25 @@ module flow_table_client (
server.svr in_meta,
server.clt out_meta,
server.clt forward_meta,
- server.clt reorder_meta
+ server.clt reorder_meta,
+ server.clt scheduler_meta
);
- channel_if#(.WIDTH($bits(metadata_t))) port[4]();
- channel_if#(.WIDTH($bits(metadata_t))) blank[4]();
+ channel_if#(.WIDTH($bits(metadata_t))) port[5]();
+ channel_if#(.WIDTH($bits(metadata_t))) blank[5]();
//Stats
logic [31:0] out_meta_cnt;
logic [31:0] forward_meta_cnt;
logic [31:0] reorder_meta_cnt;
+ logic [31:0] scheduler_meta_cnt;
always @ (posedge Clk) begin
if (~Rst_n) begin
stats_out_meta <= 0;
end else begin
- stats_out_meta <= out_meta_cnt + forward_meta_cnt + reorder_meta_cnt;
+ stats_out_meta <= (out_meta_cnt + forward_meta_cnt +
+ reorder_meta_cnt + scheduler_meta_cnt);
end
end
@@ -57,7 +60,11 @@ module flow_table_client (
.reorder_meta_data (port[3].data),
.reorder_meta_valid (port[3].valid),
.reorder_meta_ready (port[3].ready),
- .reorder_meta_almost_full (port[3].almost_full)
+ .reorder_meta_almost_full (port[3].almost_full),
+ .scheduler_meta_data (port[4].data),
+ .scheduler_meta_valid (port[4].valid),
+ .scheduler_meta_ready (port[4].ready),
+ .scheduler_meta_almost_full(port[4].almost_full)
);
server_epig_shim#(.DATA_BITS($bits(metadata_t))) out_ss
@@ -87,6 +94,15 @@ module flow_table_client (
.rx(port[3])
);
+ server_epig_shim#(.DATA_BITS($bits(metadata_t))) scheduler_ss
+ (
+ .clk(Clk),
+ .SoftReset(~Rst_n),
+ .fl2clt(scheduler_meta),
+ .tx(blank[4]),
+ .rx(port[4])
+ );
+
//stats
stats_cnt in_meta_inst(
.Clk (Clk),
@@ -116,5 +132,12 @@ module flow_table_client (
.ready (port[3].ready),
.stats_flit (reorder_meta_cnt)
);
+ stats_cnt scheduler_meta_inst(
+ .Clk (Clk),
+ .Rst_n (Rst_n),
+ .valid (port[4].valid),
+ .ready (port[4].ready),
+ .stats_flit (scheduler_meta_cnt)
+ );
endmodule
diff --git a/pigasus/hardware/rtl_sim/src/services/parser_client.sv b/pigasus/hardware/rtl_sim/src/services/parser_client.sv
index 3e3cb9b..3e7ede2 100644
--- a/pigasus/hardware/rtl_sim/src/services/parser_client.sv
+++ b/pigasus/hardware/rtl_sim/src/services/parser_client.sv
@@ -6,6 +6,7 @@ module parser_client (
input logic Clk,
input logic Rst_n,
output logic [31:0] stats_out_meta,
+ output logic [63:0] stats_out_bytes,
server.svr in_meta,
server.svr in_pkt,
@@ -75,4 +76,19 @@ module parser_client (
.stats_flit (stats_out_meta)
);
+ metadata_t port_out_meta; // Typed view of the metadata on port_meta[1]
+ assign port_out_meta = port_meta[1].data;
+
+ always @ (posedge Clk) begin // 64-bit byte counter for metadata leaving on port_meta[1]
+ if (~Rst_n) begin
+ stats_out_bytes <= 0;
+ end else begin
+ if (port_meta[1].ready & port_meta[1].valid) begin // Count once per accepted transfer
+ stats_out_bytes <= (stats_out_bytes +
+ port_out_meta.len + // NOTE(review): presumably payload bytes - confirm against metadata_t
+ port_out_meta.hdr_len);
+ end
+ end
+ end
+
endmodule
diff --git a/pigasus/hardware/rtl_sim/src/services/reassembler_service.sv b/pigasus/hardware/rtl_sim/src/services/reassembler_service.sv
index c15081a..7d3c770 100644
--- a/pigasus/hardware/rtl_sim/src/services/reassembler_service.sv
+++ b/pigasus/hardware/rtl_sim/src/services/reassembler_service.sv
@@ -19,6 +19,7 @@ module reassembler_service (
output logic [31:0] parser_meta_csr_readdata,
output logic [31:0] stats_incomp_out_meta,
output logic [31:0] stats_parser_out_meta,
+ output logic [63:0] stats_parser_out_bytes,
output logic [31:0] stats_ft_in_meta,
output logic [31:0] stats_ft_out_meta,
output logic [31:0] stats_emptylist_in,
@@ -30,6 +31,7 @@ module reassembler_service (
output logic [31:0] stats_dm_in_check_meta,
output logic [31:0] stats_dm_in_ooo_meta,
output logic [31:0] stats_dm_in_forward_ooo_meta,
+ output logic [63:0] stats_dm_out_bytes,
output logic [31:0] stats_nopayload_pkt,
output logic [31:0] stats_dm_check_pkt,
@@ -48,6 +50,8 @@ module reassembler_service (
server#(.DATA_BITS($bits(metadata_t))) parser_meta_fifo();
server#(.DATA_BITS($bits(metadata_t))) ftw_out_meta();
server#(.DATA_BITS($bits(metadata_t))) ftw_reorder_meta();
+ server#(.DATA_BITS($bits(metadata_t))) ftw_scheduler_meta();
+ server#(.DATA_BITS($bits(metadata_t))) ftw_reassembly_meta();
server#(.DATA_BITS($bits(metadata_t))) ftw_nonforward_meta();
server#(.DATA_BITS($bits(metadata_t))) ftw_forward_meta();
server#(.DATA_BITS($bits(metadata_t))) dm_meta_in();
@@ -77,6 +81,7 @@ parser_client my_parser (
.Clk(Clk),
.Rst_n(Rst_n),
.stats_out_meta(stats_parser_out_meta),
+ .stats_out_bytes(stats_parser_out_bytes),
.in_meta(incomp_meta),
.in_pkt(incomp_pkt),
@@ -110,7 +115,8 @@ flow_table_client ftw_inst (
.in_meta (parser_meta_fifo),
.out_meta (ftw_out_meta),
.forward_meta (ftw_forward_meta),
- .reorder_meta (ftw_reorder_meta)
+ .reorder_meta (ftw_reorder_meta),
+ .scheduler_meta (ftw_scheduler_meta)
);
arb_2_af_service #(
@@ -118,11 +124,24 @@ arb_2_af_service #(
.DEPTH(512),
.FULL_LEVEL(480)
)
-arb_inorder_ooo(
+arb_inorder_reassembly(
.Clk (Clk),
.Rst_n (Rst_n),
- .in0 (ftw_out_meta),
+ .in0 (ftw_scheduler_meta),
.in1 (ftw_reorder_meta),
+ .out (ftw_reassembly_meta)
+);
+
+arb_2_af_service #(
+ .DWIDTH(META_WIDTH),
+ .DEPTH(512),
+ .FULL_LEVEL(480)
+)
+arb_inorder_ooo(
+ .Clk (Clk),
+ .Rst_n (Rst_n),
+ .in0 (ftw_reassembly_meta),
+ .in1 (ftw_out_meta),
.out (ftw_nonforward_meta)
);
@@ -148,6 +167,7 @@ data_mover_service dm_inst (
.stats_in_check_meta (stats_dm_in_check_meta),
.stats_in_ooo_meta (stats_dm_in_ooo_meta),
.stats_in_forward_ooo_meta (stats_dm_in_forward_ooo_meta),
+ .stats_out_bytes (stats_dm_out_bytes),
.stats_nopayload_pkt (stats_nopayload_pkt),
.stats_check_pkt (stats_dm_check_pkt),
diff --git a/pigasus/hardware/rtl_sim/src/stats_reg.sv b/pigasus/hardware/rtl_sim/src/stats_reg.sv
index d2c7b3b..ba76910 100644
--- a/pigasus/hardware/rtl_sim/src/stats_reg.sv
+++ b/pigasus/hardware/rtl_sim/src/stats_reg.sv
@@ -1,7 +1,7 @@
`ifndef STATS_REG
`define STATS_REG
-parameter NUM_REG = 54;
+parameter NUM_REG = 58;
parameter REG_IN_PKT = 0;
parameter REG_OUT_PKT = 1;
@@ -57,4 +57,8 @@ parameter REG_MAX_BYPASS2NF = 50;
parameter REG_MAX_NF2PDU = 51;
parameter REG_SM_BYPASS_AF = 52;
parameter REG_SM_CDC_AF = 53;
+parameter REG_PARSER_OUT_BYTES_L = 54;
+parameter REG_PARSER_OUT_BYTES_H = 55;
+parameter REG_DM_OUT_BYTES_L = 56;
+parameter REG_DM_OUT_BYTES_H = 57;
`endif
diff --git a/pigasus/hardware/rtl_sim/src/struct_s.sv b/pigasus/hardware/rtl_sim/src/struct_s.sv
index a33d743..2f8d8ab 100644
--- a/pigasus/hardware/rtl_sim/src/struct_s.sv
+++ b/pigasus/hardware/rtl_sim/src/struct_s.sv
@@ -6,18 +6,30 @@
`ifndef STRUCT_S
`define STRUCT_S
-//`define SIM
+// `define SIM
`define USE_BRAM
`define BRAM_CHECKPKT_BUF
`define NO_C2F
-//`define DISABLE_NF_BYPASS
+// `define DISABLE_NF_BYPASS
// `define NO_BP
+// `define ENABLE_SURGEPROTECTOR
+
+// Reassembler scheduling policy
+`ifdef ENABLE_SURGEPROTECTOR
+parameter SCHEDULER_REASSEMBLY_POLICY = "WSJF";
+`else
+parameter SCHEDULER_REASSEMBLY_POLICY = "FCFS";
+`endif
// Packet buffer
// STORE 1024 pkts, each pkt takes 32 * 512 bits = 2 KB.
// 32 * 1024 = 32768 entries.
`ifdef USE_BRAM
+`ifdef PKT_NUM
+parameter PKT_NUM = `PKT_NUM;
+`else
parameter PKT_NUM = 1024;
+`endif
`else
parameter PKT_NUM = 2688;
`endif
@@ -93,7 +105,6 @@ typedef struct packed {
logic eop;
logic [5:0] empty;
} flit_meta_t;
-parameter LL_DWIDTH = ((((((1 + 32) + 16) + 16) + PKT_AWIDTH) + 1) + 56);
typedef struct packed {
logic valid; // Valid
@@ -105,7 +116,7 @@ typedef struct packed {
logic last; // Last
logic [55:0] last_7_bytes; // Last
} entry_t;
-parameter TUPLE_DWIDTH = (((32 + 32) + 16) + 16);
+parameter LL_DWIDTH = (((((((1 + 32) + 16) + LL_AWIDTH) + PKT_AWIDTH) + 5) + 1) + 56);
typedef struct packed {
logic [31:0] sIP;
@@ -113,7 +124,86 @@ typedef struct packed {
logic [15:0] sPort;
logic [15:0] dPort;
} tuple_t;
+parameter TUPLE_DWIDTH = (((32 + 32) + 16) + 16);
+
+/**
+ * Reassembler service.
+ */
+// OOO flow IDs
+parameter MAX_NUM_OOO_FLOWS = 1024;
+parameter OOO_FLOW_ID_AWIDTH = $clog2(MAX_NUM_OOO_FLOWS);
+// Service Queue
+parameter HEAP_BITMAP_WIDTH = 32;
+parameter HEAP_MAX_NUM_ENTRIES = MAX_NUM_OOO_FLOWS;
+parameter HEAP_NUM_PRIORITIES = (HEAP_BITMAP_WIDTH ** 2);
+parameter HEAP_PRIORITY_AWIDTH = $clog2(HEAP_NUM_PRIORITIES);
+parameter HEAP_LOG_MAX_NUM_ENTRIES = $clog2(HEAP_MAX_NUM_ENTRIES);
+// Scheduler
+parameter OOO_FLOW_LL_MAX_NUM_ENTRIES = (PKT_NUM / 2);
+parameter OOO_FLOW_LL_ENTRY_AWIDTH = $clog2(OOO_FLOW_LL_MAX_NUM_ENTRIES);
+parameter OOO_FLOW_LL_ENTRY_PTR_T_WIDTH = (OOO_FLOW_LL_ENTRY_AWIDTH + 1);
+
+typedef logic [OOO_FLOW_ID_AWIDTH-1:0] ooo_flow_id_t;
+typedef logic [HEAP_LOG_MAX_NUM_ENTRIES:0] heap_size_t;
+typedef logic [HEAP_PRIORITY_AWIDTH-1:0] heap_priority_t;
+typedef logic [OOO_FLOW_LL_ENTRY_PTR_T_WIDTH-1:0] ooo_flow_ll_entry_ptr_t;
+
+typedef struct packed {
+ tuple_t tuple;
+ ooo_flow_id_t ooo_flow_id;
+} scheduler_token_t;
+parameter SCHEDULER_TOKEN_T_WIDTH = (TUPLE_DWIDTH + OOO_FLOW_ID_AWIDTH);
+typedef struct packed {
+ ooo_flow_ll_entry_ptr_t head;
+ ooo_flow_ll_entry_ptr_t tail;
+} ooo_flow_list_t;
+localparam OOO_FLOW_LIST_T_WIDTH = (2 * OOO_FLOW_LL_ENTRY_PTR_T_WIDTH);
+
+typedef struct packed {
+ logic valid;
+ tuple_t tuple;
+ logic [31:0] seq;
+ logic ll_valid;
+ logic [LL_AWIDTH-1:0] pointer;
+ logic [LL_AWIDTH-1:0] ll_size;
+ logic [55:0] last_7_bytes;
+ logic [FT_AWIDTH-1:0] addr0;
+ logic [FT_AWIDTH-1:0] addr1;
+ logic [FT_AWIDTH-1:0] addr2;
+ logic [FT_AWIDTH-1:0] addr3;
+ ooo_flow_list_t ooo_flow_ll;
+} ooo_flow_fc_entry_t;
+localparam OOO_FLOW_FC_ENTRY_T_WIDTH = ((((((((1 + TUPLE_DWIDTH) + 32) + 1) + LL_AWIDTH) + LL_AWIDTH) + 56) + (4 * FT_AWIDTH)) + OOO_FLOW_LIST_T_WIDTH);
+
+typedef struct packed {
+ tuple_t tuple;
+ logic is_delete;
+ logic [31:0] seq;
+ logic [FT_AWIDTH-1:0] addr0;
+ logic [FT_AWIDTH-1:0] addr1;
+ logic [FT_AWIDTH-1:0] addr2;
+ logic [FT_AWIDTH-1:0] addr3;
+ logic [PKT_AWIDTH-1:0] rel_pkt_cnt;
+} ft_update_t;
+localparam FT_UPDATE_T_WIDTH = ((((TUPLE_DWIDTH + 1) + 32) + (4 * FT_AWIDTH)) + PKT_AWIDTH);
+
+typedef struct packed {
+ ooo_flow_id_t ooo_flow_id;
+ tuple_t tuple;
+ logic [FT_AWIDTH-1:0] addr0;
+ logic [FT_AWIDTH-1:0] addr1;
+ logic [FT_AWIDTH-1:0] addr2;
+ logic [FT_AWIDTH-1:0] addr3;
+} reassembly_gc_meta_t;
+localparam REASSEMBLY_GC_META_T_WIDTH = ((OOO_FLOW_ID_AWIDTH + TUPLE_DWIDTH) + (4 * FT_AWIDTH));
+
+typedef struct packed {
+ logic ll_valid;
+ logic [LL_AWIDTH-1:0] pointer;
+ reassembly_gc_meta_t meta;
+} reassembly_gc_req_t;
+localparam REASSEMBLY_GC_REQ_T_WIDTH = ((1 + LL_AWIDTH) + REASSEMBLY_GC_META_T_WIDTH);
typedef struct packed {
logic [31:0] c2f_kmem_high_1; // higher 32 bit of kernel memory, FPGA read only
@@ -136,21 +226,21 @@ typedef struct packed {
logic [31:0] f2c_head; // head pointer, FPGA read only
logic [31:0] f2c_tail; // tail pointer, CPU read only
} pcie_block_t;
-parameter FT_DWIDTH = (((((((1 + TUPLE_DWIDTH) + 32) + LL_AWIDTH) + 1) + PKT_AWIDTH) + 56) + (4 * FT_AWIDTH));
typedef struct packed {
logic valid;
tuple_t tuple;
logic [31:0] seq;
- logic [LL_AWIDTH-1:0] pointer;
- logic ll_valid;
logic [PKT_AWIDTH-1:0] slow_cnt;
logic [55:0] last_7_bytes;
logic [FT_AWIDTH-1:0] addr0;
logic [FT_AWIDTH-1:0] addr1;
logic [FT_AWIDTH-1:0] addr2;
logic [FT_AWIDTH-1:0] addr3;
+ logic ooo_flow_id_valid;
+ logic [OOO_FLOW_ID_AWIDTH-1:0] ooo_flow_id;
} fce_t; // Flow context entry
+parameter FT_DWIDTH = (((((((1 + TUPLE_DWIDTH) + 32) + PKT_AWIDTH) + 56) + (4 * FT_AWIDTH)) + 1) + OOO_FLOW_ID_AWIDTH);
typedef struct packed {
tuple_t tuple;
diff --git a/pigasus/hardware/rtl_sim/src/tb.sv b/pigasus/hardware/rtl_sim/src/tb.sv
index c719128..9df183d 100644
--- a/pigasus/hardware/rtl_sim/src/tb.sv
+++ b/pigasus/hardware/rtl_sim/src/tb.sv
@@ -457,6 +457,10 @@ always @(posedge clk_status) begin
REG_MAX_NF2PDU : $display("REG_MAX_NF2PDU :\t%d",top_readdata);
REG_SM_BYPASS_AF : $display("REG_SM_BYPASS_AF :\t%d",top_readdata);
REG_SM_CDC_AF : $display("REG_SM_CDC_AF :\t%d",top_readdata);
+ REG_PARSER_OUT_BYTES_L : $display("REG_PARSER_OUT_BYTES_L :\t%d",top_readdata);
+ REG_PARSER_OUT_BYTES_H : $display("REG_PARSER_OUT_BYTES_H :\t%d",top_readdata);
+ REG_DM_OUT_BYTES_L : $display("REG_DM_OUT_BYTES_L :\t%d",top_readdata);
+ REG_DM_OUT_BYTES_H : $display("REG_DM_OUT_BYTES_H :\t%d",top_readdata);
//REG_PARTITION1_OUT_PKT : $display("REG_PARTITION1_OUT_PKT :\t%d",top_readdata);
default : $display("unsupported reg");
endcase
diff --git a/pigasus/hardware/rtl_sim/src/top.sv b/pigasus/hardware/rtl_sim/src/top.sv
index eba0aa8..2fe7996 100644
--- a/pigasus/hardware/rtl_sim/src/top.sv
+++ b/pigasus/hardware/rtl_sim/src/top.sv
@@ -69,6 +69,7 @@ module top (
logic [31:0] parser_meta_csr_readdata_r;
logic [31:0] stats_incomp_out_meta_r;
logic [31:0] stats_parser_out_meta_r;
+ logic [63:0] stats_parser_out_bytes_r;
logic [31:0] stats_ft_in_meta_r;
logic [31:0] stats_ft_out_meta_r;
logic [31:0] stats_emptylist_in_r;
@@ -80,6 +81,7 @@ module top (
logic [31:0] stats_dm_in_check_meta_r;
logic [31:0] stats_dm_in_ooo_meta_r;
logic [31:0] stats_dm_in_forward_ooo_meta_r;
+ logic [63:0] stats_dm_out_bytes_r;
logic [31:0] stats_nopayload_pkt_r;
logic [31:0] stats_dm_check_pkt_r;
logic [31:0] in_pkt_fill_level_dm2sm;
@@ -139,6 +141,7 @@ module top (
logic [31:0] out_pkt_status;
logic [31:0] incomp_out_meta_status;
logic [31:0] parser_out_meta_status;
+logic [63:0] parser_out_bytes_status;
logic [31:0] ft_in_meta_status;
logic [31:0] ft_out_meta_status;
logic [31:0] emptylist_in_status;
@@ -150,6 +153,7 @@ logic [31:0] dm_in_drop_meta_status;
logic [31:0] dm_in_check_meta_status;
logic [31:0] dm_in_ooo_meta_status;
logic [31:0] dm_in_forward_ooo_meta_status;
+logic [63:0] dm_out_bytes_status;
logic [31:0] nopayload_pkt_status;
logic [31:0] dm_check_pkt_status;
logic [31:0] sm_pkt_status;
@@ -226,6 +230,7 @@ logic [31:0] in_pkt_r1;
logic [31:0] out_pkt_r1;
logic [31:0] incomp_out_meta_r1;
logic [31:0] parser_out_meta_r1;
+logic [63:0] parser_out_bytes_r1;
logic [31:0] ft_in_meta_r1;
logic [31:0] ft_out_meta_r1;
logic [31:0] emptylist_in_r1;
@@ -237,6 +242,7 @@ logic [31:0] dm_in_drop_meta_r1;
logic [31:0] dm_in_check_meta_r1;
logic [31:0] dm_in_ooo_meta_r1;
logic [31:0] dm_in_forward_ooo_meta_r1;
+logic [63:0] dm_out_bytes_r1;
logic [31:0] nopayload_pkt_r1;
logic [31:0] dm_check_pkt_r1;
logic [31:0] sm_pkt_r1;
@@ -289,6 +295,9 @@ assign incomp_out_meta = stats_incomp_out_meta_r;
logic [31:0] parser_out_meta;
assign parser_out_meta = stats_parser_out_meta_r;
+logic [63:0] parser_out_bytes;
+assign parser_out_bytes = stats_parser_out_bytes_r;
+
logic [31:0] ft_in_meta;
assign ft_in_meta = stats_ft_in_meta_r;
@@ -322,6 +331,9 @@ assign dm_in_ooo_meta = stats_dm_in_ooo_meta_r;
logic [31:0] dm_in_forward_ooo_meta;
-assign dm_in_forward_ooo_meta = stats_dm_in_forward_meta_r;
+assign dm_in_forward_ooo_meta = stats_dm_in_forward_ooo_meta_r; // OOO-forward counter, not the in-order forward one
+logic [63:0] dm_out_bytes;
+assign dm_out_bytes = stats_dm_out_bytes_r;
+
logic [31:0] nopayload_pkt;
assign nopayload_pkt = stats_nopayload_pkt_r;
@@ -510,6 +522,8 @@ always @(posedge clk_status) begin
incomp_out_meta_status <= incomp_out_meta_r1;
parser_out_meta_r1 <= parser_out_meta;
parser_out_meta_status <= parser_out_meta_r1;
+ parser_out_bytes_r1 <= parser_out_bytes;
+ parser_out_bytes_status <= parser_out_bytes_r1;
ft_in_meta_r1 <= ft_in_meta;
ft_in_meta_status <= ft_in_meta_r1;
ft_out_meta_r1 <= ft_out_meta;
@@ -532,6 +546,8 @@ always @(posedge clk_status) begin
dm_in_ooo_meta_status <= dm_in_ooo_meta_r1;
dm_in_forward_ooo_meta_r1 <= dm_in_forward_ooo_meta;
dm_in_forward_ooo_meta_status <= dm_in_forward_ooo_meta_r1;
+ dm_out_bytes_r1 <= dm_out_bytes;
+ dm_out_bytes_status <= dm_out_bytes_r1;
nopayload_pkt_r1 <= nopayload_pkt;
nopayload_pkt_status <= nopayload_pkt_r1;
dm_check_pkt_r1 <= dm_check_pkt;
@@ -677,6 +693,10 @@ always @(posedge clk_status) begin
REG_MAX_NF2PDU : status_readdata <= max_nf2pdu_status;
REG_SM_BYPASS_AF : status_readdata <= sm_bypass_af_status;
REG_SM_CDC_AF : status_readdata <= sm_cdc_af_status;
+ REG_PARSER_OUT_BYTES_L : status_readdata <= parser_out_bytes_status[31:0];
+ REG_PARSER_OUT_BYTES_H : status_readdata <= parser_out_bytes_status[63:32];
+ REG_DM_OUT_BYTES_L : status_readdata <= dm_out_bytes_status[31:0];
+ REG_DM_OUT_BYTES_H : status_readdata <= dm_out_bytes_status[63:32];
default : status_readdata <= 32'hDEADBEEF;
endcase
end
@@ -769,6 +789,7 @@ assign pdumeta_cnt = pdumeta_cpu_csr_readdata[9:0];
.parser_meta_csr_readdata(parser_meta_csr_readdata_r),
.stats_incomp_out_meta(stats_incomp_out_meta_r),
.stats_parser_out_meta(stats_parser_out_meta_r),
+ .stats_parser_out_bytes(stats_parser_out_bytes_r),
.stats_ft_in_meta(stats_ft_in_meta_r),
.stats_ft_out_meta(stats_ft_out_meta_r),
.stats_emptylist_in(stats_emptylist_in_r),
@@ -780,6 +801,7 @@ assign pdumeta_cnt = pdumeta_cpu_csr_readdata[9:0];
.stats_dm_in_check_meta(stats_dm_in_check_meta_r),
.stats_dm_in_ooo_meta(stats_dm_in_ooo_meta_r),
.stats_dm_in_forward_ooo_meta(stats_dm_in_forward_ooo_meta_r),
+ .stats_dm_out_bytes(stats_dm_out_bytes_r),
.stats_nopayload_pkt(stats_nopayload_pkt_r),
.stats_dm_check_pkt(stats_dm_check_pkt_r),
.eth(r_eth_direct),
diff --git a/pigasus/hardware/rtl_sim/tb/ffs/run_test.sh b/pigasus/hardware/rtl_sim/tb/ffs/run_test.sh
new file mode 100755
index 0000000..7c7d67c
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/tb/ffs/run_test.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+altera_ver="$SIM_LIB_PATH/altera_ver"
+lpm_ver="$SIM_LIB_PATH/lpm_ver"
+sgate_ver="$SIM_LIB_PATH/sgate_ver"
+altera_mf_ver="$SIM_LIB_PATH/altera_mf_ver"
+altera_lnsim_ver="$SIM_LIB_PATH/altera_lnsim_ver"
+fourteennm_ver="$SIM_LIB_PATH/fourteennm_ver"
+fourteennm_ct1_ver="$SIM_LIB_PATH/fourteennm_ct1_ver"
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+NC='\033[0m' # No Color
+
+run_testcase () {
+ # Compile and simulate test case $1, then print a coloured PASS/FAIL verdict.
+ rm -rf work vsim.wlf
+
+ vlib work > /dev/null 2>&1
+ vlog ../../src/reassembly/surge_protector/ffs.sv -sv > /dev/null 2>&1
+ vlog "+define+TEST_CASE=\"$1\"" tb_ffs.sv -sv > /dev/null 2>&1
+
+ OUTPUT=$(vsim -L "$altera_mf_ver" -L "$altera_lnsim_ver" -L "$altera_ver" \
+ -L "$lpm_ver" -L "$sgate_ver" -L "$fourteennm_ver" -L "$fourteennm_ct1_ver" \
+ -c -do "run -all" tb_ffs | grep -e "PASS" -e "FAIL")
+
+ # The test name and simulator text are passed as printf arguments, never as
+ # the format string, so any '%' or '\' they contain is printed verbatim.
+ printf 'Running %s... ' "$1"
+ if grep -q "FAIL" <<< "${OUTPUT}"; then
+ printf "${RED}%s${NC}\n" "${OUTPUT}"
+ elif grep -q "PASS $1" <<< "${OUTPUT}"; then
+ printf "${GREEN}PASS${NC}\n"
+ else
+ printf "${RED}Test not run${NC}\n"
+ fi
+}
+
+declare -a testcases=(
+ 'TEST_ZERO'
+ 'TEST_LSB_SET'
+ 'TEST_MSB_SET'
+ 'TEST_ALL_SET'
+ 'TEST_RANDOM_BITMAP'
+)
+
+for c in "${testcases[@]}"; do # Quoted so each test name stays a single word
+ run_testcase "$c"
+done
diff --git a/pigasus/hardware/rtl_sim/tb/ffs/tb_ffs.sv b/pigasus/hardware/rtl_sim/tb/ffs/tb_ffs.sv
new file mode 100644
index 0000000..627ab7e
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/tb/ffs/tb_ffs.sv
@@ -0,0 +1,175 @@
+`timescale 1 ns/10 ps
+
+module tb_ffs;
+
+// Simulation parameters
+localparam WIDTH = 16;
+localparam PERIOD = 10;
+localparam WIDTH_LOG = $clog2(WIDTH);
+
+`ifndef TEST_CASE
+`define TEST_CASE "NO_TEST_CASE_SPECIFIED"
+`endif // Placeholder keeps the file compilable; the "Unknown test" branch reports it
+
+/**
+ * List of tests:
+ * ---------------------
+ * TEST_ZERO
+ * TEST_LSB_SET
+ * TEST_MSB_SET
+ * TEST_ALL_SET
+ * TEST_RANDOM_BITMAP
+ */
+
+// Global state
+logic clk;
+logic rst;
+logic zero;
+logic [WIDTH-1:0] x;
+logic [WIDTH_LOG-1:0] msb;
+logic [WIDTH_LOG-1:0] lsb;
+logic [WIDTH-1:0] x_initval;
+
+initial x = 0;
+initial clk = 0;
+initial rst = 1;
+always #(PERIOD) clk = ~clk;
+
+if (`TEST_CASE == "TEST_ZERO") begin
+// Ensure zero is correctly asserted
+assign x_initval = 16'b0;
+
+always @(posedge clk) begin
+ if (!rst) begin
+ if (!zero) begin
+ $display("FAIL %s: Zero was not asserted", `TEST_CASE);
+ $finish;
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_LSB_SET") begin
+// Ensure output is correct when the LSB is set
+assign x_initval = 16'b0000000000000001;
+
+always @(posedge clk) begin
+ if (!rst) begin
+ if (zero) begin
+ $display("FAIL %s: Zero incorrectly asserted", `TEST_CASE);
+ $finish;
+ end
+ else if (lsb != 0 || msb != 0) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Incorrect output. Expected (lsb=0, msb=0) ",
+ "for x=%b, got (lsb=%0d, msb=%0d)", x, lsb, msb);
+ $finish;
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_MSB_SET") begin
+// Ensure output is correct when the MSB is set
+assign x_initval = 16'b1000000000000000;
+
+always @(posedge clk) begin
+ if (!rst) begin
+ if (zero) begin
+ $display("FAIL %s: Zero incorrectly asserted", `TEST_CASE);
+ $finish;
+ end
+ else if (lsb != 15 || msb != 15) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Incorrect output. Expected (lsb=15, msb=15) ",
+ "for x=%b, got (lsb=%0d, msb=%0d)", x, lsb, msb);
+ $finish;
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_ALL_SET") begin
+// Ensure output is correct when all bits are set
+assign x_initval = 16'b1111111111111111;
+
+always @(posedge clk) begin
+ if (!rst) begin
+ if (zero) begin
+ $display("FAIL %s: Zero incorrectly asserted", `TEST_CASE);
+ $finish;
+ end
+ else if (lsb != 0 || msb != 15) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Incorrect output. Expected (lsb=0, msb=15) ",
+ "for x=%b, got (lsb=%0d, msb=%0d)", x, lsb, msb);
+ $finish;
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_RANDOM_BITMAP") begin
+// Ensure output is correct when random bits are set
+assign x_initval = 16'b0011100010101010;
+
+always @(posedge clk) begin
+ if (!rst) begin
+ if (zero) begin
+ $display("FAIL %s: Zero incorrectly asserted", `TEST_CASE);
+ $finish;
+ end
+ else if (lsb != 1 || msb != 13) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Incorrect output. Expected (lsb=1, msb=13) ",
+ "for x=%b, got (lsb=%0d, msb=%0d)", x, lsb, msb);
+ $finish;
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ end
+end
+end
+
+else begin
+ $error("FAIL: Unknown test %s", `TEST_CASE);
+end
+
+// Value initialization logic
+always @(posedge clk) begin
+ if (rst) begin
+ rst <= 0;
+ x <= x_initval;
+ end
+end
+
+// FFS instance
+ffs #(
+ .WIDTH_LOG(WIDTH_LOG)
+)
+ffs_instance (
+ .x(x),
+ .lsb(lsb),
+ .msb(msb),
+ .zero(zero)
+);
+
+endmodule
diff --git a/pigasus/hardware/rtl_sim/tb/pipelined_heap/run_test.sh b/pigasus/hardware/rtl_sim/tb/pipelined_heap/run_test.sh
new file mode 100755
index 0000000..a0644eb
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/tb/pipelined_heap/run_test.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+altera_ver="$SIM_LIB_PATH/altera_ver"
+lpm_ver="$SIM_LIB_PATH/lpm_ver"
+sgate_ver="$SIM_LIB_PATH/sgate_ver"
+altera_mf_ver="$SIM_LIB_PATH/altera_mf_ver"
+altera_lnsim_ver="$SIM_LIB_PATH/altera_lnsim_ver"
+fourteennm_ver="$SIM_LIB_PATH/fourteennm_ver"
+fourteennm_ct1_ver="$SIM_LIB_PATH/fourteennm_ct1_ver"
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+NC='\033[0m' # No Color
+
+run_testcase () {
+ # Compile and simulate test case $1, then print a coloured PASS/FAIL verdict.
+ rm -rf work vsim.wlf
+
+ vlib work > /dev/null 2>&1
+ vlog +define+SIM ../../src/common/*.sv -sv > /dev/null 2>&1
+ vlog +define+SIM ../../src/common/*.v > /dev/null 2>&1
+ vlog +define+SIM ../../src/reassembly/surge_protector/ffs.sv -sv > /dev/null 2>&1
+ vlog +define+SIM ../../src/reassembly/surge_protector/heap_ops_pkg.sv -sv > /dev/null 2>&1
+ vlog +define+SIM ../../src/reassembly/surge_protector/pipelined_heap.sv -sv > /dev/null 2>&1
+ vlog +define+SIM "+define+TEST_CASE=\"$1\"" tb_pipelined_heap.sv -sv > /dev/null 2>&1
+
+ OUTPUT=$(vsim -L "$altera_mf_ver" -L "$altera_lnsim_ver" -L "$altera_ver" \
+ -L "$lpm_ver" -L "$sgate_ver" -L "$fourteennm_ver" -L "$fourteennm_ct1_ver" \
+ -c -do "run -all" tb_pipelined_heap | grep -e "PASS" -e "FAIL")
+
+ # The test name and simulator text are passed as printf arguments, never as
+ # the format string, so any '%' or '\' they contain is printed verbatim.
+ printf 'Running %s... ' "$1"
+ if grep -q "FAIL" <<< "${OUTPUT}"; then
+ printf "${RED}%s${NC}\n" "${OUTPUT}"
+ elif grep -q "PASS $1" <<< "${OUTPUT}"; then
+ printf "${GREEN}PASS${NC}\n"
+ else
+ printf "${RED}Test not run${NC}\n"
+ fi
+}
+
+declare -a testcases=(
+ 'TEST_BASIC_ENQUE'
+ 'TEST_BASIC_DEQUE_MIN'
+ 'TEST_BASIC_DEQUE_MAX'
+ 'TEST_HEAP_PROPERTY'
+ 'TEST_CAPACITY_LIMITS'
+ 'TEST_PIPELINING_ENQUE'
+ 'TEST_PIPELINING_MIXED'
+ 'TEST_DEQUE_COLLISIONS'
+ 'TEST_RESET'
+)
+
+for c in "${testcases[@]}"; do # Quoted so each test name stays a single word
+ run_testcase "$c"
+done
diff --git a/pigasus/hardware/rtl_sim/tb/pipelined_heap/tb_pipelined_heap.sv b/pigasus/hardware/rtl_sim/tb/pipelined_heap/tb_pipelined_heap.sv
new file mode 100644
index 0000000..49ae8a8
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/tb/pipelined_heap/tb_pipelined_heap.sv
@@ -0,0 +1,747 @@
+`timescale 1 ns/10 ps
+
+import heap_ops_pkg::*;
+
+module tb_pipelined_heap;
+
+// Simulation parameters
+localparam PERIOD = 10;
+localparam HEAP_BITMAP_WIDTH = 32;
+localparam HEAP_MAX_NUM_ENTRIES = 128;
+localparam HEAP_ENTRY_VALUE_WIDTH = 64;
+localparam HEAP_NUM_PRIORITIES = (HEAP_BITMAP_WIDTH ** 2);
+localparam HEAP_ENTRY_AWIDTH = ($clog2(HEAP_MAX_NUM_ENTRIES));
+localparam HEAP_PRIORITY_AWIDTH = ($clog2(HEAP_NUM_PRIORITIES));
+
+localparam HEAP_INIT_CYCLES = (
+ (HEAP_MAX_NUM_ENTRIES > HEAP_NUM_PRIORITIES) ?
+ HEAP_MAX_NUM_ENTRIES : HEAP_NUM_PRIORITIES);
+
+localparam HEAP_MIN_NUM_PRIORITIES_AND_ENTRIES = (
+ (HEAP_MAX_NUM_ENTRIES < HEAP_NUM_PRIORITIES) ?
+ HEAP_MAX_NUM_ENTRIES : HEAP_NUM_PRIORITIES);
+
+localparam MAX_HEAP_INIT_CYCLES = (HEAP_INIT_CYCLES << 1);
+
+// Local typedefs
+typedef logic [HEAP_PRIORITY_AWIDTH-1:0] heap_priority_t;
+typedef logic [HEAP_ENTRY_VALUE_WIDTH-1:0] heap_entry_value_t;
+
+/**
+ * List of tests:
+ * ---------------------
+ * TEST_BASIC_ENQUE
+ * TEST_BASIC_DEQUE_MIN
+ * TEST_BASIC_DEQUE_MAX
+ * TEST_HEAP_PROPERTY
+ * TEST_CAPACITY_LIMITS
+ * TEST_PIPELINING_ENQUE
+ * TEST_PIPELINING_MIXED
+ * TEST_DEQUE_COLLISIONS
+ * TEST_RESET
+ */
+
+// Global state
+logic clk;
+logic rst;
+logic init_done;
+logic [31:0] counter;
+logic [31:0] test_timer;
+
+initial clk = 0;
+initial rst = 1;
+initial counter = 0;
+initial init_done = 0;
+initial test_timer = 0;
+always #(PERIOD) clk = ~clk;
+
+// Heap signals
+logic heap_ready;
+logic heap_in_en;
+logic heap_out_valid;
+heap_op_t heap_in_op_type;
+heap_op_t heap_out_op_type;
+heap_entry_value_t heap_in_value;
+heap_priority_t heap_in_priority;
+heap_entry_value_t heap_out_value;
+heap_priority_t heap_out_priority;
+logic [2:0] heap_num_ops_enque;
+logic [2:0] heap_num_ops_deque_min;
+logic [2:0] heap_num_ops_deque_max;
+logic [HEAP_ENTRY_AWIDTH:0] heap_size;
+
+`ifndef TEST_CASE
+ $error("FAIL: No test case specified");
+`else
+if (`TEST_CASE == "TEST_BASIC_ENQUE") begin
+// Test a single enque operation
+always @(posedge clk) begin
+ rst <= 0;
+ heap_in_en <= 0;
+ heap_in_value <= 0;
+ heap_in_priority <= 0;
+ test_timer <= test_timer + 1;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter == 0) begin
+ heap_in_en <= 1;
+ heap_in_value <= 23;
+ heap_in_priority <= 1023;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ else if (heap_out_valid) begin
+ if ((heap_out_op_type == HEAP_OP_ENQUE) &&
+ (heap_out_priority == 1023) &&
+ (heap_out_value == 23)) begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ else begin
+ $display("FAIL %s: Expected ", `TEST_CASE,
+ "(op: HEAP_OP_ENQUE, value: 23, priority: 1023), got ",
+ "(%s, %0d, %0d)", heap_out_op_type.name, heap_out_value, heap_out_priority);
+ $finish;
+ end
+ end
+ else if (counter >= 10) begin
+ $display("FAIL %s: Test timed out", `TEST_CASE);
+ $finish;
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_BASIC_DEQUE_MIN") begin
+// Test a single deque-min operation
+always @(posedge clk) begin
+ rst <= 0;
+ heap_in_en <= 0;
+ heap_in_value <= 0;
+ heap_in_priority <= 0;
+ test_timer <= test_timer + 1;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter == 0) begin
+ heap_in_en <= 1;
+ heap_in_value <= 42;
+ heap_in_priority <= 397;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ else if (counter == 10) begin
+ heap_in_en <= 1;
+ heap_in_op_type <= HEAP_OP_DEQUE_MIN;
+ end
+ else if ((counter > 10) && heap_out_valid) begin
+ if ((heap_out_op_type == HEAP_OP_DEQUE_MIN) &&
+ (heap_out_priority == 397) &&
+ (heap_out_value == 42)) begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ else begin
+ $display("FAIL %s: Expected ", `TEST_CASE,
+ "(op: HEAP_OP_DEQUE_MIN, value: 42, priority: 397), got ",
+ "(%s, %0d, %0d)", heap_out_op_type.name, heap_out_value, heap_out_priority);
+ $finish;
+ end
+ end
+ else if (counter >= 20) begin
+ $display("FAIL %s: Test timed out", `TEST_CASE);
+ $finish;
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_BASIC_DEQUE_MAX") begin
+// Test a single deque-max operation
+always @(posedge clk) begin
+ rst <= 0;
+ heap_in_en <= 0;
+ heap_in_value <= 0;
+ heap_in_priority <= 0;
+ test_timer <= test_timer + 1;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter == 0) begin
+ heap_in_en <= 1;
+ heap_in_value <= 13455345;
+ heap_in_priority <= 7;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ else if (counter == 10) begin
+ heap_in_en <= 1;
+ heap_in_op_type <= HEAP_OP_DEQUE_MAX;
+ end
+ else if ((counter > 10) && heap_out_valid) begin
+ if ((heap_out_op_type == HEAP_OP_DEQUE_MAX) &&
+ (heap_out_priority == 7) &&
+ (heap_out_value == 13455345)) begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ else begin
+ $display("FAIL %s: Expected ", `TEST_CASE,
+ "(op: HEAP_OP_DEQUE_MAX, value: 13455345, priority: 7), got ",
+ "(%s, %0d, %0d)", heap_out_op_type.name, heap_out_value, heap_out_priority);
+ $finish;
+ end
+ end
+ else if (counter >= 20) begin
+ $display("FAIL %s: Test timed out", `TEST_CASE);
+ $finish;
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_HEAP_PROPERTY") begin
+// Ensures that the heap property is maintained
+logic deque_min_done;
+logic deque_max_done;
+
+initial begin
+ deque_min_done = 0;
+ deque_max_done = 0;
+end
+
+always @(posedge clk) begin
+ rst <= 0;
+ heap_in_en <= 0;
+ heap_in_value <= 0;
+ heap_in_priority <= 0;
+ test_timer <= test_timer + 1;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter == 0) begin
+ heap_in_en <= 1;
+ heap_in_value <= 1;
+ heap_in_priority <= 667;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ else if (counter == 1) begin
+ heap_in_en <= 1;
+ heap_in_value <= 2;
+ heap_in_priority <= 653;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ else if (counter == 2) begin
+ heap_in_en <= 1;
+ heap_in_value <= 3;
+ heap_in_priority <= 471;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ else if (counter == 3) begin
+ heap_in_en <= 1;
+ heap_in_value <= 4;
+ heap_in_priority <= 1023;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ else if (counter < 10) begin
+ // NOOP
+ end
+ else if (counter == 10) begin
+ heap_in_en <= 1;
+ heap_in_op_type <= HEAP_OP_DEQUE_MIN;
+ end
+ else if (counter < 20) begin
+ if (heap_out_valid) begin
+ if ((heap_out_op_type == HEAP_OP_DEQUE_MIN) &&
+ (heap_out_priority == 471) &&
+ (heap_out_value == 3)) begin
+ deque_min_done <= 1;
+ end
+ else begin
+ $display("FAIL %s: Expected ", `TEST_CASE,
+ "(op: HEAP_OP_DEQUE_MIN, value: 3, priority: 471), got ",
+ "(%s, %0d, %0d)", heap_out_op_type.name, heap_out_value, heap_out_priority);
+ $finish;
+ end
+ end
+ end
+ else if (counter == 20) begin
+ heap_in_en <= 1;
+ heap_in_op_type <= HEAP_OP_DEQUE_MAX;
+ end
+ else if (counter < 30) begin
+ if (heap_out_valid) begin
+ if ((heap_out_op_type == HEAP_OP_DEQUE_MAX) &&
+ (heap_out_priority == 1023) &&
+ (heap_out_value == 4)) begin
+ deque_max_done <= 1;
+ end
+ else begin
+ $display("FAIL %s: Expected ", `TEST_CASE,
+ "(op: HEAP_OP_DEQUE_MAX, value: 4, priority: 1023), got ",
+ "(%s, %0d, %0d)", heap_out_op_type.name, heap_out_value, heap_out_priority);
+ $finish;
+ end
+ end
+ end
+ else begin
+ if (deque_min_done & deque_max_done) begin
+ $display("PASS %s", `TEST_CASE);
+ end
+ else begin
+ $display("FAIL %s: Test timed out", `TEST_CASE);
+ end
+ $finish;
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_CAPACITY_LIMITS") begin
+// Test heap capacity limits (i.e. enqueing into
+// a full heap or dequeing from an empty heap).
+always @(posedge clk) begin
+ rst <= 0;
+ heap_in_en <= 0;
+ heap_in_value <= 0;
+ heap_in_priority <= 0;
+ test_timer <= test_timer + 1;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter == 0) begin
+ heap_in_en <= 1;
+ heap_in_op_type <= HEAP_OP_DEQUE_MIN;
+ end
+ else if (counter <= 10) begin
+ if (heap_out_valid) begin
+ $display("FAIL %s: Deque'd empty heap", `TEST_CASE);
+ $finish;
+ end
+ end
+ else if (counter <= (20 + HEAP_MAX_NUM_ENTRIES)) begin
+ if (counter < (11 + HEAP_MAX_NUM_ENTRIES)) begin
+ heap_in_en <= 1;
+ heap_in_value <= counter;
+ heap_in_priority <= (counter << 1);
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ end
+ else if (counter < (50 + HEAP_MAX_NUM_ENTRIES)) begin
+ heap_in_en <= 1;
+ heap_in_value <= counter;
+ heap_in_priority <= (counter << 1);
+ heap_in_op_type <= HEAP_OP_ENQUE;
+
+ if (heap_out_valid) begin
+ $display("FAIL %s: Enque'd into a full heap", `TEST_CASE);
+ $finish;
+ end
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_PIPELINING_ENQUE") begin
+// Make sure enque pipelining works as expected
+logic [31:0] first_enque_done_count;
+logic first_enque_done;
+
+initial begin
+ first_enque_done = 0;
+ first_enque_done_count = 0;
+end
+
+always @(posedge clk) begin
+ rst <= 0;
+ heap_in_en <= 0;
+ heap_in_value <= 0;
+ heap_in_priority <= 0;
+ test_timer <= test_timer + 1;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter <= 15) begin
+ heap_in_en <= 1;
+ heap_in_value <= counter;
+ heap_in_priority <= counter;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+
+ if (heap_out_valid) begin
+ if (!first_enque_done) begin
+ first_enque_done <= 1;
+ first_enque_done_count <= counter;
+
+ if ((heap_out_priority != 0) || (heap_out_value != 0)) begin
+ $display("FAIL %s: Unexpected output value", `TEST_CASE);
+ $finish;
+ end
+ end
+ else begin
+ if ((heap_out_priority != (counter - first_enque_done_count) ||
+ (heap_out_value != (counter - first_enque_done_count)))) begin
+ $display("FAIL %s: Unexpected output value", `TEST_CASE);
+ $finish;
+ end
+ end
+ end
+ end
+ else begin
+ if (first_enque_done) begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ else begin
+ $display("FAIL %s: No enques completed", `TEST_CASE);
+ $finish;
+ end
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_PIPELINING_MIXED") begin
+// Make sure mixed pipelining works as expected. Note:
+// This exercises relatively fine heap implementation
+// details, so consider disabling if too flaky.
+logic deque_done[1:0];
+
+initial begin
+ deque_done[0] = 0;
+ deque_done[1] = 0;
+end
+
+always @(posedge clk) begin
+ rst <= 0;
+ heap_in_en <= 0;
+ heap_in_value <= 0;
+ heap_in_priority <= 0;
+ test_timer <= test_timer + 1;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter == 0) begin
+ heap_in_en <= 1;
+ heap_in_value <= 23;
+ heap_in_priority <= 521;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ // A deque issued PIPELINE_DEPTH cycles after enque should
+ // pick up the enque op. TODO(natre): Don't hardcode this.
+ else if (counter == 4) begin
+ heap_in_en <= 1;
+ heap_in_op_type <= HEAP_OP_DEQUE_MIN;
+ end
+ else if (counter == 5) begin
+ heap_in_en <= 1;
+ heap_in_value <= 5218;
+ heap_in_priority <= 74;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ else if (counter == 6) begin
+ heap_in_en <= 1;
+ heap_in_value <= 2849;
+ heap_in_priority <= 1023;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ else if (counter == 10) begin
+ heap_in_en <= 1;
+ heap_in_op_type <= HEAP_OP_DEQUE_MAX;
+ end
+
+ if (heap_out_valid) begin
+ if (heap_out_op_type == HEAP_OP_DEQUE_MIN) begin
+ if ((heap_out_priority == 521) &&
+ (heap_out_value == 23)) begin
+ deque_done[0] <= 1;
+ end
+ else begin
+ $display("FAIL %s: Expected ", `TEST_CASE,
+ "(op: HEAP_OP_DEQUE_MIN, value: 23, priority: 521), got ",
+ "(%s, %0d, %0d)", heap_out_op_type.name, heap_out_value, heap_out_priority);
+ $finish;
+ end
+ end
+ else if (heap_out_op_type == HEAP_OP_DEQUE_MAX) begin
+ if ((heap_out_priority == 1023) &&
+ (heap_out_value == 2849)) begin
+ deque_done[1] <= 1;
+ end
+ else begin
+ $display("FAIL %s: Expected ", `TEST_CASE,
+ "(op: HEAP_OP_DEQUE_MAX, value: 2849, priority: 1023), got ",
+ "(%s, %0d, %0d)", heap_out_op_type.name, heap_out_value, heap_out_priority);
+ $finish;
+ end
+ end
+ end
+
+ if (deque_done[0] & deque_done[1]) begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ else if (counter > 20) begin
+ $display("FAIL %s: Test timed out", `TEST_CASE);
+ $finish;
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_DEQUE_COLLISIONS") begin
+// Make sure back-to-back deques landing at the same
+// priority bucket are correctly handled (implicitly
+// tests the heap's write forwarding logic).
+logic [31:0] last_deque_counter;
+logic [5:0] num_deques_done;
+
+initial begin
+ num_deques_done = 0;
+ last_deque_counter = 0;
+end
+
+always @(posedge clk) begin
+ rst <= 0;
+ heap_in_en <= 0;
+ heap_in_value <= 0;
+ heap_in_priority <= 0;
+ test_timer <= test_timer + 1;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter <= 7) begin
+ heap_in_en <= 1;
+ heap_in_value <= counter + 5;
+ heap_in_priority <= 123;
+ heap_in_op_type <= HEAP_OP_ENQUE;
+ end
+ else if (counter < 15) begin
+ // NOOP
+ end
+ else if (counter <= 22) begin
+ heap_in_en <= 1;
+ heap_in_op_type <= HEAP_OP_DEQUE_MIN;
+ end
+ else if (counter >= 50) begin
+ if (num_deques_done == 0) begin
+ $display("FAIL %s: Test timed out", `TEST_CASE);
+ end
+ else if (num_deques_done != 8) begin
+ $display("FAIL %s: Expected 8 deque completions, saw %0d",
+ `TEST_CASE, num_deques_done);
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ end
+ $finish;
+ end
+
+ if (heap_out_valid && (heap_out_op_type == HEAP_OP_DEQUE_MIN)) begin
+ num_deques_done <= num_deques_done + 1;
+ last_deque_counter <= counter;
+
+ if (num_deques_done != 0) begin
+ if ((counter != last_deque_counter + 1) || (heap_out_priority != 123)) begin
+ $display("FAIL %s: Colliding deques were not pipelined", `TEST_CASE);
+ $finish;
+ end
+ end
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_RESET") begin
+// Make sure rst works as expected: load the heap, pulse rst, then check it re-inits empty and usable
+logic [31:0] num_enques_done;
+logic second_init_done;
+logic deque_done;
+logic rst_issued;
+
+initial begin
+    rst_issued = 0;
+    deque_done = 0;
+    num_enques_done = 0;
+    second_init_done = 0;
+end
+
+always @(posedge clk) begin
+    rst <= 0;
+    heap_in_en <= 0;
+    heap_in_value <= 0;
+    heap_in_priority <= 0;
+    test_timer <= test_timer + 1;
+    heap_in_op_type <= HEAP_OP_ENQUE;
+    init_done <= init_done | heap_ready;
+
+    if (second_init_done) begin
+        counter <= counter + 1;
+        if (counter < HEAP_MAX_NUM_ENTRIES) begin
+            heap_in_en <= 1;
+            heap_in_value <= 42;
+            heap_in_priority <= counter;
+            heap_in_op_type <= HEAP_OP_ENQUE;
+        end
+        else if (counter < (HEAP_MAX_NUM_ENTRIES + 10)) begin
+            // NOOP
+        end
+        else if (counter == (HEAP_MAX_NUM_ENTRIES + 10)) begin
+            heap_in_en <= 1;
+            heap_in_op_type <= HEAP_OP_DEQUE_MAX;
+        end
+        else if (counter > (HEAP_MAX_NUM_ENTRIES + 20)) begin
+            if ((num_enques_done == HEAP_MAX_NUM_ENTRIES) && deque_done) begin
+                $display("PASS %s", `TEST_CASE);
+            end
+            else if (!deque_done) begin
+                $display("FAIL %s: Expected 1 deque post reset, saw none", `TEST_CASE);
+            end
+            else begin
+                $display("FAIL %s: Expected %0d enques post reset, only saw %0d",
+                         `TEST_CASE, HEAP_MAX_NUM_ENTRIES, num_enques_done);
+            end
+            $finish;
+        end
+
+        if (heap_out_valid) begin
+            if (heap_out_op_type == HEAP_OP_ENQUE) begin
+                if (heap_out_value == 42) begin
+                    num_enques_done <= num_enques_done + 1;
+                end
+                else begin
+                    $display("FAIL %s: Unexpected output value", `TEST_CASE);
+                    $finish;
+                end
+            end
+            else if (heap_out_op_type == HEAP_OP_DEQUE_MAX) begin
+                if ((heap_out_priority == (HEAP_MIN_NUM_PRIORITIES_AND_ENTRIES - 1)) &&
+                    (heap_out_value == 42)) begin
+                    deque_done <= 1;
+                end
+                else begin
+                    $display("FAIL %s: Deque-max post reset does not satisfy heap property",
+                             `TEST_CASE);
+                    $finish;
+                end
+            end
+        end
+    end
+    else if (rst_issued & heap_ready) begin
+        if (heap_size != 0) begin
+            $display("FAIL %s: Heap size is non-zero post reset", `TEST_CASE);
+            $finish; // stop at the first failure, like every other check in this testbench
+        end
+        counter <= 0;
+        second_init_done <= 1;
+    end
+    else if (init_done) begin
+        counter <= counter + 1;
+        if (counter <= 7) begin
+            heap_in_en <= 1;
+            heap_in_value <= counter + 5;
+            heap_in_priority <= 123;
+            heap_in_op_type <= HEAP_OP_ENQUE;
+        end
+        else if (counter < 15) begin
+            // NOOP
+        end
+        else if (counter <= 18) begin
+            heap_in_en <= 1;
+            heap_in_op_type <= HEAP_OP_DEQUE_MIN;
+        end
+        else if (counter == 19) begin
+            rst <= 1;
+            rst_issued <= 1;
+        end
+        else if (counter > (MAX_HEAP_INIT_CYCLES << 2)) begin
+            $display("FAIL %s: Heap rst timed out", `TEST_CASE);
+            $finish; // otherwise the free-running clock keeps "run -all" alive forever
+        end
+    end
+    else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+        $display("FAIL %s: Heap init timed out", `TEST_CASE);
+        $finish;
+    end
+end
+end
+
+else begin
+ $error("FAIL: Unknown test %s", `TEST_CASE);
+end
+`endif
+
+// Heap instance
+pipelined_heap #(
+ .HEAP_BITMAP_WIDTH(HEAP_BITMAP_WIDTH),
+ .HEAP_MAX_NUM_ENTRIES(HEAP_MAX_NUM_ENTRIES),
+ .HEAP_ENTRY_VALUE_WIDTH(HEAP_ENTRY_VALUE_WIDTH)
+)
+heap (
+ .clk(clk),
+ .rst(rst),
+ .ready(heap_ready),
+ .in_en(heap_in_en),
+ .in_op_type(heap_in_op_type),
+ .in_he_value(heap_in_value),
+ .in_he_priority(heap_in_priority),
+ .out_valid(heap_out_valid),
+ .out_op_type(heap_out_op_type),
+ .out_he_value(heap_out_value),
+ .out_he_priority(heap_out_priority),
+ .size(heap_size),
+ .num_ops_enque(heap_num_ops_enque),
+ .num_ops_deque_min(heap_num_ops_deque_min),
+ .num_ops_deque_max(heap_num_ops_deque_max)
+);
+
+endmodule
diff --git a/pigasus/hardware/rtl_sim/tb/pipelined_heap_wrapper/run_test.sh b/pigasus/hardware/rtl_sim/tb/pipelined_heap_wrapper/run_test.sh
new file mode 100755
index 0000000..254bd7e
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/tb/pipelined_heap_wrapper/run_test.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+altera_ver="$SIM_LIB_PATH/altera_ver"
+lpm_ver="$SIM_LIB_PATH/lpm_ver"
+sgate_ver="$SIM_LIB_PATH/sgate_ver"
+altera_mf_ver="$SIM_LIB_PATH/altera_mf_ver"
+altera_lnsim_ver="$SIM_LIB_PATH/altera_lnsim_ver"
+fourteennm_ver="$SIM_LIB_PATH/fourteennm_ver"
+fourteennm_ct1_ver="$SIM_LIB_PATH/fourteennm_ct1_ver"
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+NC='\033[0m' # No Color
+
+run_testcase () {
+ rm -rf work
+ rm -f vsim.wlf
+
+ vlib work > /dev/null 2>&1
+ vlog +define+SIM ../../src/common/*.sv -sv > /dev/null 2>&1
+ vlog +define+SIM ../../src/common/*.v > /dev/null 2>&1
+ vlog +define+SIM ../../src/reassembly/surge_protector/ffs.sv -sv > /dev/null 2>&1
+ vlog +define+SIM ../../src/reassembly/surge_protector/heap_ops_pkg.sv -sv > /dev/null 2>&1
+ vlog +define+SIM ../../src/reassembly/surge_protector/pipelined_heap.sv -sv > /dev/null 2>&1
+ vlog +define+SIM ../../src/reassembly/surge_protector/pipelined_heap_wrapper.sv -sv > /dev/null 2>&1
+ vlog +define+SIM +define+TEST_CASE=\"$1\" tb_pipelined_heap_wrapper.sv -sv > /dev/null 2>&1
+
+ OUTPUT=$(vsim -L $altera_mf_ver -L $altera_lnsim_ver -L $altera_ver \
+ -L $lpm_ver -L $sgate_ver -L $fourteennm_ver -L $fourteennm_ct1_ver \
+ -c -do "run -all" tb_pipelined_heap_wrapper | grep -e "PASS" -e "FAIL")
+
+ printf "Running $1... "
+ if grep -q "FAIL" <<< ${OUTPUT}
+ then
+ printf "${RED}${OUTPUT}${NC}\n"
+ elif grep -q "PASS $1" <<< ${OUTPUT}
+ then
+ printf "${GREEN}PASS${NC}\n"
+ else
+ printf "${RED}Test not run${NC}\n"
+ fi
+}
+
+declare -a testcases=(
+    'TEST_DEQUE_PRIMARY_BASIC'
+    'TEST_DEQUE_PRIMARY_APPROX_HEAP'
+    'TEST_DEQUE_SECONDARY_BASIC'
+    'TEST_DEQUE_SECONDARY_REISSUE'
+)
+# Run each testcase in order; the quotes keep every name a single word.
+for c in "${testcases[@]}"; do
+    run_testcase "$c"
+done
diff --git a/pigasus/hardware/rtl_sim/tb/pipelined_heap_wrapper/tb_pipelined_heap_wrapper.sv b/pigasus/hardware/rtl_sim/tb/pipelined_heap_wrapper/tb_pipelined_heap_wrapper.sv
new file mode 100644
index 0000000..b7470e6
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/tb/pipelined_heap_wrapper/tb_pipelined_heap_wrapper.sv
@@ -0,0 +1,390 @@
+`timescale 1 ns/10 ps
+
+import heap_ops_pkg::*;
+
+module tb_pipelined_heap_wrapper;
+
+// Simulation parameters
+localparam PERIOD = 10;
+localparam HEAP_BITMAP_WIDTH = 32;
+localparam HEAP_MAX_NUM_ENTRIES = 128;
+localparam HEAP_ENTRY_VALUE_WIDTH = 64;
+localparam HEAP_NUM_PRIORITIES = (HEAP_BITMAP_WIDTH ** 2);
+localparam HEAP_ENTRY_AWIDTH = ($clog2(HEAP_MAX_NUM_ENTRIES));
+localparam HEAP_PRIORITY_AWIDTH = ($clog2(HEAP_NUM_PRIORITIES));
+
+localparam HEAP_INIT_CYCLES = (
+ (HEAP_MAX_NUM_ENTRIES > HEAP_NUM_PRIORITIES) ?
+ HEAP_MAX_NUM_ENTRIES : HEAP_NUM_PRIORITIES);
+
+localparam HEAP_MIN_NUM_PRIORITIES_AND_ENTRIES = (
+ (HEAP_MAX_NUM_ENTRIES < HEAP_NUM_PRIORITIES) ?
+ HEAP_MAX_NUM_ENTRIES : HEAP_NUM_PRIORITIES);
+
+localparam MAX_HEAP_INIT_CYCLES = (HEAP_INIT_CYCLES << 1);
+
+// Local typedefs
+typedef logic [HEAP_PRIORITY_AWIDTH-1:0] heap_priority_t;
+typedef logic [HEAP_ENTRY_VALUE_WIDTH-1:0] heap_entry_value_t;
+
+/**
+ * List of tests:
+ * ---------------------
+ * TEST_DEQUE_PRIMARY_BASIC
+ * TEST_DEQUE_PRIMARY_APPROX_HEAP
+ * TEST_DEQUE_SECONDARY_BASIC
+ * TEST_DEQUE_SECONDARY_REISSUE
+ */
+
+// Global state
+logic clk;
+logic rst;
+logic init_done;
+logic [31:0] counter;
+logic [31:0] test_timer;
+
+initial clk = 0;
+initial rst = 1;
+initial counter = 0;
+initial init_done = 0;
+initial test_timer = 0;
+always #(PERIOD) clk = ~clk;
+
+// Heap wrapper signals (*_en are driven by the tests, *_ready/value/priority come from the wrapper)
+logic in_enque_en;
+logic in_enque_ready;
+logic out_deque_primary_en;
+logic out_deque_primary_ready;
+heap_entry_value_t in_enque_value;
+heap_priority_t in_enque_priority;
+heap_entry_value_t out_deque_primary_value;
+heap_priority_t out_deque_primary_priority;
+logic in_deque_secondary_req_en = 0; // idle by default: the PRIMARY tests never drive it (was X)
+logic in_deque_secondary_req_ready;
+logic out_deque_secondary_en = 0; // idle by default: the PRIMARY tests never drive it (was X)
+logic out_deque_secondary_ready;
+heap_entry_value_t out_deque_secondary_value;
+heap_priority_t out_deque_secondary_priority;
+logic [HEAP_ENTRY_AWIDTH:0] heap_size;
+logic heap_ready;
+
+`ifndef TEST_CASE
+ $error("FAIL: No test case specified");
+`else
+if (`TEST_CASE == "TEST_DEQUE_PRIMARY_BASIC") begin
+// Test a basic deque-PRIMARY operation
+always @(posedge clk) begin
+ rst <= 0;
+ in_enque_en <= 0;
+ in_enque_value <= 0;
+ in_enque_priority <= 0;
+ out_deque_primary_en <= 0;
+ test_timer <= test_timer + 1;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter == 0) begin
+ in_enque_en <= 1;
+ in_enque_value <= 23;
+ in_enque_priority <= 1023;
+
+ if (!in_enque_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap is empty, but in_enque_ready is not asserted");
+ $finish;
+ end
+
+ if (out_deque_primary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap is empty, but out_deque_primary_ready is asserted");
+ $finish;
+ end
+ end
+ else if (counter == 20) begin
+ out_deque_primary_en <= 1;
+ if (out_deque_primary_ready) begin
+ if ((out_deque_primary_value != 23) ||
+ (out_deque_primary_priority != 1023)) begin
+ $display("FAIL %s: Expected ", `TEST_CASE,
+ "(value: 23, priority: 1023), got (%0d, %0d)",
+ out_deque_primary_value, out_deque_primary_priority);
+ $finish;
+ end
+ end
+ else begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap is not empty, but out_deque_primary_ready is not asserted");
+ $finish;
+ end
+ end
+ else if (counter == 22) begin
+ if (out_deque_primary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap became empty, but out_deque_primary_ready is still asserted");
+ $finish;
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_DEQUE_PRIMARY_APPROX_HEAP") begin
+// Ensures that the heap property is approximately satisfied.
+// This is enforced by checking that the third deque-PRIMARY
+// element corresponds to the top heap entry.
+always @(posedge clk) begin
+ rst <= 0;
+ in_enque_en <= 0;
+ in_enque_value <= 0;
+ in_enque_priority <= 0;
+ out_deque_primary_en <= 0;
+ test_timer <= test_timer + 1;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter < 32) begin
+ in_enque_en <= 1;
+ in_enque_value <= counter;
+ in_enque_priority <= (31 - counter);
+ end
+ else if (counter == 50 || counter == 55) begin
+ if (!out_deque_primary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap is not empty, but out_deque_primary_ready is not asserted");
+ $finish;
+ end
+ out_deque_primary_en <= 1;
+ end
+ else if (counter == 65) begin
+ if (!out_deque_primary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap is not empty, but out_deque_primary_ready is not asserted");
+ $finish;
+ end
+ if ((out_deque_primary_priority == 0) && (out_deque_primary_value == 31)) begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ else begin
+ $display("FAIL %s: ", `TEST_CASE, "Approximate heap property not satisfied");
+ $finish;
+ end
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_DEQUE_SECONDARY_BASIC") begin
+// Test a basic deque-SECONDARY operation
+always @(posedge clk) begin
+ rst <= 0;
+ in_enque_en <= 0;
+ in_enque_value <= 0;
+ in_enque_priority <= 0;
+ out_deque_primary_en <= 0;
+ out_deque_secondary_en <= 0;
+ test_timer <= test_timer + 1;
+ in_deque_secondary_req_en <= 0;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter < 32) begin
+ in_enque_en <= 1;
+ in_enque_value <= counter;
+ in_enque_priority <= counter;
+ end
+ else if (counter == 50 || counter == 55) begin
+ if (!out_deque_primary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap is not empty, but out_deque_primary_ready is not asserted");
+ $finish;
+ end
+ out_deque_primary_en <= 1;
+ end
+ else if (counter == 65) begin
+ if (out_deque_secondary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Deque-SECONDARY was not issued, but out-FIFO has entries");
+ $finish;
+ end
+ else if (!in_deque_secondary_req_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap has >2 entries, but in_deque_secondary_req_ready is not asserted");
+ $finish;
+ end
+ in_deque_secondary_req_en <= 1;
+ end
+ else if (counter == 75) begin
+ if (!out_deque_secondary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Deque-SECONDARY was issued, but out-FIFO is empty");
+ $finish;
+ end
+ else if ((out_deque_secondary_priority != 31) ||
+ (out_deque_secondary_value != 31)) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Deque-SECONDARY does not satisfy heap property");
+ $finish;
+ end
+ out_deque_secondary_en <= 1;
+ end
+ else if (counter == 77) begin
+ if (out_deque_secondary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Deque-SECONDARY was not re-issued, but out-FIFO has entries");
+ $finish;
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_DEQUE_SECONDARY_REISSUE") begin
+// Ensures that failed deque-SECONDARY ops are reissued
+always @(posedge clk) begin
+ rst <= 0;
+ in_enque_en <= 0;
+ in_enque_value <= 0;
+ in_enque_priority <= 0;
+ out_deque_primary_en <= 0;
+ out_deque_secondary_en <= 0;
+ test_timer <= test_timer + 1;
+ in_deque_secondary_req_en <= 0;
+ init_done <= init_done | heap_ready;
+
+ if (init_done) begin
+ counter <= counter + 1;
+ if (counter < 2) begin
+ in_enque_en <= 1;
+ in_enque_value <= counter;
+ in_enque_priority <= counter;
+ end
+ else if (counter == 14) begin
+ if (out_deque_secondary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Deque-SECONDARY was not issued, but out-FIFO has entries");
+ $finish;
+ end
+ else if (!in_deque_secondary_req_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap has >2 entries, but in_deque_secondary_req_ready is not asserted");
+ $finish;
+ end
+ in_deque_secondary_req_en <= 1;
+ end
+ else if (counter == 20) begin
+ if (out_deque_secondary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap became empty, but out_deque_secondary_ready is still asserted");
+ $finish;
+ end
+ else if (in_deque_secondary_req_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Deque-SECONDARY was previously issued, but in_ready was re-asserted");
+ $finish;
+ end
+ in_enque_en <= 1;
+ in_enque_value <= counter;
+ in_enque_priority <= counter;
+ end
+ else if (counter == 35) begin
+ if (!out_deque_secondary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap has entries, but deque-SECONDARY was not correctly re-issued");
+ $finish;
+ end
+ else if ((out_deque_secondary_priority != 20) ||
+ (out_deque_secondary_value != 20)) begin
+ $display("FAIL %s: Expected ", `TEST_CASE,
+ "(value: 20, priority: 20), got (%0d, %0d)",
+ out_deque_secondary_value, out_deque_secondary_priority);
+ $finish;
+ end
+ out_deque_secondary_en <= 1;
+ end
+ else if (counter == 37) begin
+ if (in_deque_secondary_req_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Heap is empty, but in_deque_secondary_req_ready is still asserted");
+ $finish;
+ end
+ else if (out_deque_secondary_ready) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Deque-SECONDARY was not re-issued, but out-FIFO has entries");
+ $finish;
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ $finish;
+ end
+ end
+ end
+ else if (test_timer > MAX_HEAP_INIT_CYCLES) begin
+ $display("FAIL %s: Heap init timed out", `TEST_CASE);
+ $finish;
+ end
+end
+end
+
+else begin
+ $error("FAIL: Unknown test %s", `TEST_CASE);
+end
+`endif
+
+// Heap wrapper instance
+pipelined_heap_wrapper #(
+ .HEAP_BITMAP_WIDTH(HEAP_BITMAP_WIDTH),
+ .HEAP_MAX_NUM_ENTRIES(HEAP_MAX_NUM_ENTRIES),
+ .HEAP_ENTRY_VALUE_WIDTH(HEAP_ENTRY_VALUE_WIDTH),
+ .HEAP_OP_DEQUE_PRIMARY_TYPE(HEAP_OP_DEQUE_MIN)
+) heap_wrapper (
+ // General inputs
+ .clk(clk),
+ .rst(rst),
+ // Enque
+ .in_enque_en(in_enque_en),
+ .in_enque_ready(in_enque_ready),
+ .in_enque_value(in_enque_value),
+ .in_enque_priority(in_enque_priority),
+ // Deque-PRIMARY
+ .out_deque_primary_en(out_deque_primary_en),
+ .out_deque_primary_ready(out_deque_primary_ready),
+ .out_deque_primary_value(out_deque_primary_value),
+ .out_deque_primary_priority(out_deque_primary_priority),
+ // Deque-SECONDARY (request)
+ .in_deque_secondary_req_en(in_deque_secondary_req_en),
+ .in_deque_secondary_req_ready(in_deque_secondary_req_ready),
+ // Deque-SECONDARY (response)
+ .out_deque_secondary_en(out_deque_secondary_en),
+ .out_deque_secondary_ready(out_deque_secondary_ready),
+ .out_deque_secondary_value(out_deque_secondary_value),
+ .out_deque_secondary_priority(out_deque_secondary_priority),
+ // Feedback
+ .heap_size(heap_size),
+ .heap_ready(heap_ready)
+);
+
+endmodule
diff --git a/pigasus/hardware/rtl_sim/tb/run_tests.sh b/pigasus/hardware/rtl_sim/tb/run_tests.sh
new file mode 100755
index 0000000..3ff80e0
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/tb/run_tests.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# Runs each testbench suite listed in testdirs via its own run_test.sh script.
+# Update common dir
+rm -rf ../src/common
+cp -r ../../scripts/generated_files ../src/common
+
+declare -a testdirs=(
+    'ffs'
+    'pipelined_heap'
+    'pipelined_heap_wrapper'
+    'scheduler_reassembly'
+)
+
+for testdir in "${testdirs[@]}"; do
+    echo "-----------------------------------------------"
+    echo "tb_${testdir}"
+    echo "-----------------------------------------------"
+    # Subshell: a failed cd can neither run a stray script nor strand the loop
+    (cd "${testdir}" && ./run_test.sh)
+done
diff --git a/pigasus/hardware/rtl_sim/tb/scheduler_reassembly/.gitignore b/pigasus/hardware/rtl_sim/tb/scheduler_reassembly/.gitignore
new file mode 100644
index 0000000..8fce603
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/tb/scheduler_reassembly/.gitignore
@@ -0,0 +1 @@
+data/
diff --git a/pigasus/hardware/rtl_sim/tb/scheduler_reassembly/run_test.sh b/pigasus/hardware/rtl_sim/tb/scheduler_reassembly/run_test.sh
new file mode 100755
index 0000000..d5766ac
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/tb/scheduler_reassembly/run_test.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+altera_ver="$SIM_LIB_PATH/altera_ver"
+lpm_ver="$SIM_LIB_PATH/lpm_ver"
+sgate_ver="$SIM_LIB_PATH/sgate_ver"
+altera_mf_ver="$SIM_LIB_PATH/altera_mf_ver"
+altera_lnsim_ver="$SIM_LIB_PATH/altera_lnsim_ver"
+fourteennm_ver="$SIM_LIB_PATH/fourteennm_ver"
+fourteennm_ct1_ver="$SIM_LIB_PATH/fourteennm_ct1_ver"
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+NC='\033[0m' # No Color
+
+# TODO(natre): This is a hack. Ideally we'd want to colocate
+# artifacts with the source code and keep them under version
+# control, but unfortunately there isn't a good way of doing
+# this today (git LFS doesn't play well with public repos).
+# Fix this whenever that changes.
+if [ ! -d "data" ]; then
+    printf "Data directory missing, downloading now... "
+    # Quote the URL ('?' is a glob character); abort if the download fails
+    wget -O data.tar.gz \
+        'https://www.dropbox.com/s/vhbkjv3c2t4u9vs/data.tar.gz?dl=0' \
+        > /dev/null 2>&1 || { printf "Failed\n"; rm -f data.tar.gz; exit 1; }
+    printf "Done\n"
+    tar -zxf data.tar.gz
+    rm -rf data.tar.gz
+fi
+
+# Common prologue: cd to ../../, recreate the work library, and compile ./src. Args: vlog defines
+run_testcase_prologue() {
+ cd ../../
+ rm -rf work
+ rm -f vsim.wlf
+ vlib work > /dev/null 2>&1
+
+ common_defines=("$@")
+ vlog "${common_defines[@]}" ./src/*.*v -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/common/*.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/common/*.v > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/common_usr/*.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/common_usr/*.v > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/buffer/*.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/fast_pattern_matcher/*.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/mux/*.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/non_fast_pattern_matcher/*.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/parser/*.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/pcie/*.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/port_group/*.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/reassembly/*.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/reassembly/surge_protector/heap_ops_pkg.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/reassembly/surge_protector/*.sv -sv > /dev/null 2>&1
+ vlog "${common_defines[@]}" ./src/services/*.sv -sv > /dev/null 2>&1
+}
+
+# Common epilogue. Arg: TestCaseName
+run_testcase_epilogue() {
+ OUTPUT=$(vsim -L $altera_mf_ver -L $altera_lnsim_ver -L $altera_ver \
+ -L $lpm_ver -L $sgate_ver -L $fourteennm_ver -L $fourteennm_ct1_ver \
+ -c -do "run -all" tb_scheduler_reassembly | grep -e "PASS" -e "FAIL")
+
+ if grep -q "FAIL" <<< ${OUTPUT}
+ then
+ printf "${RED}${OUTPUT}${NC}\n"
+ elif grep -q "PASS $1" <<< ${OUTPUT}
+ then
+ printf "${GREEN}PASS${NC}\n"
+ else
+ printf "${RED}Test not run${NC}\n"
+ fi
+
+ cd tb/scheduler_reassembly
+}
+
+run_testcase () { # Arg: "TestCaseName,PacketFile,RateInnocent,RateAttack,PacketBufferSize"
+    while IFS=',' read -r TEST_CASE PKT_FILE RATE_INNOCENT RATE_ATTACK PKT_NUM; do
+        printf "Running %s... " "${TEST_CASE}"
+        # Defines passed to both the prologue compile and the testbench compile
+        common_defines=()
+        common_defines+=( '+define+SIM' )
+        common_defines+=( "+define+PKT_NUM=${PKT_NUM}" )
+        if [[ ${TEST_CASE} == *"WSJF"* ]]; then
+            common_defines+=( '+define+ENABLE_SURGEPROTECTOR' )
+        fi
+
+        PKT_FILE_NB_LINES=$(wc -l < "${PKT_FILE}")
+        run_testcase_prologue "${common_defines[@]}" # Run prologue
+
+        vlog \
+            "${common_defines[@]}" \
+            "+define+TEST_CASE=\"${TEST_CASE}\"" \
+            "+define+PKT_FILE=\"./tb/scheduler_reassembly/${PKT_FILE}\"" \
+            "+define+PKT_FILE_NB_LINES=${PKT_FILE_NB_LINES}" \
+            "+define+RATE_INNOCENT=${RATE_INNOCENT}" \
+            "+define+RATE_ATTACK=${RATE_ATTACK}" \
+            ./tb/scheduler_reassembly/tb_scheduler_reassembly.sv -sv > /dev/null 2>&1
+
+        run_testcase_epilogue "${TEST_CASE}" # Run epilogue
+    done <<< "$1"
+}
+
+# Format: TestCaseName, PacketFile, RateInnocent,
+# RateAttack, PacketBufferSize
+declare -a testcases=(
+    'TEST_FCFS_BASIC_INORDER,data/inorder.pkt,10,0,1024'
+    'TEST_FCFS_BASIC_OOO,data/ooo.pkt,10,0,1024'
+    'TEST_WSJF_BASIC_INORDER,data/inorder.pkt,10,0,1024'
+    'TEST_WSJF_BASIC_OOO,data/ooo.pkt,10,0,1024'
+    'TEST_WSJF_ACA_MITIGATION,data/adversarial.pkt,10,1,16384'
+)
+
+for c in "${testcases[@]}"; do
+    run_testcase "$c"
+done
diff --git a/pigasus/hardware/rtl_sim/tb/scheduler_reassembly/tb_scheduler_reassembly.sv b/pigasus/hardware/rtl_sim/tb/scheduler_reassembly/tb_scheduler_reassembly.sv
new file mode 100644
index 0000000..f1ce871
--- /dev/null
+++ b/pigasus/hardware/rtl_sim/tb/scheduler_reassembly/tb_scheduler_reassembly.sv
@@ -0,0 +1,847 @@
+`timescale 1 ns/10 ps
+
+`include "../../src/struct_s.sv"
+`include "../../src/stats_reg.sv"
+
+module tb_scheduler_reassembly;
+
+`ifndef TEST_CASE
+ $error("FAIL: No test case specified");
+`else
+
+`ifndef PKT_FILE
+$error("FAIL: PKT_FILE not set");
+`endif
+
+`ifndef PKT_FILE_NB_LINES
+$error("FAIL: PKT_FILE_NB_LINES not set");
+`endif
+
+// Input innocent rate (in Gbps)
+`ifndef RATE_INNOCENT
+$error("FAIL: RATE_INNOCENT not set");
+`endif
+
+// Input attack rate (in Gbps)
+`ifndef RATE_ATTACK
+$error("FAIL: RATE_ATTACK not set");
+`endif
+
+// Total input rate (in Gbps)
+localparam RATE = (`RATE_INNOCENT + `RATE_ATTACK);
+
+// The time at which we stop profiling is controlled by the
+// STOP_DELAY parameter. For tests which perform throughput
+// measurement, it's important to stop profiling as soon as
+// the input trace is exhausted (i.e., no delay). For other
+// tests, we're interested in making sure that the I/O byte
+// counts match, in which case we need to run the tests for
+// longer to account for processing latency; for the latter
+// set of tests, we default to a reasonable delay value.
+localparam STOP_DELAY = (
+ (`TEST_CASE == "TEST_WSJF_ACA_MITIGATION") ?
+ 1 : // Min delay for throughput measurement
+ 20000 // Default delay
+);
+
+/**
+ * List of tests:
+ * ---------------------
+ * TEST_FCFS_BASIC_INORDER
+ * TEST_FCFS_BASIC_OOO
+ * TEST_WSJF_BASIC_INORDER
+ * TEST_WSJF_BASIC_OOO
+ * TEST_WSJF_ACA_MITIGATION
+ */
+
+localparam period = 10;
+localparam period_rx = 2.56;
+localparam period_tx = 2.56;
+localparam period_user = 5;
+localparam period_user_high = 2.5;
+localparam period_esram_ref = 10;
+localparam period_pcie = 4;
+localparam period_emif = 3;
+localparam data_width = 528;
+localparam lo = 0;
+localparam hi = `PKT_FILE_NB_LINES;
+localparam nf_forward_threshold = 200;
+
+logic clk_status;
+logic clk_rxmac;
+logic clk_txmac;
+logic clk_user;
+logic clk_user_high;
+logic clk_esram_ref;
+logic clk_pcie;
+logic clk_emif;
+
+logic rst;
+logic emif_reset_n;
+logic [31:0] cnt;
+logic [31:0] addr;
+logic [data_width-1:0] arr[lo:hi];
+
+// Ethernet signals
+logic [511:0] l8_rx_data;
+logic [5:0] l8_rx_empty;
+logic l8_rx_valid;
+logic l8_rx_startofpacket;
+logic l8_rx_endofpacket;
+
+logic [511:0] stats_rx_data;
+logic [5:0] stats_rx_empty;
+logic stats_rx_valid;
+logic stats_rx_startofpacket;
+logic stats_rx_endofpacket;
+logic stats_rx_ready;
+
+logic [511:0] top_in_data;
+logic [5:0] top_in_empty;
+logic top_in_valid;
+logic top_in_startofpacket;
+logic top_in_endofpacket;
+logic [511:0] top_out_data;
+logic [5:0] top_out_empty;
+logic top_out_valid;
+logic top_out_ready;
+logic top_out_startofpacket;
+logic top_out_endofpacket;
+
+logic [511:0] l8_tx_data;
+logic [5:0] l8_tx_empty;
+logic l8_tx_valid;
+logic l8_tx_startofpacket;
+logic l8_tx_endofpacket;
+logic l8_tx_ready;
+
+// PCIE
+logic disable_pcie;
+logic [513:0] pcie_rb_wr_data;
+logic [11:0] pcie_rb_wr_addr;
+logic pcie_rb_wr_en;
+logic [11:0] pcie_rb_wr_base_addr;
+logic pcie_rb_almost_full;
+logic pcie_rb_update_valid;
+logic [11:0] pcie_rb_update_size;
+logic [27:0] pdumeta_cpu_data;
+logic pdumeta_cpu_valid;
+logic [9:0] pdumeta_cnt;
+
+// DRAM signals
+logic [547:0] ddr_wr_req_data;
+logic ddr_wr_req_valid;
+logic ddr_wr_req_almost_full;
+logic [33:0] ddr_rd_req_data;
+logic ddr_rd_req_valid;
+logic ddr_rd_req_almost_full;
+logic [511:0] ddr_rd_resp_data;
+logic ddr_rd_resp_valid;
+logic ddr_rd_resp_almost_full;
+logic ddr_rd_resp_valid_int;
+
+// eSRAM signals
+logic esram_pll_lock;
+logic esram_pkt_buf_wren;
+logic [PKTBUF_AWIDTH-1:0] pkt_buf_wraddress;
+logic [PKTBUF_AWIDTH-1:0] pkt_buf_rdaddress;
+logic [519:0] pkt_buf_wrdata;
+logic pkt_buf_rden;
+logic pkt_buf_rd_valid;
+logic [519:0] pkt_buf_rddata;
+
+// JTAG
+logic [29:0] s_addr;
+logic s_read;
+logic s_write;
+logic [31:0] s_writedata;
+logic [31:0] s_readdata;
+logic s_readdata_valid;
+logic [31:0] top_readdata;
+logic top_readdata_valid;
+logic [31:0] dram_readdata;
+logic dram_readdata_valid;
+
+logic stop;
+logic [31:0] stop_cnt;
+
+logic [3:0] tx_cnt;
+logic [15:0] reg_cnt;
+logic [15:0] reg_recv_cnt;
+
+// We send a burst of packets in a window, and
+// set the rate by limiting the window size.
+logic [7:0] rate_cnt;
+
+// Simulated PDU metadata
+logic sim_pdumeta_cpu_valid;
+pdu_metadata_t sim_pdumeta_cpu_data;
+logic [7:0] pdumeta_wait_cnt;
+
+// Reassembly profiling
+logic done_profiling;
+logic [63:0] time_in_num_cycles_end;
+logic [63:0] time_out_num_cycles_end;
+logic [63:0] time_in_num_cycles_start;
+logic [63:0] time_out_num_cycles_start;
+logic [63:0] total_in_reassembler_bytes;
+logic [63:0] total_out_reassembler_bytes;
+
+typedef enum {
+ PDUMETA_IDLE,
+ PDUMETA_WAIT
+} pdumeta_state_t;
+pdumeta_state_t pdumeta_state;
+
+typedef enum {
+ DISABLE_PCIE,
+ WRITE_CTRL,
+ ENABLE_PCIE,
+ IDLE,
+ READ_TOP_REG,
+ DONE
+} c_state_t;
+c_state_t conf_state;
+
+initial clk_rxmac = 0;
+initial clk_txmac = 1;
+initial clk_user = 0;
+initial clk_user_high = 0;
+initial clk_esram_ref = 0;
+initial clk_pcie = 0;
+initial clk_status = 0;
+initial l8_tx_ready = 0;
+initial tx_cnt = 0;
+initial rst = 1;
+initial cnt = 0;
+initial rate_cnt = 0;
+initial stop = 0;
+initial stop_cnt = 0;
+
+initial done_profiling = 0;
+initial time_in_num_cycles_end = 0;
+initial time_in_num_cycles_start = 0;
+initial time_out_num_cycles_start = 0;
+initial total_in_reassembler_bytes = 0;
+initial total_out_reassembler_bytes = 0;
+
+always #(period) clk_status = ~clk_status;
+always #(period_rx) clk_rxmac = ~clk_rxmac;
+always #(period_tx) clk_txmac = ~clk_txmac;
+always #(period_user) clk_user = ~clk_user;
+always #(period_pcie) clk_pcie = ~clk_pcie;
+always #(period_user_high) clk_user_high = ~clk_user_high;
+always #(period_esram_ref) clk_esram_ref = ~clk_esram_ref;
+
+`ifdef BRAM_CHECKPKT_BUF
+assign clk_emif = clk_pcie;
+assign emif_reset_n = !rst;
+`else
+initial clk_emif = 0;
+always #(period_emif) clk_emif = ~clk_emif;
+initial
+begin
+ emif_reset_n = 0;
+ #20 emif_reset_n = 1;
+end
+`endif
+
+// Read packet data from ROM
+initial begin : init_block
+ integer i;
+ for (i = lo; i <= hi; i = i + 1) begin
+ arr[i] = {((data_width + 1) / 2){2'b0}};
+ end
+ $readmemh(`PKT_FILE, arr, lo, hi);
+end
+
+assign l8_rx_startofpacket = arr[addr][524];
+assign l8_rx_endofpacket = arr[addr][520];
+assign l8_rx_empty = arr[addr][517:512]; // 6-bit signal: the former 8-bit slice [519:512] was implicitly truncated
+assign l8_rx_data = arr[addr][511:0];
+
+always @(posedge clk_txmac)
+begin
+ if (tx_cnt < 4'd10) begin
+ tx_cnt <= tx_cnt + 1'b1;
+ end
+ else begin
+ tx_cnt <= 0;
+ l8_tx_ready <= ~l8_tx_ready;
+ end
+end
+
+always @(posedge clk_rxmac) begin
+ cnt <= cnt + 1;
+ if (cnt == 1) begin
+ rst <= 1;
+ addr <= 0;
+ rate_cnt <= 0;
+ l8_rx_valid <= 0;
+ end
+ else if (cnt == 35) begin
+ rst <= 0;
+ end
+ // Give sufficient time for reset
+ else if (cnt == 20000) begin
+ l8_rx_valid <= 1;
+ end
+ else if (cnt >= 20001) begin
+ if (rate_cnt < 100 * (RATE * period_rx / (64 * 8))) begin
+ if (addr < hi) begin
+ addr <= addr + 1;
+ l8_rx_valid <= 1;
+ end
+ else if (!stop && stop_cnt == 0) begin
+ l8_rx_valid <= 0;
+ stop_cnt <= STOP_DELAY;
+ end
+ end
+ else begin
+ l8_rx_valid <= 0;
+ end
+
+ // Wrap-around rate counter
+ if (rate_cnt == 99) begin
+ rate_cnt <= 0;
+ end
+ else begin
+ rate_cnt <= rate_cnt + 1;
+ end
+ end
+
+ if (stop_cnt != 0) begin
+ stop_cnt <= stop_cnt - 1;
+ if (stop_cnt == 1) begin
+ stop <= 1;
+ $display("STOP READING!");
+ end
+ end
+end
+
+// Reassembly throughput profiling: latch cnt at the first/last RX EOP and the first TX EOP
+always @(posedge clk_rxmac) begin
+ if (rst) begin
+ time_in_num_cycles_end <= 0;
+ time_in_num_cycles_start <= 0;
+ end
+ else if (l8_rx_valid & l8_rx_endofpacket) begin
+ if (time_in_num_cycles_start == 0) begin
+ time_in_num_cycles_start <= cnt;
+ end
+ time_in_num_cycles_end <= cnt;
+ end
+end
+always @(posedge clk_txmac) begin
+ if (rst) begin
+ time_out_num_cycles_start <= 0;
+ end
+ else if (l8_tx_valid & l8_tx_endofpacket) begin
+ if (time_out_num_cycles_start == 0) begin
+ time_out_num_cycles_start <= cnt;
+ end
+ end
+end
+
+// Fake pdumeta_cpu: answer a DDR write request with NOMATCH PDU metadata after a fixed wait
+always @(posedge clk_pcie) begin
+ if (rst) begin
+ pdumeta_wait_cnt <= 0;
+ sim_pdumeta_cpu_data <= 0;
+ sim_pdumeta_cpu_valid <= 0;
+ pdumeta_state <= PDUMETA_IDLE;
+ end
+ else begin
+ case (pdumeta_state)
+ PDUMETA_IDLE: begin
+ pdumeta_wait_cnt <= 0;
+ sim_pdumeta_cpu_valid <= 0;
+ if (ddr_wr_req_valid) begin
+ pdumeta_state <= PDUMETA_WAIT;
+ sim_pdumeta_cpu_data.pdu_id <= ddr_wr_req_data[538:512] >> 5; // Addr
+ sim_pdumeta_cpu_data.action <= ACTION_NOMATCH;
+ sim_pdumeta_cpu_data.flits <= 24;
+ sim_pdumeta_cpu_data.pdu_size <= 1496;
+ end
+ end
+ PDUMETA_WAIT: begin
+ pdumeta_wait_cnt <= pdumeta_wait_cnt + 1;
+ if (pdumeta_wait_cnt == 25) begin
+ sim_pdumeta_cpu_valid <= 1;
+ pdumeta_state <= PDUMETA_IDLE;
+ end
+ end
+ endcase
+ end
+end
+
+// Configure the design over the status bus; after stop, read back the top-level byte counters
+always @(posedge clk_status) begin
+ if (rst) begin
+ s_addr <= 0;
+ s_read <= 0;
+ reg_cnt <= 0;
+ s_write <= 0;
+ s_writedata <= 0;
+ reg_recv_cnt <= 0;
+ conf_state <= ENABLE_PCIE;
+
+ done_profiling <= 0;
+ total_in_reassembler_bytes <= 0;
+ total_out_reassembler_bytes <= 0;
+ end
+ else begin
+ case (conf_state)
+ DISABLE_PCIE: begin
+ conf_state <= WRITE_CTRL;
+ s_write <= 1;
+ s_addr <= 30'h2A00_000F;
+ s_writedata <= 1;
+ end
+ ENABLE_PCIE: begin
+ conf_state <= WRITE_CTRL;
+ s_write <= 1;
+ s_addr <= 30'h2A00_000F;
+ s_writedata <= 0;
+ end
+ WRITE_CTRL: begin
+ conf_state <= IDLE;
+ s_write <= 1;
+ s_addr <= 30'h2200_0037;
+ // Write non-fast-pattern forward threshold
+ s_writedata <= nf_forward_threshold;
+ end
+ IDLE: begin
+ s_write <= 0;
+ if (stop) begin
+ conf_state <= READ_TOP_REG;
+ end
+ end
+ READ_TOP_REG: begin
+ if (reg_cnt < NUM_REG) begin
+ s_read <= 1;
+ s_addr <= {TOP_REG, 25'b0} + reg_cnt;
+ reg_cnt <= reg_cnt + 1;
+ end
+ else begin
+ s_read <= 0;
+ end
+
+ if (reg_recv_cnt >= NUM_REG) begin
+ reg_cnt <= 0;
+ reg_recv_cnt <= 0;
+ conf_state <= DONE;
+ $display("------------");
+ end
+ else if (top_readdata_valid) begin
+ reg_recv_cnt <= reg_recv_cnt + 1;
+ case (reg_recv_cnt)
+ REG_PARSER_OUT_BYTES_L : total_in_reassembler_bytes[31:0] <= top_readdata;
+ REG_PARSER_OUT_BYTES_H : total_in_reassembler_bytes[63:32] <= top_readdata;
+ REG_DM_OUT_BYTES_L : total_out_reassembler_bytes[31:0] <= top_readdata;
+ REG_DM_OUT_BYTES_H : total_out_reassembler_bytes[63:32] <= top_readdata;
+ default : ;
+ endcase
+ end
+ end
+ DONE: begin
+ done_profiling <= 1;
+ end
+ endcase
+ end
+end
+
+if ((`TEST_CASE == "TEST_FCFS_BASIC_INORDER") ||
+ (`TEST_CASE == "TEST_FCFS_BASIC_OOO")) begin
+// TEST_FCFS_BASIC_INORDER: Test FCFS scheduling with in-order flows
+// TEST_FCFS_BASIC_OOO: Test FCFS scheduling with innocent OOO flows
+if (SCHEDULER_REASSEMBLY_POLICY != "FCFS") begin
+ $error("FAIL %s: Incorrect scheduling policy %s (expected FCFS)",
+ `TEST_CASE, SCHEDULER_REASSEMBLY_POLICY);
+end
+
+always @(posedge clk_status) begin
+ if (done_profiling) begin
+ if (total_in_reassembler_bytes == 0) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Reassembler input byte count is 0");
+ end
+ else if (time_out_num_cycles_start == 0) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Valid was never asserted on Ethernet TX");
+ end
+ else if (total_out_reassembler_bytes != total_in_reassembler_bytes) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Reassembler I/O byte count does not match");
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ end
+ $finish;
+ end
+end
+end
+
+else if ((`TEST_CASE == "TEST_WSJF_BASIC_INORDER") ||
+ (`TEST_CASE == "TEST_WSJF_BASIC_OOO")) begin
+// TEST_WSJF_BASIC_INORDER: Test WSJF scheduling with in-order flows
+// TEST_WSJF_BASIC_OOO: Test WSJF scheduling with innocent OOO flows
+if (SCHEDULER_REASSEMBLY_POLICY != "WSJF") begin
+ $error("FAIL %s: Incorrect scheduling policy %s (expected WSJF)",
+ `TEST_CASE, SCHEDULER_REASSEMBLY_POLICY);
+end
+
+always @(posedge clk_status) begin
+ if (done_profiling) begin
+ if (total_in_reassembler_bytes == 0) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Reassembler input byte count is 0");
+ end
+ else if (time_out_num_cycles_start == 0) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Valid was never asserted on Ethernet TX");
+ end
+ else if (total_out_reassembler_bytes != total_in_reassembler_bytes) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Reassembler I/O byte count does not match");
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ end
+ $finish;
+ end
+end
+end
+
+else if (`TEST_CASE == "TEST_WSJF_ACA_MITIGATION") begin
+// Test WSJF scheduling with adversarial OOO flows. The parameters
+// (e.g., expected goodput) might need some tuning, but in general,
+// we expect WSJF (the scheduling policy underlying SurgeProtector)
+// to keep the goodput relatively constant even under attack.
+if (SCHEDULER_REASSEMBLY_POLICY != "WSJF") begin
+ $error("FAIL %s: Incorrect scheduling policy %s (expected WSJF)",
+ `TEST_CASE, SCHEDULER_REASSEMBLY_POLICY);
+end
+
+// Note: Since we have no way of differentiating between innocent and
+// adversarial traffic (without breaking abstraction or exposing some
+// internal state, both of which are hacky), we don't try and measure
+// goodput at all. Instead, we use the _total_ bandwidth as proxy for
+// goodput, and simply ensure that the delta between the input/output
+// bandwidths is not too large.
+logic [63:0] total_output_bandwidth_mbps;
+logic [63:0] total_input_bandwidth_mbps;
+logic [63:0] max_bandwidth_drop_mbps;
+logic [63:0] bandwidth_drop_mbps;
+logic is_bandwith_drop_pos;
+
+// Tolerate a bandwidth drop of (2 * RATE_ATTACK) on the output,
+// corresponding to a goodput drop of at most RATE_ATTACK Gbps.
+assign max_bandwidth_drop_mbps = (1000 * (2 * `RATE_ATTACK));
+assign time_out_num_cycles_end = cnt;
+
+always @(*) begin
+ total_output_bandwidth_mbps = 0;
+ total_input_bandwidth_mbps = 0;
+ is_bandwith_drop_pos = 0;
+ bandwidth_drop_mbps = 0;
+
+ if (done_profiling &&
+ (time_in_num_cycles_start != time_in_num_cycles_end) &&
+ (time_out_num_cycles_start != time_out_num_cycles_end)) begin
+ total_input_bandwidth_mbps = (
+ (1000 * (total_in_reassembler_bytes * 8)) /
+ ((time_in_num_cycles_end - time_in_num_cycles_start) * period_rx)
+ );
+ total_output_bandwidth_mbps = (
+ (1000 * (total_out_reassembler_bytes * 8)) /
+ ((time_out_num_cycles_end - time_out_num_cycles_start) * period_tx)
+ );
+ is_bandwith_drop_pos = (total_input_bandwidth_mbps >=
+ total_output_bandwidth_mbps);
+
+ if (is_bandwith_drop_pos) begin
+ bandwidth_drop_mbps = (total_input_bandwidth_mbps -
+ total_output_bandwidth_mbps);
+ end
+ end
+end
+
+always @(posedge clk_status) begin
+ if (done_profiling) begin
+ $display("Input bandwidth: %0d Mbps, output bandwidth: %0d Mbps",
+ total_input_bandwidth_mbps, total_output_bandwidth_mbps);
+
+ if (total_in_reassembler_bytes == 0) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Reassembler input byte count is 0");
+ end
+ else if (time_out_num_cycles_start == 0) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Valid was never asserted on Ethernet TX");
+ end
+ else if (is_bandwith_drop_pos) begin
+ // Ensure that bandwidth drop is in an acceptable range
+ if (bandwidth_drop_mbps >= max_bandwidth_drop_mbps) begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Loss in goodput (%0d Mbps) exceeds ", bandwidth_drop_mbps,
+ "maximum allowable value under WSJF (%0d Mbps)", max_bandwidth_drop_mbps);
+ end
+ else begin
+ $display("PASS %s", `TEST_CASE);
+ end
+ end
+ else begin
+ $display("FAIL %s: ", `TEST_CASE,
+ "Loss in goodput should be a positive quantity");
+ end
+ $finish;
+ end
+end
+end
+
+else begin
+ $error("FAIL: Unknown test %s", `TEST_CASE);
+end
+`endif
+
+assign ddr_rd_resp_valid_int = ddr_rd_resp_valid & !ddr_rd_resp_almost_full;
+stats stats(
+ .arst (rst),
+
+ .clk_tx (clk_txmac),
+ .tx_ready (l8_tx_ready),
+ .tx_valid (l8_tx_valid),
+ .tx_data (l8_tx_data),
+ .tx_sop (l8_tx_startofpacket),
+ .tx_eop (l8_tx_endofpacket),
+ .tx_empty (l8_tx_empty),
+
+ .clk_rx (clk_rxmac),
+ .rx_sop (l8_rx_startofpacket),
+ .rx_eop (l8_rx_endofpacket),
+ .rx_empty (l8_rx_empty),
+ .rx_data (l8_rx_data),
+ .rx_valid (l8_rx_valid),
+
+ .rx_ready (stats_rx_ready),
+ .o_rx_sop (stats_rx_startofpacket),
+ .o_rx_eop (stats_rx_endofpacket),
+ .o_rx_empty (stats_rx_empty),
+ .o_rx_data (stats_rx_data),
+ .o_rx_valid (stats_rx_valid),
+
+ .clk_status (clk_status),
+ .status_addr (s_addr),
+ .status_read (s_read),
+ .status_write (s_write),
+ .status_writedata (s_writedata),
+ .status_readdata (s_readdata),
+ .status_readdata_valid (s_readdata_valid)
+);
+
+dc_fifo_wrapper input_fifo (
+ .in_clk (clk_rxmac),
+ .in_reset_n (!rst),
+ .out_clk (clk_user),
+ .out_reset_n (!rst),
+ .in_data (stats_rx_data),
+ .in_valid (stats_rx_valid),
+ .in_ready (stats_rx_ready),
+ .in_startofpacket (stats_rx_startofpacket),
+ .in_endofpacket (stats_rx_endofpacket),
+ .in_empty (stats_rx_empty),
+ .out_data (top_in_data),
+ .out_valid (top_in_valid),
+ .out_ready (1'b1),
+ .out_startofpacket (top_in_startofpacket),
+ .out_endofpacket (top_in_endofpacket),
+ .out_empty (top_in_empty)
+);
+
+
+top partition_1 (
+ // Clk & rst
+ .clk (clk_user),
+ .rst (rst),
+ .clk_high (clk_user_high),
+ .rst_high (rst),
+ .clk_pcie (clk_pcie),
+ .rst_pcie (rst),
+ // Ethernet in & out data
+ .in_data (top_in_data),
+ .in_valid (top_in_valid),
+ .in_sop (top_in_startofpacket),
+ .in_eop (top_in_endofpacket),
+ .in_empty (top_in_empty),
+ .out_data (top_out_data),
+ .out_valid (top_out_valid),
+ .out_sop (top_out_startofpacket),
+ .out_eop (top_out_endofpacket),
+ .out_empty (top_out_empty),
+ .out_ready (top_out_ready),
+ // eSRAM/BRAM
+ .pkt_buf_wren (pkt_buf_wren),
+ .pkt_buf_wraddress (pkt_buf_wraddress),
+ .pkt_buf_wrdata (pkt_buf_wrdata),
+ .pkt_buf_rden (pkt_buf_rden),
+ .pkt_buf_rdaddress (pkt_buf_rdaddress),
+ .pkt_buf_rd_valid (pkt_buf_rd_valid),
+ .pkt_buf_rddata (pkt_buf_rddata),
+ // PCIe
+ .pcie_rb_wr_data (pcie_rb_wr_data),
+ .pcie_rb_wr_addr (pcie_rb_wr_addr),
+ .pcie_rb_wr_en (pcie_rb_wr_en),
+ .pcie_rb_wr_base_addr (pcie_rb_wr_base_addr),
+ .pcie_rb_almost_full (pcie_rb_almost_full),
+ .pcie_rb_update_valid (pcie_rb_update_valid),
+ .pcie_rb_update_size (pcie_rb_update_size),
+ .disable_pcie (1'b0),
+`ifdef SIM
+ .pdumeta_cpu_data (sim_pdumeta_cpu_data),
+ .pdumeta_cpu_valid (sim_pdumeta_cpu_valid),
+`else
+ .pdumeta_cpu_data (pdumeta_cpu_data),
+ .pdumeta_cpu_valid (pdumeta_cpu_valid),
+`endif
+ .pdumeta_cnt (pdumeta_cnt),
+ // DRAM
+ .ddr_wr_req_data (ddr_wr_req_data),
+ .ddr_wr_req_valid (ddr_wr_req_valid),
+ .ddr_wr_req_almost_full (ddr_wr_req_almost_full),
+ .ddr_rd_req_data (ddr_rd_req_data),
+ .ddr_rd_req_valid (ddr_rd_req_valid),
+ .ddr_rd_req_almost_full (ddr_rd_req_almost_full),
+ .ddr_rd_resp_data (ddr_rd_resp_data),
+ .ddr_rd_resp_valid (ddr_rd_resp_valid),
+ .ddr_rd_resp_almost_full (ddr_rd_resp_almost_full),
+ // JTAG
+ .clk_status (clk_status),
+ .status_addr (s_addr),
+ .status_read (s_read),
+ .status_write (s_write),
+ .status_writedata (s_writedata),
+ .status_readdata (top_readdata),
+ .status_readdata_valid (top_readdata_valid)
+);
+
+dc_fifo_wrapper_infill out_fifo0 (
+ .in_clk (clk_user),
+ .in_reset_n (!rst),
+ .out_clk (clk_txmac),
+ .out_reset_n (!rst),
+ .in_csr_address (0),
+ .in_csr_read (1'b1),
+ .in_csr_write (1'b0),
+ .in_csr_readdata (),
+ .in_csr_writedata (),
+ .in_data (top_out_data),
+ .in_valid (top_out_valid),
+ .in_ready (top_out_ready),
+ .in_startofpacket (top_out_startofpacket),
+ .in_endofpacket (top_out_endofpacket),
+ .in_empty (top_out_empty),
+ .out_data (l8_tx_data),
+ .out_valid (l8_tx_valid),
+ .out_ready (l8_tx_ready),
+ .out_startofpacket (l8_tx_startofpacket),
+ .out_endofpacket (l8_tx_endofpacket),
+ .out_empty (l8_tx_empty)
+);
+
+pcie_top pcie (
+ .refclk_clk (1'b0), // .refclk.clk
+ .pcie_rstn_npor (1'b1), // .pcie_rstn.npor
+ .pcie_rstn_pin_perst (1'b0), // .pin_perst
+ .xcvr_rx_in0 (1'b0), // .xcvr.rx_in0
+ .xcvr_rx_in1 (1'b0), // .rx_in1
+ .xcvr_rx_in2 (1'b0), // .rx_in2
+ .xcvr_rx_in3 (1'b0), // .rx_in3
+ .xcvr_rx_in4 (1'b0), // .rx_in4
+ .xcvr_rx_in5 (1'b0), // .rx_in5
+ .xcvr_rx_in6 (1'b0), // .rx_in6
+ .xcvr_rx_in7 (1'b0), // .rx_in7
+ .xcvr_rx_in8 (1'b0), // .rx_in8
+ .xcvr_rx_in9 (1'b0), // .rx_in9
+ .xcvr_rx_in10 (1'b0), // .rx_in10
+ .xcvr_rx_in11 (1'b0), // .rx_in11
+ .xcvr_rx_in12 (1'b0), // .rx_in12
+ .xcvr_rx_in13 (1'b0), // .rx_in13
+ .xcvr_rx_in14 (1'b0), // .rx_in14
+ .xcvr_rx_in15 (1'b0), // .rx_in15
+ .xcvr_tx_out0 (), // .tx_out0
+ .xcvr_tx_out1 (), // .tx_out1
+ .xcvr_tx_out2 (), // .tx_out2
+ .xcvr_tx_out3 (), // .tx_out3
+ .xcvr_tx_out4 (), // .tx_out4
+ .xcvr_tx_out5 (), // .tx_out5
+ .xcvr_tx_out6 (), // .tx_out6
+ .xcvr_tx_out7 (), // .tx_out7
+ .xcvr_tx_out8 (), // .tx_out8
+ .xcvr_tx_out9 (), // .tx_out9
+ .xcvr_tx_out10 (), // .tx_out10
+ .xcvr_tx_out11 (), // .tx_out11
+ .xcvr_tx_out12 (), // .tx_out12
+ .xcvr_tx_out13 (), // .tx_out13
+ .xcvr_tx_out14 (), // .tx_out14
+ .xcvr_tx_out15 (), // .tx_out15
+ .pcie_clk (clk_pcie),
+ .pcie_reset_n (!rst),
+ .pcie_rb_wr_data (pcie_rb_wr_data),
+ .pcie_rb_wr_addr (pcie_rb_wr_addr),
+ .pcie_rb_wr_en (pcie_rb_wr_en),
+ .pcie_rb_wr_base_addr (pcie_rb_wr_base_addr),
+ .pcie_rb_almost_full (pcie_rb_almost_full),
+ .pcie_rb_update_valid (pcie_rb_update_valid),
+ .pcie_rb_update_size (pcie_rb_update_size),
+ .disable_pcie (disable_pcie),
+ .pdumeta_cpu_data (pdumeta_cpu_data),
+ .pdumeta_cpu_valid (pdumeta_cpu_valid),
+ .pdumeta_cnt (pdumeta_cnt),
+ .clk_status (clk_status),
+ .status_addr (s_addr),
+ .status_read (s_read),
+ .status_write (s_write),
+ .status_writedata (s_writedata),
+ .status_readdata (pcie_readdata),
+ .status_readdata_valid (pcie_readdata_valid)
+);
+
+dram_wrapper dram_check_pkt_buffer(
+ .emif_reset_n (emif_reset_n),
+ .emif_clk (clk_emif),
+ .clk (clk_pcie),
+ .rst (rst),
+ .ddr_wr_req_data (ddr_wr_req_data),
+ .ddr_wr_req_valid (ddr_wr_req_valid),
+ .ddr_wr_req_almost_full (ddr_wr_req_almost_full),
+ .ddr_rd_req_data (ddr_rd_req_data),
+ .ddr_rd_req_valid (ddr_rd_req_valid),
+ .ddr_rd_req_almost_full (ddr_rd_req_almost_full),
+ .ddr_rd_resp_data (ddr_rd_resp_data),
+ .ddr_rd_resp_valid (ddr_rd_resp_valid),
+ .ddr_rd_resp_ready (!ddr_rd_resp_almost_full),
+ //JTAG
+ .clk_status (clk_status),
+ .status_addr (s_addr),
+ .status_read (s_read),
+ .status_write (s_write),
+ .status_writedata (s_writedata),
+ .status_readdata (dram_readdata),
+ .status_readdata_valid (dram_readdata_valid)
+);
+
+esram_wrapper esram_pkt_buffer(
+ .clk_esram_ref (clk_esram_ref), //100 MHz
+ .esram_pll_lock (esram_pll_lock),
+ .clk_esram (clk_user), // 200 MHz
+ .wren (pkt_buf_wren),
+ .wraddress (pkt_buf_wraddress),
+ .wrdata (pkt_buf_wrdata),
+ .rden (pkt_buf_rden),
+ .rdaddress (pkt_buf_rdaddress),
+ .rd_valid (pkt_buf_rd_valid),
+ .rddata (pkt_buf_rddata)
+);
+
+endmodule
diff --git a/pigasus/hardware/scripts/ip_gen.tcl b/pigasus/hardware/scripts/ip_gen.tcl
index 88dfcb9..3460a87 100644
--- a/pigasus/hardware/scripts/ip_gen.tcl
+++ b/pigasus/hardware/scripts/ip_gen.tcl
@@ -1426,6 +1426,70 @@ proc do_create_ip_gen {} {
set_instantiation_interface_assignment_value clock ui.blockdiagram.direction {input}
add_instantiation_interface_port clock clock clk 1 STD_LOGIC Input
save_instantiation
+ add_component sc_fifo ip/ip_gen/sc_fifo.ip fifo sc_fifo 19.1
+ load_component sc_fifo
+ set_component_parameter_value GUI_AlmostEmpty {0}
+ set_component_parameter_value GUI_AlmostEmptyThr {1}
+ set_component_parameter_value GUI_AlmostFull {0}
+ set_component_parameter_value GUI_AlmostFullThr {1}
+ set_component_parameter_value GUI_CLOCKS_ARE_SYNCHRONIZED {0}
+ set_component_parameter_value GUI_Clock {0}
+ set_component_parameter_value GUI_DISABLE_DCFIFO_EMBEDDED_TIMING_CONSTRAINT {1}
+ set_component_parameter_value GUI_Depth {2048}
+ set_component_parameter_value GUI_ENABLE_ECC {0}
+ set_component_parameter_value GUI_Empty {1}
+ set_component_parameter_value GUI_Full {1}
+ set_component_parameter_value GUI_LE_BasedFIFO {0}
+ set_component_parameter_value GUI_LegacyRREQ {1}
+ set_component_parameter_value GUI_MAX_DEPTH {Auto}
+ set_component_parameter_value GUI_MAX_DEPTH_BY_9 {0}
+ set_component_parameter_value GUI_OVERFLOW_CHECKING {0}
+ set_component_parameter_value GUI_Optimize {0}
+ set_component_parameter_value GUI_Optimize_max {0}
+ set_component_parameter_value GUI_RAM_BLOCK_TYPE {Auto}
+ set_component_parameter_value GUI_TESTBENCH {0}
+ set_component_parameter_value GUI_UNDERFLOW_CHECKING {0}
+ set_component_parameter_value GUI_UsedW {1}
+ set_component_parameter_value GUI_Width {8}
+ set_component_parameter_value GUI_dc_aclr {0}
+ set_component_parameter_value GUI_delaypipe {4}
+ set_component_parameter_value GUI_diff_widths {0}
+ set_component_parameter_value GUI_msb_usedw {0}
+ set_component_parameter_value GUI_output_width {8}
+ set_component_parameter_value GUI_read_aclr_synch {0}
+ set_component_parameter_value GUI_rsEmpty {1}
+ set_component_parameter_value GUI_rsFull {0}
+ set_component_parameter_value GUI_rsUsedW {0}
+ set_component_parameter_value GUI_sc_aclr {0}
+ set_component_parameter_value GUI_sc_sclr {0}
+ set_component_parameter_value GUI_synStage {3}
+ set_component_parameter_value GUI_write_aclr_synch {0}
+ set_component_parameter_value GUI_wsEmpty {0}
+ set_component_parameter_value GUI_wsFull {1}
+ set_component_parameter_value GUI_wsUsedW {0}
+ set_component_project_property HIDE_FROM_IP_CATALOG {false}
+ save_component
+ load_instantiation sc_fifo
+ remove_instantiation_interfaces_and_ports
+ add_instantiation_interface fifo_input conduit INPUT
+ set_instantiation_interface_parameter_value fifo_input associatedClock {}
+ set_instantiation_interface_parameter_value fifo_input associatedReset {}
+ set_instantiation_interface_parameter_value fifo_input prSafe {false}
+ set_instantiation_interface_assignment_value fifo_input ui.blockdiagram.direction {input}
+ add_instantiation_interface_port fifo_input data datain 8 STD_LOGIC_VECTOR Input
+ add_instantiation_interface_port fifo_input wrreq wrreq 1 STD_LOGIC Input
+ add_instantiation_interface_port fifo_input rdreq rdreq 1 STD_LOGIC Input
+ add_instantiation_interface_port fifo_input clock clk 1 STD_LOGIC Input
+ add_instantiation_interface fifo_output conduit INPUT
+ set_instantiation_interface_parameter_value fifo_output associatedClock {}
+ set_instantiation_interface_parameter_value fifo_output associatedReset {}
+ set_instantiation_interface_parameter_value fifo_output prSafe {false}
+ set_instantiation_interface_assignment_value fifo_output ui.blockdiagram.direction {output}
+ add_instantiation_interface_port fifo_output q dataout 8 STD_LOGIC_VECTOR Output
+ add_instantiation_interface_port fifo_output usedw usedw 11 STD_LOGIC_VECTOR Output
+ add_instantiation_interface_port fifo_output full full 1 STD_LOGIC Output
+ add_instantiation_interface_port fifo_output empty empty 1 STD_LOGIC Output
+ save_instantiation
add_component singledsp ip/ip_gen/singledsp.ip altera_s10_native_fixed_point_dsp singledsp 19.1.0
load_component singledsp
set_component_parameter_value accum_2nd_pipeline_clock {0}
@@ -2427,6 +2491,9 @@ proc do_create_ip_gen {} {
+
+
+
diff --git a/pigasus/hardware/scripts/manipulate.py b/pigasus/hardware/scripts/manipulate.py
index 60499b3..30d9d3b 100755
--- a/pigasus/hardware/scripts/manipulate.py
+++ b/pigasus/hardware/scripts/manipulate.py
@@ -1,5 +1,6 @@
#! /usr/bin/env python3
+from math import ceil, log2
import sys
import os
from os import listdir
@@ -190,7 +191,12 @@ def change_ram(fname,new_name,dwidth,awidth,mem_size):
new_file.append(new_string)
new_string = " parameter DEPTH = "+str(mem_size)+";\n"
new_file.append(new_string)
+ new_string = " parameter IS_OUTDATA_REG = 1;\n\n"
+ new_file.append(new_string)
+ new_string = (" localparam OUTDATA_REG = (\n" +
+ " (IS_OUTDATA_REG == 0) ? \"UNREGISTERED\" : \"CLOCK0\");\n\n")
+ new_file.append(new_string)
if str(dwidth-1) in line and ("q" in line or "data" in line or "sub_wire" in line):
#print (line)
@@ -212,6 +218,9 @@ def change_ram(fname,new_name,dwidth,awidth,mem_size):
#print ("mem_size = "+str(mem_size))
new_line = line.replace(str(mem_size),"DEPTH")
new_file.append(new_line)
+ elif "outdata_reg_a" in line or "outdata_reg_b" in line:
+ new_line = line.replace("\"CLOCK0\"", "OUTDATA_REG")
+ new_file.append(new_line)
else:
new_file.append(line)
f.close()
@@ -282,6 +291,71 @@ def change_diff_width_ram(fname,new_name,w_dwidth,w_awidth,w_depth,r_dwidth,r_aw
for line in new_file:
f.write("%s" % line)
+def change_scfifo(fname, dwidth, depth):
+    """Parameterize a generated scfifo Verilog wrapper in place.
+
+    Rewrites `fname` so that the literal data width, depth and usedw width
+    become DWIDTH / DEPTH / LOG_DEPTH, and so that show-ahead and the RAM
+    output register are selected by IS_SHOWAHEAD / IS_OUTDATA_REG.
+
+    fname:  path of the Verilog file; it is read fully, then overwritten.
+    dwidth: width literal to search for; also the default value of DWIDTH.
+    depth:  depth literal to search for; also the default value of DEPTH.
+    """
+    width, width_msb = str(dwidth), str(dwidth - 1)
+    addr_bits = ceil(log2(depth))
+    addr, addr_msb = str(addr_bits), str(addr_bits - 1)
+
+    # Declarations injected once, immediately before the first "input" line.
+    header = [
+        " parameter DWIDTH = " + width + ";\n",
+        " parameter DEPTH = " + str(depth) + ";\n",
+        " parameter IS_SHOWAHEAD = 0;\n",
+        " parameter IS_OUTDATA_REG = 0;\n\n",
+        " localparam LOG_DEPTH = $clog2(DEPTH);\n",
+        (" localparam LPM_SHOWAHEAD = (\n" +
+         " (IS_SHOWAHEAD == 0) ? \"OFF\" : \"ON\");\n\n"),
+        (" localparam ADD_RAM_OUTPUT_REGISTER = (\n" +
+         " (IS_OUTDATA_REG == 0) ? \"OFF\" : \"ON\");\n\n"),
+    ]
+    # Ordered rules for parameter-assignment lines: the first keyword found in
+    # a line selects the literal that is swapped for its symbolic name.
+    keyword_rules = (
+        ("add_ram_output_register", "\"OFF\"", "ADD_RAM_OUTPUT_REGISTER"),
+        ("lpm_numwords", str(depth), "DEPTH"),
+        ("lpm_showahead", "\"OFF\"", "LPM_SHOWAHEAD"),
+        ("lpm_width ", width, "DWIDTH"),
+        ("lpm_widthu", addr, "LOG_DEPTH"),
+    )
+    out_lines = []
+    header_pending = True
+    # The context manager closes the input even if a rewrite step raises.
+    with open(fname, 'r') as src:
+        for line in src:
+            if header_pending and "input" in line:
+                header_pending = False
+                out_lines.extend(header)
+            has_bus = "data" in line or "sub_wire" in line
+            if width_msb in line and ("q" in line or has_bus):
+                # Vector bounds, e.g. [7:0] for dwidth 8, become [DWIDTH-1:0].
+                new_line = line.replace(width_msb, "DWIDTH-1")
+                if width in line:
+                    # Drop sized-literal prefixes, e.g. 8'h for dwidth 8.
+                    new_line = new_line.replace(width + "'h", "")
+            elif width in line and has_bus:
+                new_line = line.replace(width, "DWIDTH")
+            elif addr_msb in line and ("usedw" in line or "sub_wire" in line):
+                new_line = line.replace(addr_msb, "LOG_DEPTH-1")
+            else:
+                new_line = line
+                for keyword, literal, symbol in keyword_rules:
+                    if keyword in line:
+                        new_line = line.replace(literal, symbol)
+                        break
+            out_lines.append(new_line)
+    with open(fname, 'w') as dst:
+        dst.writelines(out_lines)
+
def change_multiplexer(fname,new_name):
new_file = []
first_input = 1
@@ -367,6 +441,7 @@ def copy_file(ip_path,ip,dest_dir,keyword):
ip_list.append("dc_fifo_core_infill_mlab")
ip_list.append("dc_fifo_wrapper_mlab")
ip_list.append("dc_fifo_core_mlab")
+ip_list.append("sc_fifo")
#rom
@@ -464,6 +539,23 @@ def copy_file(ip_path,ip,dest_dir,keyword):
os.rename(dst_file,new_dst_file)
change_module_name(new_dst_file,ip)
+ elif "sc_fifo" in ip:
+ file_path = ip_path + ip
+ dirs = [f for f in listdir(file_path) if isdir(join(file_path, f))]
+ for gen_dir in dirs:
+ if "fifo" in gen_dir:
+ sub_dir = gen_dir
+
+ file_path = ip_path + ip + "/" + sub_dir + "/synth/"
+ files = [f for f in listdir(file_path) if isfile(join(file_path, f))]
+
+ src_file = file_path + files[0]
+ dst_file = dest_dir + "/" + ip + ".v"
+ shutil.copy(src_file, dst_file)
+
+ change_module_name(dst_file, ip)
+ change_scfifo(dst_file, 8, 2048)
+
elif ("rom" in ip) or ("ram" in ip):
if "rom" in ip:
new_dst_file = copy_file(ip_path,ip,dest_dir,"rom")
diff --git a/pigasus/hardware/scripts/src/alt_ehipc2_hw.qsf b/pigasus/hardware/scripts/src/alt_ehipc2_hw.qsf
index 0c3dd0d..86ac580 100644
--- a/pigasus/hardware/scripts/src/alt_ehipc2_hw.qsf
+++ b/pigasus/hardware/scripts/src/alt_ehipc2_hw.qsf
@@ -429,6 +429,13 @@ set_global_assignment -name SYSTEMVERILOG_FILE src/reassembly/hash_func.sv
set_global_assignment -name SYSTEMVERILOG_FILE src/reassembly/flow_table_wrapper.sv
set_global_assignment -name SYSTEMVERILOG_FILE src/reassembly/flow_table.sv
set_global_assignment -name SYSTEMVERILOG_FILE src/reassembly/flow_reassembly.sv
+set_global_assignment -name SYSTEMVERILOG_FILE src/reassembly/scheduler_reassembly.sv
+set_global_assignment -name SYSTEMVERILOG_FILE src/reassembly/bounded_fcfs_queue.sv
+set_global_assignment -name SYSTEMVERILOG_FILE src/reassembly/surge_protector/bounded_wsjf_queue.sv
+set_global_assignment -name SYSTEMVERILOG_FILE src/reassembly/surge_protector/ffs.sv
+set_global_assignment -name SYSTEMVERILOG_FILE src/reassembly/surge_protector/heap_ops_pkg.sv
+set_global_assignment -name SYSTEMVERILOG_FILE src/reassembly/surge_protector/pipelined_heap.sv
+set_global_assignment -name SYSTEMVERILOG_FILE src/reassembly/surge_protector/pipelined_heap_wrapper.sv
set_global_assignment -name SYSTEMVERILOG_FILE src/port_group/rule_unit.sv
set_global_assignment -name SYSTEMVERILOG_FILE src/port_group/port_unit.sv
set_global_assignment -name SYSTEMVERILOG_FILE src/port_group/port_group.sv
@@ -549,6 +556,7 @@ set_global_assignment -name VERILOG_FILE src/common/fifo_core_infill_mlab.v
set_global_assignment -name VERILOG_FILE src/common/fifo_core_infill.v
set_global_assignment -name VERILOG_FILE src/common/fifo_core.v
set_global_assignment -name SYSTEMVERILOG_FILE src/common/dsp.sv
+set_global_assignment -name VERILOG_FILE src/common/sc_fifo.v
set_global_assignment -name VERILOG_FILE src/common/dc_fifo_wrapper_mlab.v
set_global_assignment -name VERILOG_FILE src/common/dc_fifo_wrapper_infill_mlab.v
set_global_assignment -name VERILOG_FILE src/common/dc_fifo_wrapper_infill.v
diff --git a/pigasus_multi.py b/pigasus_multi.py
index 864011c..83a969a 100644
--- a/pigasus_multi.py
+++ b/pigasus_multi.py
@@ -140,21 +140,33 @@ def struct_s():
`ifndef STRUCT_S
`define STRUCT_S
-//`define SIM
+// `define SIM
`define USE_BRAM
`define BRAM_CHECKPKT_BUF
`define NO_C2F
-//`define DISABLE_NF_BYPASS
+// `define DISABLE_NF_BYPASS
// `define NO_BP
+// `define ENABLE_SURGEPROTECTOR
+
+// Reassembler scheduling policy
+`ifdef ENABLE_SURGEPROTECTOR""")
+ SCHEDULER_REASSEMBLY_POLICY = Param("WSJF")
+ T("`else")
+ SCHEDULER_REASSEMBLY_POLICY = Param("FCFS")
+ T("""`endif
// Packet buffer
// STORE 1024 pkts, each pkt takes 32 * 512 bits = 2 KB.
// 32 * 1024 = 32768 entries.
-`ifdef USE_BRAM""")
+`ifdef USE_BRAM
+`ifdef PKT_NUM
+parameter PKT_NUM = `PKT_NUM;
+`else""")
PKT_NUM = Param(PKT_NUM_BRAM_V)
- T("`else")
+ T("""`endif
+`else""")
PKT_NUM = Param(PKT_NUM_V)
- T("`endif")
+ T("""`endif""")
IN_BUF_DEPTH = Param(IN_BUF_DEPTH_V)
DS_THRESH = Param(DS_THRESH_V)
@@ -188,7 +200,7 @@ def struct_s():
ETH_IP = Param(ETH_IP_V)
ETH_META = Param(ETH_META_V)
ETH_USR = Param(ETH_USR_V)
-
+
PROT_ETH = Param(PROT_ETH_V)
IP_V4 = Param(IP_V4_V)
PROT_TCP = Param(PROT_TCP_V)
@@ -254,7 +266,6 @@ def struct_s():
} flit_meta_t;""")
# Linked list entry
- LL_DWIDTH = Param(1 + 32 + 16 + 16 + PKT_AWIDTH + 1 + 56)
T("""
typedef struct packed {
logic valid; // Valid
@@ -266,18 +277,108 @@ def struct_s():
logic last; // Last
logic [55:0] last_7_bytes; // Last
} entry_t;""")
+ LL_DWIDTH = Param(1 + 32 + 16 + LL_AWIDTH + PKT_AWIDTH + 5 + 1 + 56)
# Tuple
- TUPLE_DWIDTH = Param(32 + 32 + 16 + 16)
T("""
typedef struct packed {
logic [31:0] sIP;
logic [31:0] dIP;
logic [15:0] sPort;
logic [15:0] dPort;
-} tuple_t;
+} tuple_t;""")
+ TUPLE_DWIDTH = Param(32 + 32 + 16 + 16)
+
+ T("""
+/**
+ * Reassembler service.
+ */
+// OOO flow IDs""")
+ MAX_NUM_OOO_FLOWS = Param(1024)
+ OOO_FLOW_ID_AWIDTH = Param(clog2(MAX_NUM_OOO_FLOWS))
+ T("""// Service Queue""")
+ HEAP_BITMAP_WIDTH = Param(32)
+ HEAP_MAX_NUM_ENTRIES = Param(MAX_NUM_OOO_FLOWS)
+ HEAP_NUM_PRIORITIES = Param(HEAP_BITMAP_WIDTH ** 2)
+ HEAP_PRIORITY_AWIDTH = Param(clog2(HEAP_NUM_PRIORITIES))
+ HEAP_LOG_MAX_NUM_ENTRIES = Param(clog2(HEAP_MAX_NUM_ENTRIES))
+ T("""// Scheduler""")
+ OOO_FLOW_LL_MAX_NUM_ENTRIES = Param(PKT_NUM / 2)
+ OOO_FLOW_LL_ENTRY_AWIDTH = Param(clog2(OOO_FLOW_LL_MAX_NUM_ENTRIES))
+ OOO_FLOW_LL_ENTRY_PTR_T_WIDTH = Param(OOO_FLOW_LL_ENTRY_AWIDTH + 1)
+
+ T("""
+typedef logic [OOO_FLOW_ID_AWIDTH-1:0] ooo_flow_id_t;
+typedef logic [HEAP_LOG_MAX_NUM_ENTRIES:0] heap_size_t;
+typedef logic [HEAP_PRIORITY_AWIDTH-1:0] heap_priority_t;
+typedef logic [OOO_FLOW_LL_ENTRY_PTR_T_WIDTH-1:0] ooo_flow_ll_entry_ptr_t;""")
+
+ # Scheduler token
+ T("""
+typedef struct packed {
+ tuple_t tuple;
+ ooo_flow_id_t ooo_flow_id;
+} scheduler_token_t;""")
+ SCHEDULER_TOKEN_T_WIDTH = Param(TUPLE_DWIDTH + OOO_FLOW_ID_AWIDTH)
+
+ T("""
+typedef struct packed {
+ ooo_flow_ll_entry_ptr_t head;
+ ooo_flow_ll_entry_ptr_t tail;
+} ooo_flow_list_t;""")
+ OOO_FLOW_LIST_T_WIDTH = Localparam(2 * OOO_FLOW_LL_ENTRY_PTR_T_WIDTH)
+
+ T("""
+typedef struct packed {
+ logic valid;
+ tuple_t tuple;
+ logic [31:0] seq;
+ logic ll_valid;
+ logic [LL_AWIDTH-1:0] pointer;
+ logic [LL_AWIDTH-1:0] ll_size;
+ logic [55:0] last_7_bytes;
+ logic [FT_AWIDTH-1:0] addr0;
+ logic [FT_AWIDTH-1:0] addr1;
+ logic [FT_AWIDTH-1:0] addr2;
+ logic [FT_AWIDTH-1:0] addr3;
+ ooo_flow_list_t ooo_flow_ll;
+} ooo_flow_fc_entry_t;""")
+ OOO_FLOW_FC_ENTRY_T_WIDTH = Localparam(1 + TUPLE_DWIDTH + 32 + 1 + LL_AWIDTH + LL_AWIDTH +
+ 56 + (4 * FT_AWIDTH) + OOO_FLOW_LIST_T_WIDTH)
+
+ T("""
+typedef struct packed {
+ tuple_t tuple;
+ logic is_delete;
+ logic [31:0] seq;
+ logic [FT_AWIDTH-1:0] addr0;
+ logic [FT_AWIDTH-1:0] addr1;
+ logic [FT_AWIDTH-1:0] addr2;
+ logic [FT_AWIDTH-1:0] addr3;
+ logic [PKT_AWIDTH-1:0] rel_pkt_cnt;
+} ft_update_t;""")
+ FT_UPDATE_T_WIDTH = Localparam(TUPLE_DWIDTH + 1 + 32 + (4 * FT_AWIDTH) + PKT_AWIDTH)
+
+ T("""
+typedef struct packed {
+ ooo_flow_id_t ooo_flow_id;
+ tuple_t tuple;
+ logic [FT_AWIDTH-1:0] addr0;
+ logic [FT_AWIDTH-1:0] addr1;
+ logic [FT_AWIDTH-1:0] addr2;
+ logic [FT_AWIDTH-1:0] addr3;
+} reassembly_gc_meta_t;""")
+ REASSEMBLY_GC_META_T_WIDTH = Localparam(OOO_FLOW_ID_AWIDTH + TUPLE_DWIDTH + (4 * FT_AWIDTH))
+ T("""
+typedef struct packed {
+ logic ll_valid;
+ logic [LL_AWIDTH-1:0] pointer;
+ reassembly_gc_meta_t meta;
+} reassembly_gc_req_t;""")
+ REASSEMBLY_GC_REQ_T_WIDTH = Localparam(1 + LL_AWIDTH + REASSEMBLY_GC_META_T_WIDTH)
+ T("""
typedef struct packed {
logic [31:0] c2f_kmem_high_1; // higher 32 bit of kernel memory, FPGA read only
logic [31:0] c2f_kmem_low_1; // lower 32 bit of kernel memory, FPGA read only
@@ -300,22 +401,23 @@ def struct_s():
logic [31:0] f2c_tail; // tail pointer, CPU read only
} pcie_block_t;""")
- FT_DWIDTH = Param(1 + TUPLE_DWIDTH + 32 + LL_AWIDTH + 1 + PKT_AWIDTH + 56 + (4 * FT_AWIDTH))
T("""
typedef struct packed {
logic valid;
tuple_t tuple;
logic [31:0] seq;
- logic [LL_AWIDTH-1:0] pointer;
- logic ll_valid;
logic [PKT_AWIDTH-1:0] slow_cnt;
logic [55:0] last_7_bytes;
logic [FT_AWIDTH-1:0] addr0;
logic [FT_AWIDTH-1:0] addr1;
logic [FT_AWIDTH-1:0] addr2;
logic [FT_AWIDTH-1:0] addr3;
-} fce_t; // Flow context entry
+ logic ooo_flow_id_valid;
+ logic [OOO_FLOW_ID_AWIDTH-1:0] ooo_flow_id;
+} fce_t; // Flow context entry""")
+ FT_DWIDTH = Param(1 + TUPLE_DWIDTH + 32 + PKT_AWIDTH + 56 + (4 * FT_AWIDTH) + 1 + OOO_FLOW_ID_AWIDTH)
+ T("""
typedef struct packed {
tuple_t tuple;
logic [FT_AWIDTH-1:0] addr0;
diff --git a/top_base.sv b/top_base.sv
index d014fc0..14f0112 100644
--- a/top_base.sv
+++ b/top_base.sv
@@ -11,6 +11,7 @@
logic [31:0] parser_meta_csr_readdata_r;
logic [31:0] stats_incomp_out_meta_r;
logic [31:0] stats_parser_out_meta_r;
+ logic [63:0] stats_parser_out_bytes_r;
logic [31:0] stats_ft_in_meta_r;
logic [31:0] stats_ft_out_meta_r;
logic [31:0] stats_emptylist_in_r;
@@ -22,6 +23,7 @@
logic [31:0] stats_dm_in_check_meta_r;
logic [31:0] stats_dm_in_ooo_meta_r;
logic [31:0] stats_dm_in_forward_ooo_meta_r;
+ logic [63:0] stats_dm_out_bytes_r;
logic [31:0] stats_nopayload_pkt_r;
logic [31:0] stats_dm_check_pkt_r;
logic [31:0] in_pkt_fill_level_dm2sm;
@@ -81,6 +83,7 @@
logic [31:0] out_pkt_status;
logic [31:0] incomp_out_meta_status;
logic [31:0] parser_out_meta_status;
+logic [63:0] parser_out_bytes_status;
logic [31:0] ft_in_meta_status;
logic [31:0] ft_out_meta_status;
logic [31:0] emptylist_in_status;
@@ -92,6 +95,7 @@ logic [31:0] dm_in_drop_meta_status;
logic [31:0] dm_in_check_meta_status;
logic [31:0] dm_in_ooo_meta_status;
logic [31:0] dm_in_forward_ooo_meta_status;
+logic [63:0] dm_out_bytes_status;
logic [31:0] nopayload_pkt_status;
logic [31:0] dm_check_pkt_status;
logic [31:0] sm_pkt_status;
@@ -168,6 +172,7 @@ logic [31:0] in_pkt_r1;
logic [31:0] out_pkt_r1;
logic [31:0] incomp_out_meta_r1;
logic [31:0] parser_out_meta_r1;
+logic [63:0] parser_out_bytes_r1;
logic [31:0] ft_in_meta_r1;
logic [31:0] ft_out_meta_r1;
logic [31:0] emptylist_in_r1;
@@ -179,6 +184,7 @@ logic [31:0] dm_in_drop_meta_r1;
logic [31:0] dm_in_check_meta_r1;
logic [31:0] dm_in_ooo_meta_r1;
logic [31:0] dm_in_forward_ooo_meta_r1;
+logic [63:0] dm_out_bytes_r1;
logic [31:0] nopayload_pkt_r1;
logic [31:0] dm_check_pkt_r1;
logic [31:0] sm_pkt_r1;
@@ -231,6 +237,9 @@ assign incomp_out_meta = stats_incomp_out_meta_r;
logic [31:0] parser_out_meta;
assign parser_out_meta = stats_parser_out_meta_r;
+logic [63:0] parser_out_bytes;
+assign parser_out_bytes = stats_parser_out_bytes_r;
+
logic [31:0] ft_in_meta;
assign ft_in_meta = stats_ft_in_meta_r;
@@ -264,6 +273,9 @@ assign dm_in_ooo_meta = stats_dm_in_ooo_meta_r;
logic [31:0] dm_in_forward_ooo_meta;
assign dm_in_forward_ooo_meta = stats_dm_in_forward_meta_r;
+logic [63:0] dm_out_bytes;
+assign dm_out_bytes = stats_dm_out_bytes_r;
+
logic [31:0] nopayload_pkt;
assign nopayload_pkt = stats_nopayload_pkt_r;
@@ -452,6 +464,8 @@ always @(posedge clk_status) begin
incomp_out_meta_status <= incomp_out_meta_r1;
parser_out_meta_r1 <= parser_out_meta;
parser_out_meta_status <= parser_out_meta_r1;
+ parser_out_bytes_r1 <= parser_out_bytes;
+ parser_out_bytes_status <= parser_out_bytes_r1;
ft_in_meta_r1 <= ft_in_meta;
ft_in_meta_status <= ft_in_meta_r1;
ft_out_meta_r1 <= ft_out_meta;
@@ -474,6 +488,8 @@ always @(posedge clk_status) begin
dm_in_ooo_meta_status <= dm_in_ooo_meta_r1;
dm_in_forward_ooo_meta_r1 <= dm_in_forward_ooo_meta;
dm_in_forward_ooo_meta_status <= dm_in_forward_ooo_meta_r1;
+ dm_out_bytes_r1 <= dm_out_bytes;
+ dm_out_bytes_status <= dm_out_bytes_r1;
nopayload_pkt_r1 <= nopayload_pkt;
nopayload_pkt_status <= nopayload_pkt_r1;
dm_check_pkt_r1 <= dm_check_pkt;
@@ -619,6 +635,10 @@ always @(posedge clk_status) begin
REG_MAX_NF2PDU : status_readdata <= max_nf2pdu_status;
REG_SM_BYPASS_AF : status_readdata <= sm_bypass_af_status;
REG_SM_CDC_AF : status_readdata <= sm_cdc_af_status;
+ REG_PARSER_OUT_BYTES_L : status_readdata <= parser_out_bytes_status[31:0];
+ REG_PARSER_OUT_BYTES_H : status_readdata <= parser_out_bytes_status[63:32];
+ REG_DM_OUT_BYTES_L : status_readdata <= dm_out_bytes_status[31:0];
+ REG_DM_OUT_BYTES_H : status_readdata <= dm_out_bytes_status[63:32];
default : status_readdata <= 32'hDEADBEEF;
endcase
end