Skip to content
Open

CTC #257

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions src/main/scala/ctc/CTC.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package testchipip.ctc

import chisel3._
import chisel3.util._
import chisel3.experimental.dataview._

import org.chipsalliance.cde.config.{Parameters, Field, Config}
import freechips.rocketchip.subsystem._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.prci._
import freechips.rocketchip.util.ResetCatchAndSync
// import testchipip.soc.{SBUS}

import testchipip.serdes._
import testchipip.soc.{InwardAddressTranslator}


/** Link-width constants shared by the CTC blocks. */
object CTC {
  /** Flit width in bits; used as the `flitWidth` of the PHY parameters. */
  val INNER_WIDTH: Int = 32

  /** [[INNER_WIDTH]] expressed in bytes. */
  val INNER_WIDTH_BYTES: Int = INNER_WIDTH / 8

  /** Phit width in bits; used as the `phitWidth` of the PHY parameters. */
  val OUTER_WIDTH: Int = 4
}

/** Command opcodes carried in the header flit of a CTC transaction.
  *
  * Every opcode is given an explicit 2-bit width. With inferred widths,
  * `0.U` and `1.U` are 1 bit wide while `2.U` and `3.U` are 2 bits wide, so
  * the width of an expression such as `Cat(command, length)` would depend on
  * which opcode was used. The numeric values are unchanged.
  */
object CTCCommand {
  val read_req  = 0.U(2.W)
  val write_req = 1.U(2.W)
  val read_ack  = 2.U(2.W)
  val write_ack = 3.U(2.W)
}

/** Configuration of a CTC port.
  *
  * @param onchipAddr  base address on THIS chip whose accesses are routed to the CTC port
  * @param offchipAddr base address this CTC device can access on the OTHER chip
  * @param size        address mask of the window (it is passed as the mask of an
  *                    `AddressSet`); the default 0x3FF covers 1024 bytes
  * @param managerBus  bus the outgoing (TileLink-to-CTC) side is coupled to
  * @param clientBus   bus the incoming (CTC-to-TileLink) side is coupled from
  * @param phyFreqMHz  frequency handed to the serial PHY parameters, in MHz
  */
case class CTCParams(
  onchipAddr: BigInt = BigInt(1) << 32,
  offchipAddr: BigInt = BigInt(0),
  size: BigInt = (BigInt(1) << 10) - 1,
  managerBus: Option[TLBusWrapperLocation] = Some(SBUS),
  clientBus: Option[TLBusWrapperLocation] = Some(SBUS),
  phyFreqMHz: Int = 100
)

/** Config key holding the optional CTC parameters; `None` (the default) means no CTC port. */
case object CTCKey
  extends Field[Option[CTCParams]](None)

/** Subsystem mix-in that optionally instantiates a CTC port.
  *
  * When [[CTCKey]] holds parameters, this builds:
  *  - a `CTCToTileLink` client and a `TileLinkToCTC` manager inside a clock
  *    domain driven by the manager bus' fixed clock,
  *  - an address translator in front of the manager,
  *  - a credited serial PHY connecting both to a top-level phit IO.
  *
  * `ctc2tl`, `tl2ctc` and `ctc_io` are all `Option`s: `Some(...)` when the key
  * is set and `None` otherwise.
  */
trait CanHavePeripheryCTC { this: BaseSubsystem =>
  private val portName = "ctc"

  val ctc_name = "ctc"
  val (ctc2tl, tl2ctc, ctc_io) = p(CTCKey) match {
    case Some(params) => {

      val phyParams = CreditedSourceSyncSerialPhyParams(
        phitWidth = CTC.OUTER_WIDTH,
        flitWidth = CTC.INNER_WIDTH,
        freqMHz = params.phyFreqMHz,
        flitBufferSz = 16
      )

      // Fail with a readable message instead of a bare NoSuchElementException.
      require(params.managerBus.isDefined, "CTCParams.managerBus must be defined to instantiate CTC")
      require(params.clientBus.isDefined, "CTCParams.clientBus must be defined to instantiate CTC")
      lazy val slave_bus = locateTLBusWrapper(params.managerBus.get)
      lazy val master_bus = locateTLBusWrapper(params.clientBus.get)

      val ctc_domain = LazyModule(new ClockSinkDomain(name=Some("CTC")))
      ctc_domain.clockNode := slave_bus.fixedClockNode

      // Both TileLink sides live in one clock domain, so both buses must run
      // at the same (known) frequency.
      require(slave_bus.dtsFrequency.isDefined,
        s"Slave bus ${slave_bus.busName} must provide a frequency")
      require(master_bus.dtsFrequency.isDefined,
        s"Master bus ${master_bus.busName} must provide a frequency")
      require(slave_bus.dtsFrequency == master_bus.dtsFrequency,
        s"Mismatching slave freq ${slave_bus.dtsFrequency} != master freq ${master_bus.dtsFrequency}")

      // slave
      val ctc2tl = ctc_domain { LazyModule(new CTCToTileLink()(p)) }
      // master
      val tl2ctc = ctc_domain { LazyModule(new TileLinkToCTC(baseAddr=params.offchipAddr, size=params.size)(p)) }

      val translator = ctc_domain {
        LazyModule(InwardAddressTranslator(AddressSet(params.offchipAddr, params.size), Some(params.onchipAddr))(p))
      }

      slave_bus.coupleTo(portName) { translator(tl2ctc.node) := TLBuffer() := _ }
      master_bus.coupleFrom(portName) { _ := TLBuffer() := ctc2tl.node }

      // Generate a clock domain for the outgoing clock. The requested frequency
      // is the one configured for this PHY (params.phyFreqMHz), not the default
      // of CreditedSourceSyncSerialPhyParams.
      val serial_tl_clock_freqMHz = phyParams.freqMHz
      val serial_tl_clock_node = ctc_domain { ClockSinkNode(Seq(ClockSinkParameters(take=Some(ClockParameters(serial_tl_clock_freqMHz))))) }
      serial_tl_clock_node := ClockGroup()(p, ValName(s"${ctc_name}_clock")) := allClockGroupsNode

      val phit_io = ctc_domain { InModuleBody {
        val phit_io = IO(phyParams.genIO).suggestName(ctc_name)

        // 3 clock domains -
        //  - ctc2tl's "Inner clock": synchronizes signals going to the digital logic
        //  - outgoing clock: synchronizes signals going out
        //  - incoming clock: synchronizes signals coming in
        val outgoing_clock = serial_tl_clock_node.in.head._1.clock
        val outgoing_reset = ResetCatchAndSync(outgoing_clock, ctc2tl.module.reset.asBool)
        val incoming_clock = phit_io.clock_in
        val incoming_reset = ResetCatchAndSync(incoming_clock, phit_io.reset_in.asBool)
        phit_io.clock_out := outgoing_clock
        phit_io.reset_out := outgoing_reset.asAsyncReset
        val phy = Module(new CreditedSerialPhy(2, phyParams))
        phy.io.incoming_clock := incoming_clock
        phy.io.incoming_reset := incoming_reset
        phy.io.outgoing_clock := outgoing_clock
        phy.io.outgoing_reset := outgoing_reset
        phy.io.inner_clock := ctc2tl.module.clock
        phy.io.inner_reset := ctc2tl.module.reset
        // Channel 0: flits into ctc2tl and out of tl2ctc.
        phy.io.inner_ser(0).in <> ctc2tl.module.io.flit.in
        phy.io.inner_ser(0).out <> tl2ctc.module.io.flit.out
        // Channel 1: flits into tl2ctc and out of ctc2tl.
        phy.io.inner_ser(1).in <> tl2ctc.module.io.flit.in
        phy.io.inner_ser(1).out <> ctc2tl.module.io.flit.out

        phy.io.outer_ser <> phit_io.viewAsSupertype(new ValidPhitIO(phyParams.phitWidth))
        phit_io
      }}

      // Punch the PHY IO from the CTC clock domain up to the subsystem boundary.
      val outer_io = InModuleBody {
        val outer_io = IO(phyParams.genIO).suggestName(ctc_name)
        outer_io <> phit_io
        outer_io
      }

      // val inner_debug_io = ctc_domain { InModuleBody {
      //   val inner_debug_io = IO(new SerdesDebugIO).suggestName(s"${ctc_name}_debug")
      //   inner_debug_io := ctc2tl.module.io.debug
      //   inner_debug_io
      // }}
      // val outer_debug_io = InModuleBody {
      //   val outer_debug_io = IO(new SerdesDebugIO).suggestName(s"${ctc_name}_debug")
      //   outer_debug_io := inner_debug_io
      //   outer_debug_io
      // }

      // Wrap in Some so both match arms share Option types; otherwise the
      // first two tuple members would be inferred as AnyRef.
      (Some(ctc2tl), Some(tl2ctc), Some(outer_io))
    }
    case None => (None, None, None)
  }
}


/** Config fragment that enables the CTC port by setting [[CTCKey]] to `params`. */
class WithCTC(params: CTCParams = CTCParams()) extends Config((_, _, _) => {
  case CTCKey => Some(params)
})
171 changes: 171 additions & 0 deletions src/main/scala/ctc/CTCToTileLink.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
package testchipip.ctc

import chisel3._
import chisel3.util._
import testchipip.serdes._
import freechips.rocketchip.diplomacy._
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util._
import org.chipsalliance.cde.config.{Parameters, Field}

/** A TileLink client (master) device.
  *
  *  - from outer: receives read and write requests in CTC
  *  - to inner:   sends read and write requests in TL
  *  - from inner: receives read and write responses in TL
  *  - to outer:   sends read and write responses in CTC
  *
  * @param sourceIds number of TileLink source IDs in the client's ID range
  */
class CTCToTileLink(sourceIds: Int = 1)(implicit p: Parameters) extends LazyModule {
  private val clientParams = TLClientParameters(
    name = "ctc",
    sourceId = IdRange(0, sourceIds))
  private val masterPortParams = TLMasterPortParameters.v1(Seq(clientParams))

  val node = TLClientNode(Seq(masterPortParams))

  lazy val module = new CTCToTileLinkModule(this)
}

/** Implementation of [[CTCToTileLink]]: turns CTC request flits into TileLink
  * Get/Put beats and sends the matching CTC acknowledgement back.
  *
  * Incoming request, as decoded below:
  *  - header flit: bits [lenLen-1:0] hold a length field, the next cmdLen bits
  *    hold the command; `len` is the length field plus one and counts
  *    INNER_WIDTH-bit chunks,
  *  - `nChunksPerWord` address flits, least-significant chunk first,
  *  - for writes, `len` data flits.
  *
  * Outgoing response:
  *  - ack header `Cat(ack command, original length field)`,
  *  - the address, least-significant chunk first,
  *  - for reads, `len` data flits.
  */
class CTCToTileLinkModule(outer: CTCToTileLink) extends LazyModuleImp(outer) {
  val io = IO(new Bundle {
    val flit = new DecoupledFlitIO(CTC.INNER_WIDTH)
  })

  val (mem, edge) = outer.node.out(0)
  require (edge.manager.minLatency > 0)

  // constants
  val cmdLen = 2
  val lenLen = 16
  val wordLen = 64
  val pAddrBits = edge.bundle.addressBits
  val nChunksPerWord = wordLen / CTC.INNER_WIDTH
  val dataBits = mem.params.dataBits
  val beatBytes = dataBits / 8
  val nChunksPerBeat = dataBits / CTC.INNER_WIDTH

  val len = Reg(UInt(lenLen.W))      // chunks still to transfer
  val ctc_len = Reg(UInt(lenLen.W))  // raw length field, echoed in the ack header
  val cmd = Reg(UInt(cmdLen.W))
  val addr = Reg(UInt(wordLen.W))    // request address, echoed (and shifted out) in the ack
  val tladdr = Reg(UInt(wordLen.W))  // address of the current TileLink beat
  val body = Reg(Vec(nChunksPerBeat, UInt(CTC.INNER_WIDTH.W))) // one beat of data
  val ack = Reg(Bool())              // set once the ack header has been sent

  val next_tl_addr = tladdr + beatBytes.U

  // tl requests (always one full beat)
  val tl_read_req = edge.Get(
    fromSource = 0.U, toAddress = tladdr, lgSize = log2Ceil(beatBytes).U)._2
  val tl_write_req = edge.Put(
    fromSource = 0.U, toAddress = tladdr, lgSize = log2Ceil(beatBytes).U, data = body.asUInt)._2

  // ====== state machine ======
  val (s_cmd :: s_addr :: s_r_req :: s_r_data :: s_send_ack :: s_send_addr :: s_r_body ::
       s_w_body :: s_w_data :: s_w_wait :: Nil) = Enum(10)
  val state = RegInit(s_cmd)
  // Chunk index; shared by the address phases (nChunksPerWord) and the data
  // phases (nChunksPerBeat), hence sized for the larger of the two.
  val idx = Reg(UInt(log2Up(Math.max(nChunksPerBeat, nChunksPerWord)).W))

  // state-driven signals
  io.flit.in.ready := state.isOneOf(s_cmd, s_addr, s_w_body)
  io.flit.out.valid := state.isOneOf(s_send_ack, s_send_addr, s_r_body)
  val out_bits = Mux(state === s_send_ack && cmd === CTCCommand.read_req, Cat(CTCCommand.read_ack, ctc_len), // read ack header
                 Mux(state === s_send_ack && cmd === CTCCommand.write_req, Cat(CTCCommand.write_ack, ctc_len), // write ack header
                 Mux(state === s_send_addr, addr(CTC.INNER_WIDTH - 1, 0), // send address header
                 body(idx)))) // data flit
  io.flit.out.bits := out_bits.asTypeOf(io.flit.out.bits)

  mem.a.valid := state.isOneOf(s_r_req, s_w_data)
  mem.a.bits := Mux(state === s_r_req, tl_read_req, tl_write_req)
  mem.b.ready := false.B
  mem.c.valid := false.B
  mem.d.ready := state.isOneOf(s_r_data, s_w_wait)
  mem.e.valid := false.B

  // Decode the header flit and reset the per-transaction state.
  when (state === s_cmd && io.flit.in.valid) {
    len := io.flit.in.bits.flit(lenLen - 1, 0) + 1.U
    ctc_len := io.flit.in.bits.flit(lenLen - 1, 0)
    cmd := io.flit.in.bits.flit(cmdLen + lenLen - 1, lenLen)
    addr := 0.U
    idx := 0.U
    ack := false.B
    state := s_addr
    body.foreach(_ := 0.U)
  }

  // Assemble the address from nChunksPerWord flits.
  when (state === s_addr && io.flit.in.valid) {
    // The flit received at index idx is placed at bit offset idx * INNER_WIDTH,
    // so the first flit received is the least-significant chunk.
    val addr_next = addr | (io.flit.in.bits.flit << (idx * CTC.INNER_WIDTH.U))
    addr := addr_next
    idx := idx + 1.U
    when (idx === (nChunksPerWord - 1).U) {
      idx := 0.U
      tladdr := addr_next
      when (cmd === CTCCommand.read_req) {
        state := s_r_req
      } .elsewhen (cmd === CTCCommand.write_req) {
        state := s_w_body
      } .otherwise {
        assert(false.B, "Bad CTC command")
      }
    }
  }

  // BEGIN: handling read requests
  // send read request to inner TL
  when (state === s_r_req && mem.a.ready) {
    state := s_r_data
  }
  // wait for read data from inner TL to arrive
  when (state === s_r_data && mem.d.valid) {
    body := mem.d.bits.data.asTypeOf(body)
    state := Mux(!ack, s_send_ack, s_r_body) // if ack is not sent, send acknowledgement header first
  }
  // send the ack header to outer CTC (reads: before the first beat; writes: after the last beat)
  when (state === s_send_ack && io.flit.out.ready) {
    ack := true.B // set ack flag to true
    idx := 0.U
    state := s_send_addr
  }
  // send the address header to outer CTC, least-significant chunk first
  when (state === s_send_addr && io.flit.out.ready) {
    idx := idx + 1.U
    addr := addr >> CTC.INNER_WIDTH.U
    when (idx === (nChunksPerWord - 1).U) {
      // Restart the chunk index explicitly so the data phase begins at
      // body(0) even when idx is wider than log2(nChunksPerWord) and
      // therefore does not wrap to zero on its own.
      idx := 0.U
      state := Mux(cmd === CTCCommand.read_req, s_r_body, s_cmd)
    }
  }
  // send the read data to outer CTC
  when (state === s_r_body && io.flit.out.ready) {
    idx := idx + 1.U
    len := len - 1.U
    when (idx === (nChunksPerBeat - 1).U || len === 1.U) {
      // Beat finished: the next beat must start again at body(0). Without
      // this reset the index only returns to zero when its width happens to
      // equal log2(nChunksPerBeat).
      idx := 0.U
      tladdr := next_tl_addr
      state := Mux(len === 1.U, s_cmd, s_r_req) // send next TL R Request
    }
  }
  // END: handling read requests

  // BEGIN: handling write requests
  // collect the write data from the CTC
  // NOTE(review): a final beat that is only partially filled is still written
  // as a full beat, so the unfilled chunks keep whatever `body` held before
  // (zeros for the first beat, the previous beat's data afterwards) - confirm
  // that callers only issue beat-aligned lengths.
  when (state === s_w_body && io.flit.in.valid) {
    body(idx) := io.flit.in.bits.asUInt
    len := len - 1.U
    when (idx === (nChunksPerBeat - 1).U || len === 1.U) {
      state := s_w_data
    } .otherwise {
      idx := idx + 1.U
    }
  }

  // send the write request to inner TL
  when (state === s_w_data && mem.a.ready) {
    state := s_w_wait // wait for write response from inner TL
  }

  // wait for write response from inner TL
  when (state === s_w_wait && mem.d.valid) {
    when (len === 0.U) { // am I the last beat?
      state := s_send_ack
    } .otherwise {
      // addr := addr + 1.U
      tladdr := next_tl_addr
      idx := 0.U
      state := s_w_body
    }
  }
  // END: handling write requests
}
Loading