diff --git a/generators/chipyard/src/main/scala/config/ChipConfigs.scala b/generators/chipyard/src/main/scala/config/ChipConfigs.scala
index db685daf7c..1d80f18080 100644
--- a/generators/chipyard/src/main/scala/config/ChipConfigs.scala
+++ b/generators/chipyard/src/main/scala/config/ChipConfigs.scala
@@ -127,3 +127,30 @@ class VerilatorCITetheredChipLikeRocketConfig extends Config(
     new chipyard.config.WithNoResetSynchronizers ++
     new ChipLikeRocketConfig) ++
   new chipyard.harness.WithMultiChip(1, new ChipBringupHostConfig))
+
+
+// Example chip with no AXI4 mem port: memory is reached through the serial-TL manager port, so loadmem still works via the harness FastRAM
+class NoAXI4MemPortChipLikeRocketConfig extends Config(
+  new chipyard.harness.WithSimTSIOverSerialTL(fast = true) ++ // Enable FastRAM in the harness
+  new testchipip.serdes.WithSerialTL(
+    Seq(
+      testchipip.serdes.SerialTLParams(
+        manager = Some( // port acts as a manager of offchip memory
+          testchipip.serdes.SerialTLManagerParams(
+            memParams = Seq(
+              testchipip.serdes.ManagerRAMParams(
+                address = BigInt("80000000", 16), // Chipyard DRAM base
+                size = BigInt("100000000", 16) // 0x1_0000_0000 = 2^32
+              )
+            ),
+            isMemoryDevice = true,
+            slaveWhere = MBUS
+          )
+        ),
+        client = Some(testchipip.serdes.SerialTLClientParams()), // client for TSI connection
+        phyParams = testchipip.serdes.DecoupledExternalSyncSerialPhyParams()
+      )
+    )) ++
+  new freechips.rocketchip.subsystem.WithNoMemPort ++
+  new freechips.rocketchip.rocket.WithNHugeCores(1) ++
+  new chipyard.config.AbstractConfig)
diff --git a/generators/chipyard/src/main/scala/config/ChipletConfigs.scala b/generators/chipyard/src/main/scala/config/ChipletConfigs.scala
index ed25f8e1f5..fb99eac291 100644
--- a/generators/chipyard/src/main/scala/config/ChipletConfigs.scala
+++ b/generators/chipyard/src/main/scala/config/ChipletConfigs.scala
@@ -3,7 +3,7 @@ package chipyard
 import org.chipsalliance.cde.config.{Config}
 import freechips.rocketchip.diplomacy.{AddressSet}
 import freechips.rocketchip.subsystem.{SBUS}
-import testchipip.soc.{OBUS}
+import testchipip.soc.{OBUS, InwardAddressTranslatorParams, OutwardAddressTranslatorParams}
 
 // ------------------------------------------------
 // Configs demonstrating chip-to-chip communication
@@ -96,7 +96,6 @@ class MultiSimMultiLinkSymmetricChipletRocketConfig extends Config(
   new chipyard.harness.WithMultiChip(1, new MultiLinkSymmetricChipletRocketConfig)
 )
 
-
 // Core-only chiplet config, where the coherent memory is located on the LLC-chiplet
 class RocketCoreChipletConfig extends Config(
   new testchipip.serdes.WithSerialTL(Seq(
@@ -143,37 +142,56 @@ class MultiSimLLCChipletRocketConfig extends Config(
   new chipyard.harness.WithMultiChip(1, new LLCChipletConfig)
 )
 
-class CTCRocketConfig extends Config(
-  new testchipip.soc.WithChipIdPin ++
+// --------------------------------------------
+// ------------ IO Chiplet Example ------------
+// --------------------------------------------
+
+class ComputeChiplet1Config extends Config(
   new chipyard.harness.WithCTCLoopback ++
-  new testchipip.ctc.WithCTC(Seq(new testchipip.ctc.CTCParams(onchipAddr = 0x1000000000L, offchipAddr = 0x0L, size = ((1L << 32) - 1), noPhy=true))) ++
-  new RocketConfig
+  new testchipip.soc.WithChipIdPinWidth(2) ++
+  new testchipip.soc.WithChipIdPin ++
+  new testchipip.ctc.WithCTC(Seq(new testchipip.ctc.CTCParams(
+    translationParams = InwardAddressTranslatorParams(chipID=1, offset=0x100000000L),
+    offchip=Seq.tabulate(3)(i => AddressSet(0x100000000L << i, 0x100000000L - 1)),
+    phyParams = None))) ++
+  new chipyard.RocketConfig
 )
 
-class DoubleCTCRocketConfig extends Config(
-  new testchipip.soc.WithChipIdPin ++
+class ComputeChiplet2Config extends Config(
   new chipyard.harness.WithCTCLoopback ++
-  new testchipip.ctc.WithCTC(Seq(
-    new testchipip.ctc.CTCParams(onchipAddr = 0x1000000000L, offchipAddr = 0x0L, size = ((1L << 32) - 1), noPhy=false),
-    new testchipip.ctc.CTCParams(onchipAddr = 0x2000000000L, offchipAddr = 0x0L, size = ((1L << 32) - 1), noPhy=true)
-  )) ++
-  new RocketConfig
+  new testchipip.soc.WithChipIdPinWidth(2) ++
+  new testchipip.soc.WithChipIdPin ++
+  new testchipip.ctc.WithCTC(Seq(new testchipip.ctc.CTCParams(
+    translationParams = InwardAddressTranslatorParams(chipID=2, offset=0x100000000L),
+    offchip=Seq.tabulate(3)(i => AddressSet(0x100000000L << i, 0x100000000L - 1)),
+    phyParams = None))) ++
+  new chipyard.RocketConfig
 )
 
-class MultiCTCRocketConfig extends Config(
-  new chipyard.harness.WithAbsoluteFreqHarnessClockInstantiator ++
-  new chipyard.harness.WithANDSuccessFn ++
-  new chipyard.harness.WithMultiChipCTC(chip0=0, chip1=1, chip0portId=0, chip1portId=0) ++ // connect CTC port 0 of chip 0 and CTC port 0 of chip 1
-  new chipyard.harness.WithMultiChip(0, new CTCRocketConfig) ++
-  new chipyard.harness.WithMultiChip(1, new CTCRocketConfig)
+class IOChipletConfig extends Config(
+  new chipyard.harness.WithCTCTiedOff ++
+  new testchipip.soc.WithChipIdPinWidth(2) ++
+  new testchipip.soc.WithChipIdPin ++
+  new testchipip.ctc.WithCTC(Seq(
+    new testchipip.ctc.CTCParams(
+      translationParams = InwardAddressTranslatorParams(chipID=0, offset=0x100000000L),
+      offchip=Seq(AddressSet(0x200000000L, 0x100000000L - 1)),
+      phyParams = None),
+    new testchipip.ctc.CTCParams(
+      translationParams = InwardAddressTranslatorParams(chipID=0, offset=0x100000000L),
+      offchip=Seq(AddressSet(0x400000000L, 0x100000000L - 1)),
+      phyParams = None)
+  )) ++
+  new chipyard.RocketConfig
 )
 
-class MultiDoubleCTCRocketConfig extends Config(
+class TripleChipletConfig extends Config(
   new chipyard.harness.WithAbsoluteFreqHarnessClockInstantiator ++
   new chipyard.harness.WithANDSuccessFn ++
-  new chipyard.harness.WithMultiChipCTC(chip0=0, chip1=1, chip0portId=0, chip1portId=0) ++ // connect CTC port 0 of chip 0 and CTC port 0 of chip 1
-  new chipyard.harness.WithMultiChipCTC(chip0=0, chip1=1, chip0portId=1, chip1portId=1) ++ // connect CTC port 1 of chip 0 and CTC port 1 of chip 1
-  new chipyard.harness.WithMultiChip(0, new 
DoubleCTCRocketConfig) ++
-  new chipyard.harness.WithMultiChip(1, new DoubleCTCRocketConfig)
+  new chipyard.harness.WithMultiChipCTC(chip0=1, chip1=0, chip0portId=0, chip1portId=0) ++ // C1 to IO port 0
+  new chipyard.harness.WithMultiChipCTC(chip0=2, chip1=0, chip0portId=0, chip1portId=1) ++ // C2 to IO port 1
+  new chipyard.harness.WithMultiChip(0, new IOChipletConfig) ++
+  new chipyard.harness.WithMultiChip(1, new ComputeChiplet1Config) ++
+  new chipyard.harness.WithMultiChip(2, new ComputeChiplet2Config)
 )
 
diff --git a/generators/chipyard/src/main/scala/harness/HarnessBinders.scala b/generators/chipyard/src/main/scala/harness/HarnessBinders.scala
index b77bf90393..37d75b4d4b 100644
--- a/generators/chipyard/src/main/scala/harness/HarnessBinders.scala
+++ b/generators/chipyard/src/main/scala/harness/HarnessBinders.scala
@@ -13,7 +13,7 @@ import freechips.rocketchip.util._
 import freechips.rocketchip.jtag.{JTAGIO}
 import freechips.rocketchip.devices.debug.{SimJTAG}
 import chipyard.iocell._
-import testchipip.dram.{SimDRAM}
+import testchipip.dram.{SimDRAM, FastRAM}
 import testchipip.tsi.{SimTSI, SerialRAM, TSI, TSIIO}
 import testchipip.soc.{TestchipSimDTM}
 import testchipip.spi.{SimSPIFlashModel}
@@ -242,7 +242,7 @@ class WithSerialTLTiedOff(tieoffs: Option[Seq[Int]] = None) extends HarnessBinde
   }
 })
 
-class WithSimTSIOverSerialTL extends HarnessBinder({
+class WithSimTSIOverSerialTL(fast: Boolean = false) extends HarnessBinder({
   case (th: HasHarnessInstantiators, port: SerialTLPort, chipId: Int) if (port.portId == 0) => {
     port.io match {
       case io: HasClockOut =>
@@ -259,12 +259,19 @@ class WithSimTSIOverSerialTL extends HarnessBinder({
           case io: HasClockIn => th.harnessBinderClock
         }
         withClock(clock) {
-          val ram = Module(LazyModule(new SerialRAM(port.serdesser, port.params)(port.serdesser.p)).module)
-          ram.io.ser.in <> io.out
-          io.in <> ram.io.ser.out
-
-          val success = SimTSI.connect(ram.io.tsi, clock, th.harnessBinderReset, chipId)
-          when (success) { 
th.chiptopSuccess(chipId) := true.B }
+          if (fast) {
+            val ram = Module(LazyModule(new FastRAM(port.serdesser, port.params, chipId = chipId)(port.serdesser.p)).module)
+            ram.io.ser.in <> io.out
+            io.in <> ram.io.ser.out
+            val success = SimTSI.connect(ram.io.tsi, clock, th.harnessBinderReset, chipId)
+            when (success) { th.chiptopSuccess(chipId) := true.B }
+          } else {
+            val ram = Module(LazyModule(new SerialRAM(port.serdesser, port.params)(port.serdesser.p)).module)
+            ram.io.ser.in <> io.out
+            io.in <> ram.io.ser.out
+            val success = SimTSI.connect(ram.io.tsi, clock, th.harnessBinderReset, chipId)
+            when (success) { th.chiptopSuccess(chipId) := true.B }
+          }
         }
       }
     }
diff --git a/generators/testchipip b/generators/testchipip
index bfe7aa36fc..5dca05bef9 160000
--- a/generators/testchipip
+++ b/generators/testchipip
@@ -1 +1 @@
-Subproject commit bfe7aa36fc570ee17e3f461c1ed48525684b95ff
+Subproject commit 5dca05bef9a9d7b135e18543379bc782df80ce40
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 812caf5654..27b2d7e150 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -87,6 +87,7 @@ add_executable(mt-hello mt-hello.c)
 add_executable(symmetric symmetric.c)
 add_executable(ctc-test ctc-test.c)
 add_executable(multi-ctc-test multi-ctc-test.c)
+add_executable(triple-chiplet triple-chiplet.c)
 
 #################################
 # Disassembly
@@ -127,6 +128,7 @@ add_dump_target(mt-hello)
 add_dump_target(symmetric)
 add_dump_target(ctc-test)
 add_dump_target(multi-ctc-test)
+add_dump_target(triple-chiplet)
 
 
 # Add custom command to generate spiflash.img from spiflash.py
diff --git a/tests/triple-chiplet.c b/tests/triple-chiplet.c
new file mode 100644
index 0000000000..6f5ea5df38
--- /dev/null
+++ b/tests/triple-chiplet.c
@@ -0,0 +1,82 @@
+#include
+#include
+#include
+#include
+#include
+#include "marchid.h"
+#include "mmio.h"
+
+#define CHIP_ID_ADDR 0x2000
+#define COMPUTE1_OFFSET 0x200000000L
+#define COMPUTE2_OFFSET 0x400000000L
+
+uint32_t src[10] = {0, 1, 2, 
3, 4, 5, 6, 7, 8, 9};
+uint32_t dest[10];
+uint32_t test[10];
+// Copies src to the address of dest plus a byte offset, reads it back into test, and compares; returns 0 or exit(1)s on mismatch.
+int rw_mem(uint64_t offset) {
+  size_t write_start = rdcycle();
+
+  uint32_t* offchip_addr = (uint32_t*)((uintptr_t)dest + offset);
+
+  // Using inline ASM because CTC requires 32b transactions
+  for (int i = 0; i < 10; i++) {
+    asm volatile(
+      "lw t0, 0(%1)\n"
+      "sw t0, 0(%0)\n"
+      :
+      : "r"(offchip_addr + i),
+        "r"(src + i)
+      : "t0", "memory"
+    );
+  }
+
+  size_t write_end = rdcycle();
+
+  printf("Wrote %ld bytes in %ld cycles\n", sizeof(src), write_end - write_start);
+
+  size_t read_start = rdcycle();
+
+  for (int i = 0; i < 10; i++) {
+    asm volatile(
+      "lw t0, 0(%0)\n"
+      "sw t0, 0(%1)\n"
+      :
+      : "r"(offchip_addr + i),
+        "r"(test + i)
+      : "t0", "memory"
+    );
+  }
+
+  size_t read_end = rdcycle();
+
+  for (int i = 0; i < (int)(sizeof(src) / sizeof(src[0])); i++) {
+    if (src[i] != test[i]) { // third %p is the address actually accessed (byte offset already applied in offchip_addr)
+      printf("Remote write/read failed at index %d %p %p %p %x %x\n", i, (void*)(src + i), (void*)(test + i), (void*)(offchip_addr + i), src[i], test[i]);
+      exit(1);
+    }
+  }
+
+  printf("Read %ld bytes in %ld cycles\n", sizeof(src), read_end - read_start);
+
+  return 0;
+}
+// Reads the chip ID; chip 1 exercises COMPUTE2_OFFSET, chip 2 exercises COMPUTE1_OFFSET, any other ID only reports completion.
+int main(void) {
+
+  int chip_id = reg_read64(CHIP_ID_ADDR);
+
+  printf("Got chip ID: %d\n", chip_id);
+
+  if (chip_id == 1) {
+    rw_mem(COMPUTE2_OFFSET);
+    printf("Chip 1 DONE\n");
+  } else if (chip_id == 2) {
+    rw_mem(COMPUTE1_OFFSET);
+    printf("Chip 2 DONE\n");
+  } else {
+    printf("Chip %d DONE\n", chip_id);
+  }
+
+  return 0;
+}