Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions generators/chipyard/src/main/scala/config/ChipConfigs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,30 @@ class VerilatorCITetheredChipLikeRocketConfig extends Config(
new chipyard.config.WithNoResetSynchronizers ++
new ChipLikeRocketConfig) ++
new chipyard.harness.WithMultiChip(1, new ChipBringupHostConfig))


/**
 * Example chip with no AXI4 memory port that can still use loadmem over
 * serial-TL, backed by FastRAM in the harness.
 *
 * Fragments are listed in the same order as before; only layout and literal
 * spelling differ.
 */
class NoAXI4MemPortChipLikeRocketConfig extends Config(
  // Harness: drive TSI over serial-TL and enable FastRAM (fast = true).
  new chipyard.harness.WithSimTSIOverSerialTL(fast = true) ++
  new testchipip.serdes.WithSerialTL(Seq(
    testchipip.serdes.SerialTLParams(
      // The port acts as a manager of off-chip memory ...
      manager = Some(testchipip.serdes.SerialTLManagerParams(
        memParams = Seq(testchipip.serdes.ManagerRAMParams(
          address = BigInt(0x80000000L), // Chipyard DRAM base
          size = BigInt(0x100000000L)
        )),
        isMemoryDevice = true,
        slaveWhere = MBUS
      )),
      // ... and also carries a client, used for the TSI connection.
      client = Some(testchipip.serdes.SerialTLClientParams()),
      phyParams = testchipip.serdes.DecoupledExternalSyncSerialPhyParams()
    )
  )) ++
  new freechips.rocketchip.subsystem.WithNoMemPort ++
  new freechips.rocketchip.rocket.WithNHugeCores(1) ++
  new chipyard.config.AbstractConfig)
66 changes: 42 additions & 24 deletions generators/chipyard/src/main/scala/config/ChipletConfigs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package chipyard
import org.chipsalliance.cde.config.{Config}
import freechips.rocketchip.diplomacy.{AddressSet}
import freechips.rocketchip.subsystem.{SBUS}
import testchipip.soc.{OBUS}
import testchipip.soc.{OBUS, InwardAddressTranslatorParams, OutwardAddressTranslatorParams}

// ------------------------------------------------
// Configs demonstrating chip-to-chip communication
Expand Down Expand Up @@ -96,7 +96,6 @@ class MultiSimMultiLinkSymmetricChipletRocketConfig extends Config(
new chipyard.harness.WithMultiChip(1, new MultiLinkSymmetricChipletRocketConfig)
)


// Core-only chiplet config, where the coherent memory is located on the LLC-chiplet
class RocketCoreChipletConfig extends Config(
new testchipip.serdes.WithSerialTL(Seq(
Expand Down Expand Up @@ -143,37 +142,56 @@ class MultiSimLLCChipletRocketConfig extends Config(
new chipyard.harness.WithMultiChip(1, new LLCChipletConfig)
)

class CTCRocketConfig extends Config(
new testchipip.soc.WithChipIdPin ++
// --------------------------------------------
// ------------ IO Chiplet Example ------------
// --------------------------------------------

class ComputeChiplet1Config extends Config(
new chipyard.harness.WithCTCLoopback ++
new testchipip.ctc.WithCTC(Seq(new testchipip.ctc.CTCParams(onchipAddr = 0x1000000000L, offchipAddr = 0x0L, size = ((1L << 32) - 1), noPhy=true))) ++
new RocketConfig
new testchipip.soc.WithChipIdPinWidth(2) ++
new testchipip.soc.WithChipIdPin ++
new testchipip.ctc.WithCTC(Seq(new testchipip.ctc.CTCParams(
translationParams = InwardAddressTranslatorParams(chipID=1, offset=0x100000000L),
offchip=Seq.tabulate(3)(i => AddressSet(0x100000000L << i, 0x100000000L - 1)),
phyParams = None))) ++
new chipyard.RocketConfig
)

class DoubleCTCRocketConfig extends Config(
new testchipip.soc.WithChipIdPin ++
class ComputeChiplet2Config extends Config(
new chipyard.harness.WithCTCLoopback ++
new testchipip.ctc.WithCTC(Seq(
new testchipip.ctc.CTCParams(onchipAddr = 0x1000000000L, offchipAddr = 0x0L, size = ((1L << 32) - 1), noPhy=false),
new testchipip.ctc.CTCParams(onchipAddr = 0x2000000000L, offchipAddr = 0x0L, size = ((1L << 32) - 1), noPhy=true)
)) ++
new RocketConfig
new testchipip.soc.WithChipIdPinWidth(2) ++
new testchipip.soc.WithChipIdPin ++
new testchipip.ctc.WithCTC(Seq(new testchipip.ctc.CTCParams(
translationParams = InwardAddressTranslatorParams(chipID=2, offset=0x100000000L),
offchip=Seq.tabulate(3)(i => AddressSet(0x100000000L << i, 0x100000000L - 1)),
phyParams = None))) ++
new chipyard.RocketConfig
)

class MultiCTCRocketConfig extends Config(
new chipyard.harness.WithAbsoluteFreqHarnessClockInstantiator ++
new chipyard.harness.WithANDSuccessFn ++
new chipyard.harness.WithMultiChipCTC(chip0=0, chip1=1, chip0portId=0, chip1portId=0) ++ // connect CTC port 0 of chip 0 and CTC port 0 of chip 1
new chipyard.harness.WithMultiChip(0, new CTCRocketConfig) ++
new chipyard.harness.WithMultiChip(1, new CTCRocketConfig)
/**
 * IO chiplet of the IO-chiplet example.
 *
 * Declares two CTC ports. Both use the same inward address translation
 * (chipID = 0, offset = 0x100000000) and have no PHY; they differ only in
 * the off-chip address set they expose (bases 0x200000000 and 0x400000000,
 * in that port order).
 */
class IOChipletConfig extends Config(
  new chipyard.harness.WithCTCTiedOff ++
  new testchipip.soc.WithChipIdPinWidth(2) ++
  new testchipip.soc.WithChipIdPin ++
  new testchipip.ctc.WithCTC(
    // One CTCParams per off-chip base; list order determines the port index.
    Seq(0x200000000L, 0x400000000L).map { offchipBase =>
      new testchipip.ctc.CTCParams(
        translationParams = InwardAddressTranslatorParams(chipID = 0, offset = 0x100000000L),
        offchip = Seq(AddressSet(offchipBase, 0x100000000L - 1)),
        phyParams = None)
    }
  ) ++
  new chipyard.RocketConfig
)

class MultiDoubleCTCRocketConfig extends Config(
class TripleChipletConfig extends Config(
new chipyard.harness.WithAbsoluteFreqHarnessClockInstantiator ++
new chipyard.harness.WithANDSuccessFn ++
new chipyard.harness.WithMultiChipCTC(chip0=0, chip1=1, chip0portId=0, chip1portId=0) ++ // connect CTC port 0 of chip 0 and CTC port 0 of chip 1
new chipyard.harness.WithMultiChipCTC(chip0=0, chip1=1, chip0portId=1, chip1portId=1) ++ // connect CTC port 1 of chip 0 and CTC port 1 of chip 1
new chipyard.harness.WithMultiChip(0, new DoubleCTCRocketConfig) ++
new chipyard.harness.WithMultiChip(1, new DoubleCTCRocketConfig)
new chipyard.harness.WithMultiChipCTC(chip0=1, chip1=0, chip0portId=0, chip1portId=0) ++ // C1 to IO port 0
new chipyard.harness.WithMultiChipCTC(chip0=2, chip1=0, chip0portId=0, chip1portId=1) ++ // C2 to IO port 1
new chipyard.harness.WithMultiChip(0, new IOChipletConfig) ++
new chipyard.harness.WithMultiChip(1, new ComputeChiplet1Config) ++
new chipyard.harness.WithMultiChip(2, new ComputeChiplet2Config)
)

23 changes: 15 additions & 8 deletions generators/chipyard/src/main/scala/harness/HarnessBinders.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import freechips.rocketchip.util._
import freechips.rocketchip.jtag.{JTAGIO}
import freechips.rocketchip.devices.debug.{SimJTAG}
import chipyard.iocell._
import testchipip.dram.{SimDRAM}
import testchipip.dram.{SimDRAM, FastRAM}
import testchipip.tsi.{SimTSI, SerialRAM, TSI, TSIIO}
import testchipip.soc.{TestchipSimDTM}
import testchipip.spi.{SimSPIFlashModel}
Expand Down Expand Up @@ -242,7 +242,7 @@ class WithSerialTLTiedOff(tieoffs: Option[Seq[Int]] = None) extends HarnessBinde
}
})

class WithSimTSIOverSerialTL extends HarnessBinder({
class WithSimTSIOverSerialTL(fast: Boolean = false) extends HarnessBinder({
case (th: HasHarnessInstantiators, port: SerialTLPort, chipId: Int) if (port.portId == 0) => {
port.io match {
case io: HasClockOut =>
Expand All @@ -259,12 +259,19 @@ class WithSimTSIOverSerialTL extends HarnessBinder({
case io: HasClockIn => th.harnessBinderClock
}
withClock(clock) {
val ram = Module(LazyModule(new SerialRAM(port.serdesser, port.params)(port.serdesser.p)).module)
ram.io.ser.in <> io.out
io.in <> ram.io.ser.out

val success = SimTSI.connect(ram.io.tsi, clock, th.harnessBinderReset, chipId)
when (success) { th.chiptopSuccess(chipId) := true.B }
if (fast) {
val ram = Module(LazyModule(new FastRAM(port.serdesser, port.params, chipId = chipId)(port.serdesser.p)).module)
ram.io.ser.in <> io.out
io.in <> ram.io.ser.out
val success = SimTSI.connect(ram.io.tsi, clock, th.harnessBinderReset, chipId)
when (success) { th.chiptopSuccess(chipId) := true.B }
} else {
val ram = Module(LazyModule(new SerialRAM(port.serdesser, port.params)(port.serdesser.p)).module)
ram.io.ser.in <> io.out
io.in <> ram.io.ser.out
val success = SimTSI.connect(ram.io.tsi, clock, th.harnessBinderReset, chipId)
when (success) { th.chiptopSuccess(chipId) := true.B }
}
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ add_executable(mt-hello mt-hello.c)
add_executable(symmetric symmetric.c)
add_executable(ctc-test ctc-test.c)
add_executable(multi-ctc-test multi-ctc-test.c)
add_executable(triple-chiplet triple-chiplet.c)

#################################
# Disassembly
Expand Down Expand Up @@ -127,6 +128,7 @@ add_dump_target(mt-hello)
add_dump_target(symmetric)
add_dump_target(ctc-test)
add_dump_target(multi-ctc-test)
add_dump_target(triple-chiplet)


# Add custom command to generate spiflash.img from spiflash.py
Expand Down
82 changes: 82 additions & 0 deletions tests/triple-chiplet.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <stdlib.h>
#include <riscv-pk/encoding.h>
#include "marchid.h"
#include "mmio.h"

// MMIO address that main() reads (via reg_read64) to obtain this chip's ID.
#define CHIP_ID_ADDR 0x2000
// Byte offsets that rw_mem() adds to the local address of `dest` to form the
// remote address. main() uses COMPUTE1_OFFSET when running as chip 2 and
// COMPUTE2_OFFSET when running as chip 1.
#define COMPUTE1_OFFSET 0x200000000L
#define COMPUTE2_OFFSET 0x400000000L

// Pattern written to the remote address.
uint32_t src[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
// Only the address of this buffer is used in this file: rw_mem() offsets it
// to compute the remote target.
uint32_t dest[10];
// Receives the words read back from the remote address; compared against src.
uint32_t test[10];

/*
 * Writes `src` to a remote copy of `dest` and reads it back into `test`.
 *
 * The remote address is the local address of `dest` plus `offset` bytes.
 * Every word is transferred with an explicit lw/sw pair. Cycle counts for
 * the write and read phases are printed.
 *
 * offset: byte offset added to the address of `dest`.
 * Returns 0 on success. On the first mismatching word it prints a
 * diagnostic and terminates the program with exit(1).
 */
int rw_mem(uint64_t offset) {
  // Single source of truth for the transfer length (was a literal 10 in the
  // copy loops and sizeof(src) / 4 in the check loop).
  const int num_words = (int)(sizeof(src) / sizeof(src[0]));

  size_t write_start = rdcycle();

  uint32_t* offchip_addr = (uint32_t*)((uintptr_t)dest + offset);

  // Using inline ASM because CTC requires 32b transactions
  for (int i = 0; i < num_words; i++) {
    asm volatile(
      "lw t0, 0(%1)\n"
      "sw t0, 0(%0)\n"
      :
      : "r"(offchip_addr + i),
        "r"(src + i)
      : "t0", "memory"
    );
  }

  size_t write_end = rdcycle();

  // Casts make the arguments match the %ld conversions exactly.
  printf("Wrote %ld bytes in %ld cycles\n", (long)sizeof(src), (long)(write_end - write_start));

  size_t read_start = rdcycle();

  for (int i = 0; i < num_words; i++) {
    asm volatile(
      "lw t0, 0(%0)\n"
      "sw t0, 0(%1)\n"
      :
      : "r"(offchip_addr + i),
        "r"(test + i)
      : "t0", "memory"
    );
  }

  size_t read_end = rdcycle();

  for (int i = 0; i < num_words; i++) {
    if (src[i] != test[i]) {
      // Report the address that was actually accessed. The previous
      // `dest + offset + i` scaled `offset` by sizeof(uint32_t) and so
      // printed a different address from the one used above.
      printf("Remote write/read failed at index %d %p %p %p %x %x\n",
             i, (void*)(src + i), (void*)(test + i), (void*)(offchip_addr + i),
             src[i], test[i]);
      exit(1);
    }
  }

  printf("Read %ld bytes in %ld cycles\n", (long)sizeof(src), (long)(read_end - read_start));

  return 0;
}

/*
 * Entry point: reads the chip ID and runs the remote read/write test
 * against the other compute chiplet's offset.
 *
 * Chip 1 targets COMPUTE2_OFFSET and chip 2 targets COMPUTE1_OFFSET. Any
 * other chip ID performs no transfer and only reports completion.
 */
int main(void) {
  const int id = reg_read64(CHIP_ID_ADDR);

  printf("Got chip ID: %d\n", id);

  switch (id) {
    case 1:
      rw_mem(COMPUTE2_OFFSET);
      printf("Chip 1 DONE\n");
      break;
    case 2:
      rw_mem(COMPUTE1_OFFSET);
      printf("Chip 2 DONE\n");
      break;
    default:
      printf("Chip %d DONE\n", id);
      break;
  }

  return 0;
}
Loading