From 374796d27c48dd018c057686c8adfa62629182e6 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Fri, 12 May 2023 13:59:37 +0100 Subject: [PATCH 1/5] Initial patch --- README_RV32.md | 13 + configs/DEMO_RISCV32.yaml | 142 ++++++ src/include/simeng/Elf.hh | 12 + src/include/simeng/RegisterValue.hh | 26 +- src/include/simeng/arch/Architecture.hh | 19 + .../simeng/arch/aarch64/Architecture.hh | 4 + src/include/simeng/arch/riscv/Architecture.hh | 44 ++ src/include/simeng/arch/riscv/Instruction.hh | 43 +- src/include/simeng/models/emulation/Core.hh | 3 + .../simeng/pipeline/PipelineBuffer1.hh | 107 +++++ src/lib/CoreInstance.cc | 3 +- src/lib/Elf.cc | 336 +++++++++----- src/lib/ModelConfig.cc | 11 +- src/lib/arch/aarch64/Architecture.cc | 4 + src/lib/arch/riscv/Architecture.cc | 146 +++++- src/lib/arch/riscv/ExceptionHandler.cc | 19 +- src/lib/arch/riscv/Instruction.cc | 6 + src/lib/arch/riscv/InstructionMetadata.cc | 38 +- src/lib/arch/riscv/InstructionMetadata.hh | 16 + src/lib/arch/riscv/Instruction_address.cc | 21 +- src/lib/arch/riscv/Instruction_decode.cc | 313 ++++++++++++- src/lib/arch/riscv/Instruction_execute.cc | 431 +++++++++++++----- src/lib/models/emulation/Core.cc | 45 +- 23 files changed, 1527 insertions(+), 275 deletions(-) create mode 100644 README_RV32.md create mode 100644 configs/DEMO_RISCV32.yaml create mode 100644 src/include/simeng/pipeline/PipelineBuffer1.hh diff --git a/README_RV32.md b/README_RV32.md new file mode 100644 index 0000000000..fe5942068e --- /dev/null +++ b/README_RV32.md @@ -0,0 +1,13 @@ +# SimEng Updates to support 32-bit RISC-V ISA + +- Added 32-bit RISC-V Architecture support + - Sample implementation of how a 32-bit mode can be supported as a configuration. The necessary updates to Architecture, Instruction decode and Instruction execution are added. + - Added a demo YAML file, DEMO_RISCV32.yaml, that can be used as a reference for running with the emulation core. + - The exception handler is updated to process a 32-bit register value for only the 4 system calls that were used for internal benchmarks, but this will need wider adoption across all other system calls. +- Added Compressed (16-bit) ISA support +- Added instruction trace generation support that can be used to log committed instructions. +- 32-bit CSR support + - Added a few CSRs and a sample implementation of how to use them. +- Added an alternative implementation of the pipeline buffer with variable latency support. + - Supports a 0-cycle delay, which is beneficial for merging pipeline stages if required. + - Supports delays of more than 1 cycle between pipeline stages. diff --git a/configs/DEMO_RISCV32.yaml b/configs/DEMO_RISCV32.yaml new file mode 100644 index 0000000000..3ea962b8bc --- /dev/null +++ b/configs/DEMO_RISCV32.yaml @@ -0,0 +1,142 @@ +--- +# This file is based off of the current tx2.yaml config and serves as an example configuration for RISC-V cores. 
+# The following resources where utilised to create the config file and naming schemes: +# https://en.wikichip.org/wiki/cavium/microarchitectures/vulcan + +Core: + ISA: rv32 + Simulation-Mode: emulation + Trace: True + Clock-Frequency: 2.5 + Fetch-Block-Size: 32 +Fetch: + Fetch-Block-Size: 32 + Loop-Buffer-Size: 0 + Loop-Detection-Threshold: 0 +Process-Image: + Heap-Size: 1073741824 + Stack-Size: 1048576 +Register-Set: + GeneralPurpose-Count: 154 + FloatingPoint-Count: 90 +Pipeline-Widths: + Commit: 4 + Dispatch-Rate: 4 + FrontEnd: 4 + LSQ-Completion: 2 +Queue-Sizes: + ROB: 180 + Load: 64 + Store: 36 +Branch-Predictor: + BTB-Tag-Bits: 11 + Saturating-Count-Bits: 2 + Global-History-Length: 10 + RAS-entries: 1 + Fallback-Static-Predictor: "Always-Taken" + Branch-Predictor: + BTB-bitlength: 16 +L1-Data-Memory: + Interface-Type: Flat +L1-Instruction-Memory: + Interface-Type: Flat +LSQ-L1-Interface: + Access-Latency: 4 + Exclusive: False + Load-Bandwidth: 32 + Store-Bandwidth: 16 + Permitted-Requests-Per-Cycle: 2 + Permitted-Loads-Per-Cycle: 2 + Permitted-Stores-Per-Cycle: 1 +Ports: + 0: + Portname: Port 0 + Instruction-Support: + - INT_SIMPLE + - INT_MUL + 1: + Portname: Port 1 + Instruction-Support: + - INT + 2: + Portname: Port 2 + Instruction-Support: + - INT_SIMPLE + - INT_MUL + - BRANCH + 3: + Portname: Port 4 + Instruction-Support: + - LOAD + 4: + Portname: Port 5 + Instruction-Support: + - LOAD + 5: + Portname: Port 3 + Instruction-Support: + - STORE +Reservation-Stations: + 0: + Size: 60 + Dispatch-Rate: 4 + Ports: + - Port 0 + - Port 1 + - Port 2 + - Port 4 + - Port 5 + - Port 3 +Execution-Units: + 0: + Pipelined: True + 1: + Pipelined: True + 2: + Pipelined: True + 3: + Pipelined: True + 4: + Pipelined: True + 5: + Pipelined: True +Latencies: + 0: + Instruction-Groups: + - INT_SIMPLE_ARTH + - INT_SIMPLE_LOGICAL + Execution-Latency: 1 + Execution-Throughput: 1 + 1: + Instruction-Groups: + - INT_MUL + Execution-Latency: 5 + Execution-Throughput: 1 + 2: + Instruction-Groups: + - INT_DIV + Execution-Latency: 39 + Execution-Throughput: 39 +# CPU-Info mainly used to generate a replica of the special (or system) file directory +# structure +CPU-Info: + # Set Generate-Special-Dir to 'T' to generate the special files directory, or to 'F' to not. + # (Not generating the special files directory may require the user to copy over files manually) + Generate-Special-Dir: true + # Core-Count MUST be 1 as multi-core is not supported at this time. (TX2 true value is 32) + Core-Count: 1 + # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (TX2 true value is 2) + Socket-Count: 1 + # SMT MUST be 1 as Simultanious-Multi-Threading is not supported at this time. 
(TX2 true value is 4) + SMT: 1 + # Below are the values needed to generate /proc/cpuinfo + BogoMIPS: 400.00 + Features: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics cpuid asimdrdm + CPU-Implementer: "0x43" + CPU-Architecture: 8 + CPU-Variant: "0x1" + CPU-Part: "0x0af" + CPU-Revision: 2 + # Package-Count is used to generate + # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id} + Package-Count: 1 \ No newline at end of file diff --git a/src/include/simeng/Elf.hh b/src/include/simeng/Elf.hh index 88e419e6a5..14bcddcb28 100644 --- a/src/include/simeng/Elf.hh +++ b/src/include/simeng/Elf.hh @@ -21,6 +21,15 @@ struct ElfHeader { uint64_t memorySize; }; +struct Elf32Header { + uint32_t type; + uint32_t offset; + uint32_t virtualAddress; + uint32_t physicalAddress; + uint32_t fileSize; + uint32_t memorySize; +}; + /** A processed Executable and Linkable Format (ELF) file. */ class Elf { public: @@ -33,8 +42,11 @@ class Elf { private: uint64_t entryPoint_; std::vector headers_; + uint32_t entryPoint32_; + std::vector headers32_; bool isValid_ = false; uint64_t processImageSize_; + bool mode32bit_; }; } // namespace simeng diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 20004432d0..d85471eda3 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -26,10 +26,16 @@ class RegisterValue { * number of bytes (defaulting to the size of the template type). */ template , T>* = nullptr> - RegisterValue(T value, uint16_t bytes = sizeof(T)) : bytes(bytes) { + RegisterValue(T value, uint16_t bytes = sizeof(T), bool relaxFor32 = true) : bytes(bytes) { + relaxedFor32bit_ = relaxFor32; + std::memset(this->value, 0, MAX_LOCAL_BYTES); if (isLocal()) { T* view = reinterpret_cast(this->value); - view[0] = value; + if (sizeof(T) > bytes) { // e.g. when T is int64 and bytes is 4 + std::memcpy(this->value, &value, bytes); + } else { + view[0] = value; + } if (bytes > sizeof(T)) { // Zero the remaining bytes not set by the provided value @@ -90,11 +96,16 @@ class RegisterValue { * the specified datatype. */ template const T* getAsVector() const { - static_assert(alignof(T) <= 8 && "Alignment over 8 bytes not guranteed"); + static_assert(alignof(T) <= 8 && "Alignment over 8 bytes not guaranteed"); assert(bytes > 0 && "Attempted to access an uninitialised RegisterValue"); - assert(sizeof(T) <= bytes && - "Attempted to access a RegisterValue as a datatype larger than the " - "data held"); + assert((sizeof(T) <= bytes || (bytes == 4 && sizeof(T) == 8)) && "Attempted" + " to access a RegisterValue as a datatype larger than the " + "data held" ); + if(!relaxedFor32bit_) { // maybe #ifdef if it makes slower? + assert(sizeof(T) <= bytes && + "Attempted to access a RegisterValue as a datatype larger than the " + "data held"); + } if (isLocal()) { return reinterpret_cast(value); } else { @@ -129,6 +140,9 @@ class RegisterValue { /** The underlying local member value. Aligned to 8 bytes to prevent * potential alignment issue when casting. */ alignas(8) char value[MAX_LOCAL_BYTES]; + + /** Switch for different assert checking */ + bool relaxedFor32bit_; }; } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/Architecture.hh b/src/include/simeng/arch/Architecture.hh index 4dbe377587..edd404c827 100644 --- a/src/include/simeng/arch/Architecture.hh +++ b/src/include/simeng/arch/Architecture.hh @@ -14,6 +14,12 @@ using MacroOp = std::vector>; namespace arch { +/** Modes. 
Assume only has 32-bit and 64-bit. */ +enum arch_mode { + ARCH_32BIT=1, + ARCH_64BIT=0 +}; + /** The types of changes that can be made to values within the process state. */ enum class ChangeType { REPLACEMENT, INCREMENT, DECREMENT }; @@ -109,6 +115,19 @@ class Architecture { /** Updates System registers of any system-based timers. */ virtual void updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) const = 0; + + /** Update trace file */ + virtual void updateInstrTrace(const std::shared_ptr& instruction, + RegisterFileSet* regFile, uint64_t tick) const = 0; + + /** Return the mode (32-bit or 64-bit) */ + arch_mode is32BitMode() const { + return is32Bit_; + } + + protected: + /** Mode, either 32-bit or 64-bit */ + arch_mode is32Bit_; }; } // namespace arch diff --git a/src/include/simeng/arch/aarch64/Architecture.hh b/src/include/simeng/arch/aarch64/Architecture.hh index 39d399dc7d..ad14dc1c0e 100644 --- a/src/include/simeng/arch/aarch64/Architecture.hh +++ b/src/include/simeng/arch/aarch64/Architecture.hh @@ -84,6 +84,10 @@ class Architecture : public arch::Architecture { /** Update the value of SVCRval_. */ void setSVCRval(const uint64_t newVal) const; + /** Update trace file */ + void updateInstrTrace(const std::shared_ptr& instruction, + RegisterFileSet* regFile, uint64_t tick) const override; + private: /** A decoding cache, mapping an instruction word to a previously decoded * instruction. Instructions are added to the cache as they're decoded, to diff --git a/src/include/simeng/arch/riscv/Architecture.hh b/src/include/simeng/arch/riscv/Architecture.hh index 9e8c169f7a..de6c76c71e 100644 --- a/src/include/simeng/arch/riscv/Architecture.hh +++ b/src/include/simeng/arch/riscv/Architecture.hh @@ -2,6 +2,8 @@ #include #include +#include +#include #include "simeng/arch/Architecture.hh" #include "simeng/arch/riscv/ExceptionHandler.hh" @@ -14,6 +16,32 @@ namespace simeng { namespace arch { namespace riscv { +enum riscv_sysreg { + SYSREG_MSTATUS = 0x300, + SYSREG_MSTATUSH = 0x310, + SYSREG_MEPC = 0x341, + SYSREG_MCAUSE = 0x342, + SYSREG_MHARTID = 0xF14, + SYSREG_CYCLE = 0xC00, + SYSREG_TIME = 0xC01, + SYSREG_INSTRRET = 0xC02 +}; + +struct constantsPool { + const uint8_t alignMask = 0x3; + const uint8_t alignMaskCompressed = 0x1; + const uint8_t bytesLimit = 4; + const uint8_t bytesLimitCompressed = 2; + const uint8_t byteLength64 = 8; + const uint8_t byteLength32 = 4; +}; + +struct archConstants { + uint8_t alignMask; + uint8_t bytesLimit; /* Minimum bytes the decoder needs to process */ + uint8_t regWidth; /* Register width in bytes */ +}; + /* A basic RISC-V implementation of the `Architecture` interface. */ class Architecture : public arch::Architecture { public: @@ -63,6 +91,13 @@ class Architecture : public arch::Architecture { std::vector getConfigPhysicalRegisterQuantities( YAML::Node config) const override; + /** Update trace file */ + void updateInstrTrace(const std::shared_ptr& instruction, + RegisterFileSet* regFile, uint64_t tick) const override; + + /** Return a struct contains constants */ + archConstants getConstants() const; + private: /** Retrieve an executionInfo object for the requested instruction. If a * opcode-based override has been defined for the latency and/or @@ -95,6 +130,15 @@ class Architecture : public arch::Architecture { /** A reference to a Linux kernel object to forward syscalls to. 
*/ kernel::Linux& linux_; + + /** A pointer to the trace file */ + std::ofstream *traceFile_; + + /** Switch for updateInstrTrace() */ + bool traceOn_ = false; + + /** A struct contains constants */ + archConstants constants_; }; } // namespace riscv diff --git a/src/include/simeng/arch/riscv/Instruction.hh b/src/include/simeng/arch/riscv/Instruction.hh index 61b83037ca..3f023d28b6 100644 --- a/src/include/simeng/arch/riscv/Instruction.hh +++ b/src/include/simeng/arch/riscv/Instruction.hh @@ -47,9 +47,23 @@ enum class InstructionException { SupervisorCall, HypervisorCall, SecureMonitorCall, + UnmappedSysReg, NoAvailablePort }; +enum CInstructionFormat { + CIF_CR, + CIF_CI, + CIF_CSS, + CIF_CIW, + CIF_CL, + CIF_CS, + CIF_CA, + CIF_CB, + CIF_CJ, + CIF_INVALID +}; + /** A basic RISC-V implementation of the `Instruction` interface. */ class Instruction : public simeng::Instruction { public: @@ -163,13 +177,22 @@ class Instruction : public simeng::Instruction { * automatically supplied as zero. */ static const Register ZERO_REGISTER; + static const Register RA_REGISTER; + static const Register SP_REGISTER; + + /** Set register byte width */ + void setArchRegWidth(uint8_t len); + + /** ONLY valid after decode. Return regByteWidth */ + uint8_t getArchRegWidth() const; + private: /** The maximum number of source registers any supported RISC-V instruction * can have. */ static const uint8_t MAX_SOURCE_REGISTERS = 2; /** The maximum number of destination registers any supported RISC-V * instruction can have. */ - static const uint8_t MAX_DESTINATION_REGISTERS = 1; + static const uint8_t MAX_DESTINATION_REGISTERS = 2; //CSRs can be another destination apart from std RD /** A reference to the ISA instance this instruction belongs to. */ const Architecture& architecture_; @@ -198,11 +221,19 @@ class Instruction : public simeng::Instruction { /** The current exception state of this instruction. */ InstructionException exception_ = InstructionException::None; + /** The length of instruction in bytes. */ + uint8_t archRegWidth_; + // Decoding /** Process the instruction's metadata to determine source/destination * registers. */ void decode(); + bool decode16(); + + /** Deal with CSR when decoding */ + bool decodeCsr(); + /** Invalidate instructions that are currently not yet implemented. This prevents errors during speculated branches with unknown destinations; non-executable assertions. memory is decoded into valid but not implemented @@ -238,6 +269,13 @@ class Instruction : public simeng::Instruction { bool isLogical_ = false; /** Is this a compare instruction? */ bool isCompare_ = false; + /** Is this a csr operation instruction? */ + bool isCsr_ = false; + + CInstructionFormat instFormat_ = CIF_INVALID; + + /** Extracted value of current immediate from metadata */ + uint32_t c_imm = 0; // Memory /** Set the accessed memory addresses, and create a corresponding memory data @@ -252,6 +290,9 @@ class Instruction : public simeng::Instruction { * for sending to memory (according to instruction type). Each entry * corresponds to a `memoryAddresses` entry. 
*/ std::vector memoryData; + + /** Return integer register value, to support both 32-bit and 64-bit mode */ + int64_t getSignedInt(RegisterValue& value) const; }; } // namespace riscv diff --git a/src/include/simeng/models/emulation/Core.hh b/src/include/simeng/models/emulation/Core.hh index 9152c6df03..c4a4acc453 100644 --- a/src/include/simeng/models/emulation/Core.hh +++ b/src/include/simeng/models/emulation/Core.hh @@ -11,6 +11,9 @@ #include "simeng/arch/Architecture.hh" #include "simeng/span.hh" +// TODO: This is architecture-specific, need to be refactored later. See comments in Core.cc +#include "simeng/arch/riscv/Architecture.hh" + namespace simeng { namespace models { namespace emulation { diff --git a/src/include/simeng/pipeline/PipelineBuffer1.hh b/src/include/simeng/pipeline/PipelineBuffer1.hh new file mode 100644 index 0000000000..dd2ed70ce7 --- /dev/null +++ b/src/include/simeng/pipeline/PipelineBuffer1.hh @@ -0,0 +1,107 @@ +#pragma once + +#include +#include +#include + +namespace simeng { +namespace pipeline_hi { + +/** A tickable pipelined buffer. Values are shifted from the tail slot to the + * head slot each time `tick()` is called. */ +template +class PipelineBuffer { + public: + /** Construct a pipeline buffer of width `width`, and fill all slots with + * `initialValue`. */ + PipelineBuffer(int width, const T& initialValue) + : width(width), buffer(width * defaultLength_, initialValue), + length_(defaultLength_), headIndex_(defaultLength_-1), + tailIndex_(0) {} + + PipelineBuffer(int width, const T& initialValue, int length) + : width(width), buffer(width * length, initialValue), length_(length), + headIndex_(length_-1), tailIndex_(0) { + assert(length_ != 0 && "Pipeline buffer length cannot be 0"); + } + + /** Tick the buffer and move head/tail pointers, or do nothing if it's + * stalled. */ + void tick() { + if (isStalled_) return; + + //length ==1 shortcut? condition check cost + + if (headIndex_) { // when headIndex != 0 + headIndex_--; + } else { + headIndex_ = length_ - 1; + } + if (tailIndex_) { // when tailIndex != 0 + tailIndex_--; + } else { + tailIndex_ = length_ - 1; + } + } + + /** Get a tail slots pointer. */ + T* getTailSlots() { + T* ptr = buffer.data(); + return &ptr[tailIndex_ * width]; + } + + /** Get a const tail slots pointer. */ + const T* getTailSlots() const { + const T* ptr = buffer.data(); + return &ptr[tailIndex_ * width]; + } + + /** Get a head slots pointer. */ + T* getHeadSlots() { + T* ptr = buffer.data(); + return &ptr[headIndex_ * width]; + } + + /** Get a const head slots pointer. */ + const T* getHeadSlots() const { + const T* ptr = buffer.data(); + return &ptr[headIndex_ * width]; + } + + /** Check if the buffer is stalled. */ + bool isStalled() const { return isStalled_; } + + /** Set the buffer's stall flag to `stalled`. */ + void stall(bool stalled) { isStalled_ = stalled; } + + /** Fill the buffer with a specified value. */ + void fill(const T& value) { std::fill(buffer.begin(), buffer.end(), value); } + + /** Get the width of the buffer slots. */ + unsigned short getWidth() const { return width; } + + private: + /** The width of each row of slots. */ + unsigned short width; + + /** The buffer. */ + std::vector buffer; + + /** Whether the buffer is stalled or not. */ + bool isStalled_ = false; + + /** Buffer length */ + const unsigned int length_; + + /** */ + unsigned int headIndex_; + + /** */ + unsigned int tailIndex_; + + /** The number of stages in the pipeline. 
*/ + static const unsigned int defaultLength_ = 2; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc index ddf53b20bf..8ba06c8e08 100644 --- a/src/lib/CoreInstance.cc +++ b/src/lib/CoreInstance.cc @@ -233,7 +233,8 @@ void CoreInstance::createCore() { } // Create the architecture, with knowledge of the kernel - if (config_["Core"]["ISA"].as() == "rv64") { + if (config_["Core"]["ISA"].as() == "rv64" || + config_["Core"]["ISA"].as() == "rv32") { arch_ = std::make_unique(kernel_, config_); } else if (config_["Core"]["ISA"].as() == "AArch64") { diff --git a/src/lib/Elf.cc b/src/lib/Elf.cc index 6654cc86a8..6281598403 100644 --- a/src/lib/Elf.cc +++ b/src/lib/Elf.cc @@ -47,127 +47,248 @@ Elf::Elf(std::string path, char** imagePointer) { // Check whether this is a 32 or 64-bit executable char bitFormat; file.read(&bitFormat, sizeof(bitFormat)); - if (bitFormat != ElfBitFormat::Format64) { + if (bitFormat != ElfBitFormat::Format32 && bitFormat != ElfBitFormat::Format64) { return; } + mode32bit_ = (bitFormat == ElfBitFormat::Format32); + isValid_ = true; - /** - * Starting from the 24th byte of the ELF header a 64-bit value - * represents the virtual address to which the system first transfers - * control, thus starting the process. - * In `elf64_hdr` this value maps to the member `Elf64_Addr e_entry`. - */ + if (bitFormat == ElfBitFormat::Format64) { + /** + * Starting from the 24th byte of the ELF header a 64-bit value + * represents the virtual address to which the system first transfers + * control, thus starting the process. + * In `elf64_hdr` this value maps to the member `Elf64_Addr e_entry`. + */ - // Seek to the entry point of the file. - // The information in between is discarded - file.seekg(0x18); - file.read(reinterpret_cast(&entryPoint_), sizeof(entryPoint_)); + // Seek to the entry point of the file. + // The information in between is discarded + file.seekg(0x18); + file.read(reinterpret_cast(&entryPoint_), sizeof(entryPoint_)); - /** - * Starting from the 32nd byte of the ELF Header a 64-bit value - * represents the offset of the ELF Program header or - * Program header table in the ELF file. - * In `elf64_hdr` this value maps to the member `Elf64_Addr e_phoff`. - */ + /** + * Starting from the 32nd byte of the ELF Header a 64-bit value + * represents the offset of the ELF Program header or + * Program header table in the ELF file. + * In `elf64_hdr` this value maps to the member `Elf64_Addr e_phoff`. + */ - // Seek to the byte representing the start of the header offset table. - uint64_t headerOffset; - file.read(reinterpret_cast(&headerOffset), sizeof(headerOffset)); + // Seek to the byte representing the start of the header offset table. + uint64_t headerOffset; + file.read(reinterpret_cast(&headerOffset), sizeof(headerOffset)); - /** - * Starting 54th byte of the ELF Header a 16-bit value indicates - * the size of each entry in the ELF Program header. In the `elf64_hdr` - * struct this value maps to the member `Elf64_Half e_phentsize`. All - * header entries have the same size. - * Starting from the 56th byte a 16-bit value represents the number - * of header entries in the ELF Program header. In the `elf64_hdr` - * struct this value maps to `Elf64_Half e_phnum`. - */ + /** + * Starting 54th byte of the ELF Header a 16-bit value indicates + * the size of each entry in the ELF Program header. In the `elf64_hdr` + * struct this value maps to the member `Elf64_Half e_phentsize`. 
All + * header entries have the same size. + * Starting from the 56th byte a 16-bit value represents the number + * of header entries in the ELF Program header. In the `elf64_hdr` + * struct this value maps to `Elf64_Half e_phnum`. + */ + + // Seek to the byte representing header entry size. + file.seekg(0x36); + uint16_t headerEntrySize; + file.read(reinterpret_cast(&headerEntrySize), sizeof(headerEntrySize)); + uint16_t headerEntries; + file.read(reinterpret_cast(&headerEntries), sizeof(headerEntries)); + + // Resize the header to equal the number of header entries. + headers_.resize(headerEntries); + processImageSize_ = 0; + + // Loop over all headers and extract them. + for (size_t i = 0; i < headerEntries; i++) { + // Since all headers entries have the same size. + // We can extract the nth header using the header offset + // and header entry size. + file.seekg(headerOffset + (i * headerEntrySize)); + auto& header = headers_[i]; - // Seek to the byte representing header entry size. - file.seekg(0x36); - uint16_t headerEntrySize; - file.read(reinterpret_cast(&headerEntrySize), sizeof(headerEntrySize)); - uint16_t headerEntries; - file.read(reinterpret_cast(&headerEntries), sizeof(headerEntries)); - - // Resize the header to equal the number of header entries. - headers_.resize(headerEntries); - processImageSize_ = 0; - - // Loop over all headers and extract them. - for (size_t i = 0; i < headerEntries; i++) { - // Since all headers entries have the same size. - // We can extract the nth header using the header offset - // and header entry size. - file.seekg(headerOffset + (i * headerEntrySize)); - auto& header = headers_[i]; + /** + * Like the ELF Header, the ELF Program header is also defined + * using a struct: + * typedef struct { + * uint32_t p_type; + * uint32_t p_flags; + * Elf64_Off p_offset; + * Elf64_Addr p_vaddr; + * Elf64_Addr p_paddr; + * uint64_t p_filesz; + * uint64_t p_memsz; + * uint64_t p_align; + * } Elf64_Phdr; + * + * The ELF Program header table is an array of structures, + * each describing a segment or other information the system + * needs to prepare the program for execution. A segment + * contains one or more sections (ELF Program Section). + * + * The `p_vaddr` field holds the virtual address at which the first + * byte of the segment resides in memory and the `p_memsz` field + * holds the number of bytes in the memory image of the segment. + * It may be zero. The `p_offset` member holds the offset from the + * beginning of the file at which the first byte of the segment resides. + */ + // Each address-related field is 8 bytes in a 64-bit ELF file + const int fieldBytes = 8; + file.read(reinterpret_cast(&(header.type)), sizeof(header.type)); + file.seekg(4, std::ios::cur); // Skip flags + file.read(reinterpret_cast(&(header.offset)), fieldBytes); + file.read(reinterpret_cast(&(header.virtualAddress)), fieldBytes); + file.read(reinterpret_cast(&(header.physicalAddress)), fieldBytes); + file.read(reinterpret_cast(&(header.fileSize)), fieldBytes); + file.read(reinterpret_cast(&(header.memorySize)), fieldBytes); + + // To construct the process we look for the largest virtual address and + // add it to the memory size of the header. This way we obtain a very + // large array which can hold data at large virtual address. + // However, this way we end up creating a sparse array, in which most + // of the entries are unused. Also SimEng internally treats these + // virtual address as physical addresses to index into this large array. 
+ if (header.virtualAddress + header.memorySize > processImageSize_) { + processImageSize_ = header.virtualAddress + header.memorySize; + } + } + + *imagePointer = (char*)malloc(processImageSize_ * sizeof(char)); /** - * Like the ELF Header, the ELF Program header is also defined - * using a struct: - * typedef struct { - * uint32_t p_type; - * uint32_t p_flags; - * Elf64_Off p_offset; - * Elf64_Addr p_vaddr; - * Elf64_Addr p_paddr; - * uint64_t p_filesz; - * uint64_t p_memsz; - * uint64_t p_align; - * } Elf64_Phdr; - * - * The ELF Program header table is an array of structures, - * each describing a segment or other information the system - * needs to prepare the program for execution. A segment - * contains one or more sections (ELF Program Section). - * - * The `p_vaddr` field holds the virtual address at which the first - * byte of the segment resides in memory and the `p_memsz` field - * holds the number of bytes in the memory image of the segment. - * It may be zero. The `p_offset` member holds the offset from the - * beginning of the file at which the first byte of the segment resides. + * The ELF Program header has a member called `p_type`, which represents + * the kind of data or memory segments described by the program header. + * The value PT_LOAD=1 represents a loadable segment. In other words, + * it contains initialized data that contributes to the program's + * memory image. */ - // Each address-related field is 8 bytes in a 64-bit ELF file - const int fieldBytes = 8; - file.read(reinterpret_cast(&(header.type)), sizeof(header.type)); - file.seekg(4, std::ios::cur); // Skip flags - file.read(reinterpret_cast(&(header.offset)), fieldBytes); - file.read(reinterpret_cast(&(header.virtualAddress)), fieldBytes); - file.read(reinterpret_cast(&(header.physicalAddress)), fieldBytes); - file.read(reinterpret_cast(&(header.fileSize)), fieldBytes); - file.read(reinterpret_cast(&(header.memorySize)), fieldBytes); - - // To construct the process we look for the largest virtual address and - // add it to the memory size of the header. This way we obtain a very - // large array which can hold data at large virtual address. - // However, this way we end up creating a sparse array, in which most - // of the entries are unused. Also SimEng internally treats these - // virtual address as physical addresses to index into this large array. - if (header.virtualAddress + header.memorySize > processImageSize_) { - processImageSize_ = header.virtualAddress + header.memorySize; + // Process headers; only observe LOAD sections for this basic implementation + for (const auto& header : headers_) { + if (header.type == 1) { // LOAD + file.seekg(header.offset); + // Read `fileSize` bytes from `file` into the appropriate place in process + // memory + file.read(*imagePointer + header.virtualAddress, header.fileSize); + } } - } + } else { + /** + * Starting from the 24th byte of the ELF header a 32-bit value + * represents the virtual address to which the system first transfers + * control, thus starting the process. + * In `elf32_hdr` this value maps to the member `Elf32_Addr e_entry`. + */ - *imagePointer = (char*)malloc(processImageSize_ * sizeof(char)); - /** - * The ELF Program header has a member called `p_type`, which represents - * the kind of data or memory segments described by the program header. - * The value PT_LOAD=1 represents a loadable segment. In other words, - * it contains initialized data that contributes to the program's - * memory image. - */ + // Seek to the entry point of the file. 
+ // The information in between is discarded + file.seekg(0x18); + file.read(reinterpret_cast(&entryPoint32_), sizeof(entryPoint32_)); + + /** + * Starting from the 32nd byte of the ELF Header a 64-bit value + * represents the offset of the ELF Program header or + * Program header table in the ELF file. + * In `elf32_hdr` this value maps to the member `Elf32_Addr e_phoff`. + */ + + // Seek to the byte representing the start of the header offset table. + uint32_t headerOffset; + file.read(reinterpret_cast(&headerOffset), sizeof(headerOffset)); + + /** + * Starting 42th byte of the ELF Header a 16-bit value indicates + * the size of each entry in the ELF Program header. In the `elf32_hdr` + * struct this value maps to the member `Elf32_Half e_phentsize`. All + * header entries have the same size. + * Starting from the 44th byte a 16-bit value represents the number + * of header entries in the ELF Program header. In the `elf32_hdr` + * struct this value maps to `Elf32_Half e_phnum`. + */ + + // Seek to the byte representing header entry size. + file.seekg(0x2a); + uint16_t headerEntrySize; + file.read(reinterpret_cast(&headerEntrySize), sizeof(headerEntrySize)); + uint16_t headerEntries; + file.read(reinterpret_cast(&headerEntries), sizeof(headerEntries)); - // Process headers; only observe LOAD sections for this basic implementation - for (const auto& header : headers_) { - if (header.type == 1) { // LOAD - file.seekg(header.offset); - // Read `fileSize` bytes from `file` into the appropriate place in process - // memory - file.read(*imagePointer + header.virtualAddress, header.fileSize); + // Resize the header to equal the number of header entries. + headers32_.resize(headerEntries); + processImageSize_ = 0; + + // Loop over all headers and extract them. + for (size_t i = 0; i < headerEntries; i++) { + // Since all headers entries have the same size. + // We can extract the nth header using the header offset + // and header entry size. + file.seekg(headerOffset + (i * headerEntrySize)); + auto& header = headers32_[i]; + + /** + * Like the ELF Header, the ELF Program header is also defined + * using a struct: + * typedef struct { + * uint32_t p_type; + * Elf32_Off p_offset; + * Elf32_Addr p_vaddr; + * Elf32_Addr p_paddr; + * uint32_t p_filesz; + * uint32_t p_memsz; + * uint32_t p_flags; + * uint32_t p_align; + * } Elf32_Phdr; + * + * The ELF Program header table is an array of structures, + * each describing a segment or other information the system + * needs to prepare the program for execution. A segment + * contains one or more sections (ELF Program Section). + * + * The `p_vaddr` field holds the virtual address at which the first + * byte of the segment resides in memory and the `p_memsz` field + * holds the number of bytes in the memory image of the segment. + * It may be zero. The `p_offset` member holds the offset from the + * beginning of the file at which the first byte of the segment resides. 
+ */ + + // Each address-related field is 4 bytes in a 32-bit ELF file + const int fieldBytes = 4; + file.read(reinterpret_cast(&(header.type)), sizeof(header.type)); + file.read(reinterpret_cast(&(header.offset)), fieldBytes); + file.read(reinterpret_cast(&(header.virtualAddress)), fieldBytes); + file.read(reinterpret_cast(&(header.physicalAddress)), fieldBytes); + file.read(reinterpret_cast(&(header.fileSize)), fieldBytes); + file.read(reinterpret_cast(&(header.memorySize)), fieldBytes); + + // To construct the process we look for the largest virtual address and + // add it to the memory size of the header. This way we obtain a very + // large array which can hold data at large virtual address. + // However, this way we end up creating a sparse array, in which most + // of the entries are unused. Also SimEng internally treats these + // virtual address as physical addresses to index into this large array. + if (header.virtualAddress + header.memorySize > processImageSize_) { + processImageSize_ = header.virtualAddress + header.memorySize; + } + } + + *imagePointer = (char*)malloc(processImageSize_ * sizeof(char)); + /** + * The ELF Program header has a member called `p_type`, which represents + * the kind of data or memory segments described by the program header. + * The value PT_LOAD=1 represents a loadable segment. In other words, + * it contains initialized data that contributes to the program's + * memory image. + */ + + // Process headers; only observe LOAD sections for this basic implementation + for (const auto& header : headers32_) { + if (header.type == 1) { // LOAD + file.seekg(header.offset); + // Read `fileSize` bytes from `file` into the appropriate place in process + // memory + file.read(*imagePointer + header.virtualAddress, header.fileSize); + } } } @@ -179,7 +300,12 @@ Elf::~Elf() {} uint64_t Elf::getProcessImageSize() const { return processImageSize_; } -uint64_t Elf::getEntryPoint() const { return entryPoint_; } +uint64_t Elf::getEntryPoint() const { + if (mode32bit_) { + return entryPoint32_; + } else + return entryPoint_; +} bool Elf::isValid() const { return isValid_; } diff --git a/src/lib/ModelConfig.cc b/src/lib/ModelConfig.cc index 60117a8053..88cc1f7d59 100644 --- a/src/lib/ModelConfig.cc +++ b/src/lib/ModelConfig.cc @@ -67,7 +67,7 @@ void ModelConfig::validate() { "Streaming-Vector-Length"}; validISA = nodeChecker( configFile_[root][subFields[0]], subFields[0], - std::vector({"AArch64", "rv64"}), ExpectedValue::String); + std::vector({"AArch64", "rv64", "rv32"}), ExpectedValue::String); nodeChecker(configFile_[root][subFields[1]], subFields[1], {"emulation", "inorderpipelined", "outoforder"}, ExpectedValue::String); @@ -146,7 +146,8 @@ void ModelConfig::validate() { 1, group.as().size())); configFile_["Ports"][i]["Instruction-Opcode-Support"][opcodeIndex] = opcode; - if (configFile_["Core"]["ISA"].as() == "rv64") { + if (configFile_["Core"]["ISA"].as() == "rv64" || + configFile_["Core"]["ISA"].as() == "rv32") { // Ensure opcode is between the bounds of 0 and Capstones' // RISCV_INSTRUCTION_LIST_END boundChecker( @@ -233,7 +234,8 @@ void ModelConfig::validate() { // TODO make as many subfields as possible generic to avoid repeated code // e.g. 
AArch64 FloatingPoint/SVE-Count -> FloatingPoint-Count - if (configFile_["Core"]["ISA"].as() == "rv64") { + if (configFile_["Core"]["ISA"].as() == "rv64" || + configFile_["Core"]["ISA"].as() == "rv32") { // Register-Set root = "Register-Set"; subFields = {"GeneralPurpose-Count", "FloatingPoint-Count"}; @@ -707,7 +709,8 @@ void ModelConfig::createGroupMapping() { "STORE_ADDRESS_SME", "STORE_DATA_SME", "STORE_SME"}; - } else if (configFile_["Core"]["ISA"].as() == "rv64") { + } else if (configFile_["Core"]["ISA"].as() == "rv64" || + configFile_["Core"]["ISA"].as() == "rv32") { groupOptions_ = {"INT", "INT_SIMPLE", "INT_SIMPLE_ARTH", diff --git a/src/lib/arch/aarch64/Architecture.cc b/src/lib/arch/aarch64/Architecture.cc index 08b807eb97..23ebf86ae3 100644 --- a/src/lib/arch/aarch64/Architecture.cc +++ b/src/lib/arch/aarch64/Architecture.cc @@ -325,6 +325,10 @@ void Architecture::setSVCRval(const uint64_t newVal) const { SVCRval_ = newVal; } +void Architecture::updateInstrTrace(const std::shared_ptr& instruction, + simeng::RegisterFileSet* regFile, uint64_t tick) const { + } + } // namespace aarch64 } // namespace arch } // namespace simeng diff --git a/src/lib/arch/riscv/Architecture.cc b/src/lib/arch/riscv/Architecture.cc index 5d18349dc5..d1a18777e8 100644 --- a/src/lib/arch/riscv/Architecture.cc +++ b/src/lib/arch/riscv/Architecture.cc @@ -16,7 +16,25 @@ std::forward_list Architecture::metadataCache; Architecture::Architecture(kernel::Linux& kernel, YAML::Node config) : linux_(kernel) { - cs_err n = cs_open(CS_ARCH_RISCV, CS_MODE_RISCV64, &capstoneHandle); + is32Bit_ = ARCH_64BIT; + if (config["Core"]["ISA"].as() == "rv32") { + is32Bit_ = ARCH_32BIT; + } + + cs_mode csMode = CS_MODE_RISCV64; + constantsPool constantsPool; + + if(is32Bit_) { + csMode = CS_MODE_RISCV32GC; // TODO Note: currently using local (1-line)modified capstone + constants_.alignMask = constantsPool.alignMaskCompressed; + constants_.regWidth = constantsPool.byteLength32; + constants_.bytesLimit = constantsPool.bytesLimitCompressed; + } else { + constants_.alignMask = constantsPool.alignMask; + constants_.regWidth = constantsPool.byteLength64; + constants_.bytesLimit = constantsPool.bytesLimit; + } + cs_err n = cs_open(CS_ARCH_RISCV, csMode, &capstoneHandle); if (n != CS_ERR_OK) { std::cerr << "[SimEng:Architecture] Could not create capstone handle due " "to error " @@ -26,6 +44,16 @@ Architecture::Architecture(kernel::Linux& kernel, YAML::Node config) cs_option(capstoneHandle, CS_OPT_DETAIL, CS_OPT_ON); + // Generate zero-indexed system register map + systemRegisterMap_[SYSREG_MSTATUS] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MSTATUSH] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MEPC] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MCAUSE] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MHARTID] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_CYCLE] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_TIME] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_INSTRRET] = systemRegisterMap_.size(); + // Instantiate an executionInfo entry for each group in the InstructionGroup // namespace. 
for (int i = 0; i < NUM_GROUPS; i++) { @@ -117,19 +145,28 @@ Architecture::Architecture(kernel::Linux& kernel, YAML::Node config) } } } + if (config["Core"]["Trace"].as()) { + traceFile_ = new std::ofstream(); + traceFile_->open("./trace.log"); + traceOn_ = true; + } } Architecture::~Architecture() { cs_close(&capstoneHandle); decodeCache.clear(); metadataCache.clear(); groupExecutionInfo_.clear(); + if(traceOn_) { + traceFile_->close(); + } } uint8_t Architecture::predecode(const void* ptr, uint8_t bytesAvailable, uint64_t instructionAddress, MacroOp& output) const { // Check that instruction address is 4-byte aligned as required by RISC-V - if (instructionAddress & 0x3) { + // 2-byte when Compressed ISA is supported + if (instructionAddress & constants_.alignMask) { // Consume 1-byte and raise a misaligned PC exception auto metadata = InstructionMetadata((uint8_t*)ptr, 1); metadataCache.emplace_front(metadata); @@ -142,8 +179,8 @@ uint8_t Architecture::predecode(const void* ptr, uint8_t bytesAvailable, return 1; } - assert(bytesAvailable >= 4 && - "Fewer than 4 bytes supplied to RISC-V decoder"); + assert(bytesAvailable >= constants_.bytesLimit && + "Fewer than bytes limit supplied to RISC-V decoder"); // Dereference the instruction pointer to obtain the instruction word uint32_t insn; @@ -175,6 +212,8 @@ uint8_t Architecture::predecode(const void* ptr, uint8_t bytesAvailable, Instruction newInsn(*this, metadataCache.front()); // Set execution information for this instruction newInsn.setExecutionInfo(getExecutionInfo(newInsn)); + // Set byte length in instruction + newInsn.setArchRegWidth(constants_.regWidth); // Cache the instruction iter = decodeCache.insert({insn, newInsn}).first; } @@ -187,7 +226,7 @@ uint8_t Architecture::predecode(const void* ptr, uint8_t bytesAvailable, uop->setInstructionAddress(instructionAddress); - return 4; + return iter->second.getMetadata().lenBytes; } executionInfo Architecture::getExecutionInfo(Instruction& insn) const { @@ -216,9 +255,9 @@ std::vector Architecture::getRegisterFileStructures() const { uint16_t numSysRegs = static_cast(systemRegisterMap_.size()); return { - {8, 32}, // General purpose - {8, 32}, // Floating Point - {8, numSysRegs}, // System + {constants_.regWidth, 32}, // General purpose + {constants_.regWidth, 32}, // Floating Point + {constants_.regWidth, numSysRegs}, // System }; } @@ -234,12 +273,17 @@ ProcessStateChange Architecture::getInitialState() const { ProcessStateChange changes; // Set ProcessStateChange type changes.type = ChangeType::REPLACEMENT; - - uint64_t stackPointer = linux_.getInitialStackPointer(); - // Set the stack pointer register changes.modifiedRegisters.push_back({RegisterType::GENERAL, 2}); - changes.modifiedRegisterValues.push_back(stackPointer); - + uint64_t stackPointer; + // TODO: check if this conditional expression is needed + if(is32Bit_) { + stackPointer = (uint32_t)linux_.getInitialStackPointer(); + changes.modifiedRegisterValues.push_back((uint32_t)stackPointer); + } else + { + stackPointer = linux_.getInitialStackPointer(); + changes.modifiedRegisterValues.push_back(stackPointer); + } return changes; } @@ -247,9 +291,9 @@ uint8_t Architecture::getMaxInstructionSize() const { return 4; } std::vector Architecture::getConfigPhysicalRegisterStructure(YAML::Node config) const { - return {{8, config["Register-Set"]["GeneralPurpose-Count"].as()}, - {8, config["Register-Set"]["FloatingPoint-Count"].as()}, - {8, getNumSystemRegisters()}}; + return {{constants_.regWidth, 
config["Register-Set"]["GeneralPurpose-Count"].as()}, + {constants_.regWidth, config["Register-Set"]["FloatingPoint-Count"].as()}, + {constants_.regWidth, getNumSystemRegisters()}}; } std::vector Architecture::getConfigPhysicalRegisterQuantities( @@ -267,6 +311,76 @@ void Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) const { } +void Architecture::updateInstrTrace(const std::shared_ptr& instruction, + RegisterFileSet* regFile, uint64_t tick) const { + if(traceOn_) { + Instruction instr_ = *static_cast(instruction.get()); + auto& metadata = instr_.getMetadata(); + std::stringstream s; + s << "0x" << std::hex << instr_.getInstructionAddress() << " "; + if (tick < 100000000) + s << "t(" << std::setfill('0') << std::setw(8) << std::dec << (uint32_t)tick << ") "; + else + s << "t(" << std::setfill('0') << std::setw(16) << std::dec << (uint32_t)tick << ") "; + s << "("; + if(metadata.len == IL_16B) { + s << "0000"; + } + for(int8_t i=metadata.lenBytes; i>0; i--) { + s << std::hex << std::setfill('0') << std::setw(2) << static_cast(metadata.encoding[i-1]); + } + s << ") "; + s << metadata.mnemonic << " " << metadata.operandStr; + auto sources = instr_.getOperandRegisters(); + auto destinations = instr_.getDestinationRegisters(); + int8_t num_src = (int8_t)sources.size(); + int8_t num_dest = (int8_t)destinations.size(); + if((num_src + num_dest) >0) { + s << " "; + if (num_dest > 0) { + s << "(d: "; + for(int8_t i=0;iget(reg).get(); + if(i < (num_dest-1)) { + s << " "; + } + } + s << ") "; + } + if (num_src > 0) { + s << "(s: "; + for(int8_t i=0;iget(reg).get(); + if(i < (num_src-1)) { + s << " "; + } + } + s << ") "; + } + } + s << std::endl; + *traceFile_ << s.str(); + traceFile_->flush(); //Helps with debugging sometimes as all the state of previous committed instr is written to file. 
+ } +} +archConstants Architecture::getConstants() const { return constants_; } + } // namespace riscv } // namespace arch } // namespace simeng diff --git a/src/lib/arch/riscv/ExceptionHandler.cc b/src/lib/arch/riscv/ExceptionHandler.cc index 8f76c4cc3a..c88448048d 100644 --- a/src/lib/arch/riscv/ExceptionHandler.cc +++ b/src/lib/arch/riscv/ExceptionHandler.cc @@ -97,7 +97,8 @@ bool ExceptionHandler::init() { } case 57: { // close int64_t fd = registerFileSet.get(R0).get(); - stateChange = {ChangeType::REPLACEMENT, {R0}, {linux_.close(fd)}}; + stateChange = {ChangeType::REPLACEMENT, {R0}}; + stateChange.modifiedRegisterValues.push_back(RegisterValue(linux_.close(fd), instruction_.getArchRegWidth())); break; } case 61: { // getdents64 @@ -185,7 +186,8 @@ bool ExceptionHandler::init() { return readBufferThen(bufPtr, count, [=]() { int64_t retval = linux_.write(fd, dataBuffer.data(), count); ProcessStateChange stateChange = { - ChangeType::REPLACEMENT, {R0}, {retval}}; + ChangeType::REPLACEMENT, {R0}}; + stateChange.modifiedRegisterValues.push_back(RegisterValue(retval, instruction_.getArchRegWidth())); return concludeSyscall(stateChange); }); } @@ -354,7 +356,8 @@ bool ExceptionHandler::init() { kernel::stat statOut; stateChange = { - ChangeType::REPLACEMENT, {R0}, {linux_.fstat(fd, statOut)}}; + ChangeType::REPLACEMENT, {R0}}; + stateChange.modifiedRegisterValues.push_back(RegisterValue(linux_.fstat(fd, statOut), instruction_.getArchRegWidth())); stateChange.memoryAddresses.push_back({statbufPtr, sizeof(statOut)}); stateChange.memoryAddressValues.push_back(statOut); break; @@ -554,7 +557,8 @@ bool ExceptionHandler::init() { case 214: { // brk auto result = linux_.brk(registerFileSet.get(R0).get()); stateChange = { - ChangeType::REPLACEMENT, {R0}, {static_cast(result)}}; + ChangeType::REPLACEMENT, {R0}}; + stateChange.modifiedRegisterValues.push_back(RegisterValue(static_cast(result), instruction_.getArchRegWidth())); break; } case 215: { // munmap @@ -823,6 +827,9 @@ void ExceptionHandler::printException(const Instruction& insn) const { case InstructionException::NoAvailablePort: std::cout << "unsupported execution port"; break; + case InstructionException::UnmappedSysReg: + std::cout << "unmapped system register"; + break; default: std::cout << "unknown (id: " << static_cast(exception) << ")"; @@ -835,9 +842,9 @@ void ExceptionHandler::printException(const Instruction& insn) const { << insn.getInstructionAddress() << ": "; auto& metadata = insn.getMetadata(); - for (uint8_t byte : metadata.encoding) { + for (int8_t i = metadata.lenBytes; i > 0; i--) { std::cout << std::setfill('0') << std::setw(2) - << static_cast(byte) << " "; + << static_cast(metadata.encoding[i-1]); } std::cout << std::dec << " "; if (exception == InstructionException::EncodingUnallocated) { diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc index 530890e9a6..6cfc173b9d 100644 --- a/src/lib/arch/riscv/Instruction.cc +++ b/src/lib/arch/riscv/Instruction.cc @@ -11,6 +11,8 @@ namespace arch { namespace riscv { const Register Instruction::ZERO_REGISTER = {RegisterType::GENERAL, 0}; +const Register Instruction::RA_REGISTER = {RegisterType::GENERAL, 1}; +const Register Instruction::SP_REGISTER = {RegisterType::GENERAL, 2}; Instruction::Instruction(const Architecture& architecture, const InstructionMetadata& metadata) @@ -165,6 +167,10 @@ const std::vector& Instruction::getSupportedPorts() { const InstructionMetadata& Instruction::getMetadata() const { return metadata; } +void 
Instruction::setArchRegWidth(uint8_t len) { archRegWidth_ = len; } + +uint8_t Instruction::getArchRegWidth() const { return archRegWidth_; } + } // namespace riscv } // namespace arch } // namespace simeng diff --git a/src/lib/arch/riscv/InstructionMetadata.cc b/src/lib/arch/riscv/InstructionMetadata.cc index 595f5f6ece..f2b5a9b736 100644 --- a/src/lib/arch/riscv/InstructionMetadata.cc +++ b/src/lib/arch/riscv/InstructionMetadata.cc @@ -14,7 +14,9 @@ InstructionMetadata::InstructionMetadata(const cs_insn& insn) implicitSourceCount(insn.detail->regs_read_count), implicitDestinationCount(insn.detail->regs_write_count), operandCount(insn.detail->riscv.op_count) { - std::memcpy(encoding, insn.bytes, sizeof(encoding)); + setLength(insn.size); + std::memset(encoding, 0, 4); + std::memcpy(encoding, insn.bytes, insn.size); // Copy printed output std::strncpy(mnemonic, insn.mnemonic, CS_MNEMONIC_SIZE); operandStr = std::string(insn.op_str); @@ -36,7 +38,7 @@ InstructionMetadata::InstructionMetadata(const uint8_t* invalidEncoding, opcode(Opcode::RISCV_INSTRUCTION_LIST_END), implicitSourceCount(0), implicitDestinationCount(0), - operandCount(0) { + operandCount(0), len(IL_INVALID) { assert(bytes <= sizeof(encoding)); std::memcpy(encoding, invalidEncoding, bytes); mnemonic[0] = '\0'; @@ -252,6 +254,28 @@ void InstructionMetadata::alterPseudoInstructions(const cs_insn& insn) { } break; } + case Opcode::RISCV_CSRRW: + case Opcode::RISCV_CSRRS: + case Opcode::RISCV_CSRRC: + case Opcode::RISCV_CSRRWI: + case Opcode::RISCV_CSRRSI: + case Opcode::RISCV_CSRRCI: { + //Extract CSR info + csr = ((uint32_t)encoding[3] << 4) | ((uint32_t)encoding[2] >> 4); + //If there are less than 2 operands provided add necessary x0 operand + if(operandCount == 1) { + if(strcmp(mnemonic, "csrr") == 0) { //csrrs rd,csr,x0 + operands[1].type = RISCV_OP_REG; + operands[1].reg = 1; + } else { //csrrxx x0,csr,rs/imm + operands[1] = operands[0]; + operands[0].type = RISCV_OP_REG; + operands[0].reg = 1; + } + operandCount = 2; + } + break; + } } } @@ -278,6 +302,16 @@ void InstructionMetadata::includeZeroRegisterPosZero() { operandCount = 3; } + +void InstructionMetadata::setLength(uint8_t size) { + lenBytes = size; + switch(size) { + case 2: len = IL_16B; break; + case 4: len = IL_32B; break; + default: len = IL_INVALID; + } +} + } // namespace riscv } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/lib/arch/riscv/InstructionMetadata.hh b/src/lib/arch/riscv/InstructionMetadata.hh index af5bebf815..4ce164a346 100644 --- a/src/lib/arch/riscv/InstructionMetadata.hh +++ b/src/lib/arch/riscv/InstructionMetadata.hh @@ -14,6 +14,12 @@ namespace Opcode { #include "RISCVGenInstrInfo.inc" } // namespace Opcode +enum INSTR_LENGTH { + IL_16B, + IL_32B, + IL_INVALID +}; + /** A simplified RISC-V-only version of the Capstone instruction structure. */ struct InstructionMetadata { public: @@ -70,6 +76,13 @@ struct InstructionMetadata { /** The number of explicit operands. */ uint8_t operandCount; + /** The instruction length for variable instruction length support. */ + INSTR_LENGTH len; + uint8_t lenBytes; + + /** RISC-V CSR encoding */ + uint32_t csr = 0; + private: /** Detect instruction aliases and update metadata to match the de-aliased * instruction. 
*/ @@ -85,6 +98,9 @@ struct InstructionMetadata { /** RISC-V helper function * Use register zero as operands[0] and immediate value as operands[2] */ void includeZeroRegisterPosZero(); + + /** Set the byte length of instruction */ + void setLength(uint8_t size); }; } // namespace riscv diff --git a/src/lib/arch/riscv/Instruction_address.cc b/src/lib/arch/riscv/Instruction_address.cc index e893ce3644..52ee7484c0 100644 --- a/src/lib/arch/riscv/Instruction_address.cc +++ b/src/lib/arch/riscv/Instruction_address.cc @@ -31,7 +31,7 @@ span Instruction::generateAddresses() { setMemoryAddresses({{address, 4}}); } else { // Double - setMemoryAddresses({{address, 8}}); + setMemoryAddresses({{address, archRegWidth_}}); } return getGeneratedAddresses(); } @@ -40,7 +40,7 @@ span Instruction::generateAddresses() { case Opcode::RISCV_SD: [[fallthrough]]; case Opcode::RISCV_LD: { - setMemoryAddresses({{address, 8}}); + setMemoryAddresses({{address, archRegWidth_}}); break; } case Opcode::RISCV_SW: @@ -86,7 +86,7 @@ span Instruction::generateAddresses() { case Opcode::RISCV_LR_D_RL: [[fallthrough]]; case Opcode::RISCV_LR_D_AQ_RL: { - setMemoryAddresses({{operands[0].get(), 8}}); + setMemoryAddresses({{operands[0].get(), archRegWidth_}}); break; } case Opcode::RISCV_SC_W: @@ -106,9 +106,22 @@ span Instruction::generateAddresses() { case Opcode::RISCV_SC_D_RL: [[fallthrough]]; case Opcode::RISCV_SC_D_AQ_RL: { - setMemoryAddresses({{operands[1].get(), 8}}); + setMemoryAddresses({{operands[1].get(), archRegWidth_}}); break; } + case Opcode::RISCV_C_LW: + case Opcode::RISCV_C_FLWSP: + case Opcode::RISCV_C_LWSP: { + setMemoryAddresses({{operands[0].get() + c_imm, 4}}); + break; + } + case Opcode::RISCV_C_SW: + case Opcode::RISCV_C_FSWSP: + case Opcode::RISCV_C_SWSP: { + setMemoryAddresses({{operands[1].get() + c_imm, 4}}); + break; + } + default: exceptionEncountered_ = true; exception_ = InstructionException::ExecutionNotYetImplemented; diff --git a/src/lib/arch/riscv/Instruction_decode.cc b/src/lib/arch/riscv/Instruction_decode.cc index 6db263796b..8bdd5041eb 100644 --- a/src/lib/arch/riscv/Instruction_decode.cc +++ b/src/lib/arch/riscv/Instruction_decode.cc @@ -60,6 +60,15 @@ void Instruction::invalidateIfNotImplemented() { return; if (metadata.opcode == Opcode::RISCV_FENCE) return; + //C Extention + if (metadata.opcode >= Opcode::RISCV_C_ADD && + metadata.opcode <= Opcode::RISCV_C_XOR) + return; + //CSR operations + if (metadata.opcode >= Opcode::RISCV_CSRRC && + metadata.opcode <= Opcode::RISCV_CSRRWI) + return; + exception_ = InstructionException::EncodingUnallocated; exceptionEncountered_ = true; return; @@ -77,6 +86,15 @@ void Instruction::decode() { return; } + //Handle Compressed instruction separately for now. 
+ if (decode16()) { + return; + } + + if (decodeCsr()) { + return; + } + // Identify branches switch (metadata.opcode) { case Opcode::RISCV_BEQ: @@ -153,7 +171,7 @@ void Instruction::decode() { if (sourceRegisters[sourceRegisterCount] == Instruction::ZERO_REGISTER) { // Catch zero register references and pre-complete those operands - operands[sourceRegisterCount] = RegisterValue(0, 8); + operands[sourceRegisterCount] = RegisterValue(0, architecture_.getConstants().regWidth); } else { operandsPending++; } @@ -195,7 +213,7 @@ void Instruction::decode() { if (sourceRegisters[sourceRegisterCount] == Instruction::ZERO_REGISTER) { // Catch zero register references and pre-complete those operands - operands[sourceRegisterCount] = RegisterValue(0, 8); + operands[sourceRegisterCount] = RegisterValue(0, architecture_.getConstants().regWidth); } else { operandsPending++; } @@ -258,6 +276,297 @@ void Instruction::decode() { } } +bool Instruction::decode16() { + if (metadata.len != IL_16B) { + return false; + } + + switch (metadata.opcode) { + case Opcode::RISCV_C_JR: + case Opcode::RISCV_C_JALR: + isBranch_ = true; + instFormat_ = CIF_CR; + assert(metadata.operandCount==1 && + metadata.operands[0].type == RISCV_OP_REG && + csRegToRegister(metadata.operands[0].reg) != Instruction::ZERO_REGISTER && + "Invalid operand for JR,JALR:- CR instructions"); + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + operandsPending++; + if (metadata.opcode == Opcode::RISCV_C_JALR) { + destinationRegisters[destinationRegisterCount++] = Instruction::RA_REGISTER; + } + branchType_ = BranchType::Unconditional; + break; + case Opcode::RISCV_C_MV: + instFormat_ = CIF_CR; + assert(metadata.operandCount==2 && + metadata.operands[0].type == RISCV_OP_REG && + metadata.operands[1].type == RISCV_OP_REG && + csRegToRegister(metadata.operands[0].reg) != Instruction::ZERO_REGISTER && + csRegToRegister(metadata.operands[1].reg) != Instruction::ZERO_REGISTER && + "Invalid operand for MV:- CR instructions"); + destinationRegisters[destinationRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[1].reg); + operandsPending++; + break; + case Opcode::RISCV_C_EBREAK://TODO + instFormat_ = CIF_CR; + break; + case Opcode::RISCV_C_ADD: + instFormat_ = CIF_CR; + assert(metadata.operandCount==2 && + metadata.operands[0].type == RISCV_OP_REG && + metadata.operands[1].type == RISCV_OP_REG && + csRegToRegister(metadata.operands[0].reg) != Instruction::ZERO_REGISTER && + csRegToRegister(metadata.operands[1].reg) != Instruction::ZERO_REGISTER && + "Invalid operand for MV:- CR instructions"); + destinationRegisters[destinationRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + operandsPending++; + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[1].reg); + operandsPending++; + break; + case Opcode::RISCV_C_NOP://TODO + instFormat_ = CIF_CI; + break; + case Opcode::RISCV_C_ADDI: + //case Opcode::RISCV_C_ADDIW: + case Opcode::RISCV_C_LI: + case Opcode::RISCV_C_ADDI16SP: + case Opcode::RISCV_C_LUI: + case Opcode::RISCV_C_SLLI: + instFormat_ = CIF_CI; + assert(metadata.operandCount==2 && + metadata.operands[0].type == RISCV_OP_REG && + metadata.operands[1].type == RISCV_OP_IMM && + csRegToRegister(metadata.operands[0].reg) != Instruction::ZERO_REGISTER && + !(metadata.opcode == Opcode::RISCV_C_LUI && 
csRegToRegister(metadata.operands[0].reg) == Instruction::SP_REGISTER) && + !(metadata.opcode == Opcode::RISCV_C_ADDI16SP && csRegToRegister(metadata.operands[0].reg) != Instruction::SP_REGISTER) && + "Invalid operand for CI instructions"); + if (metadata.opcode != Opcode::RISCV_C_LUI && metadata.opcode != Opcode::RISCV_C_LI ) { + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + operandsPending++; + } + destinationRegisters[destinationRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + c_imm = metadata.operands[1].imm; + break; + case Opcode::RISCV_C_ADDI4SPN: + instFormat_ = CIF_CIW; + assert(metadata.operandCount==3 && + metadata.operands[0].type == RISCV_OP_REG && + metadata.operands[1].type == RISCV_OP_REG && + metadata.operands[2].type == RISCV_OP_IMM && + metadata.operands[2].imm != 0 && + metadata.operands[1].reg == 0x3 && + "Invalid operand for CIW instructions"); + sourceRegisters[sourceRegisterCount++] = Instruction::SP_REGISTER; + operandsPending++; + c_imm = metadata.operands[2].imm; + destinationRegisters[destinationRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + break; + case Opcode::RISCV_C_SUB: + case Opcode::RISCV_C_XOR: + case Opcode::RISCV_C_OR: + case Opcode::RISCV_C_AND: + //case Opcode::RISCV_C_SUBW: + //case Opcode::RISCV_C_ADDW: + instFormat_ = CIF_CA; + assert(metadata.operandCount==2 && + metadata.operands[0].type == RISCV_OP_REG && + metadata.operands[1].type == RISCV_OP_REG && + metadata.operands[0].reg > 8 && + metadata.operands[1].reg > 8 && + "Invalid operand for CA instructions"); + destinationRegisters[destinationRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + operandsPending++; + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[1].reg); + operandsPending++; + break; + case Opcode::RISCV_C_SRAI: + case Opcode::RISCV_C_SRLI: + case Opcode::RISCV_C_ANDI: + instFormat_ = CIF_CB; + assert(metadata.operandCount==2 && + metadata.operands[0].type == RISCV_OP_REG && + metadata.operands[1].type == RISCV_OP_IMM && + metadata.operands[0].reg > 8 && + "Invalid operand for CI instructions"); + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + operandsPending++; + destinationRegisters[destinationRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + c_imm = metadata.operands[1].imm; + break; + case Opcode::RISCV_C_BEQZ: + case Opcode::RISCV_C_BNEZ: + isBranch_ = true; + instFormat_ = CIF_CB; + assert(metadata.operandCount==2 && + metadata.operands[0].type == RISCV_OP_REG && + metadata.operands[1].type == RISCV_OP_IMM && + "Invalid operand for CB instructions"); + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + //No zero register check required. 
can assert for register >=X8 + operandsPending++; + c_imm = metadata.operands[1].imm; + branchType_ = BranchType::Conditional; + knownTarget_ = instructionAddress_ + metadata.operands[1].imm; + break; + case Opcode::RISCV_C_FLD: + case Opcode::RISCV_C_FLW: + case Opcode::RISCV_C_LD: + case Opcode::RISCV_C_LW: + instFormat_ = CIF_CL; + isLoad_ = true; + assert(metadata.operandCount==3 && + metadata.operands[0].type == RISCV_OP_REG && + metadata.operands[1].type == RISCV_OP_IMM && + metadata.operands[2].type == RISCV_OP_REG && + metadata.operands[0].reg > 8 && + metadata.operands[2].reg > 8 && + "Invalid operand for CL instructions"); + destinationRegisters[destinationRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + c_imm = metadata.operands[1].imm; + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[2].reg); + operandsPending++; + break; + //case Opcode::RISCV_C_FLDSP: + //case Opcode::RISCV_C_FLWSP: + case Opcode::RISCV_C_LWSP: + //case Opcode::RISCV_C_LDSP: + instFormat_ = CIF_CI; + isLoad_ = true; + assert(metadata.operandCount==3 && + metadata.operands[0].type == RISCV_OP_REG && + metadata.operands[1].type == RISCV_OP_IMM && + metadata.operands[2].type == RISCV_OP_REG && + csRegToRegister(metadata.operands[0].reg) != Instruction::ZERO_REGISTER && + metadata.operands[2].reg == 0x3 && + "Invalid operand for CI instructions"); + destinationRegisters[destinationRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + c_imm = metadata.operands[1].imm; + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[2].reg); + operandsPending++; + break; + case Opcode::RISCV_C_FSD: + case Opcode::RISCV_C_FSW: + case Opcode::RISCV_C_SW: + case Opcode::RISCV_C_SD: + instFormat_ = CIF_CS; + isStore_ = true; + assert(metadata.operandCount==3 && + metadata.operands[0].type == RISCV_OP_REG && + metadata.operands[1].type == RISCV_OP_IMM && + metadata.operands[2].type == RISCV_OP_REG && + metadata.operands[0].reg > 8 && + metadata.operands[2].reg > 8 && + "Invalid operand for CS instructions"); + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + operandsPending++; + c_imm = metadata.operands[1].imm; + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[2].reg); + operandsPending++; + break; + //case Opcode::RISCV_C_FSDSP: + //case Opcode::RISCV_C_FSWSP: + case Opcode::RISCV_C_SWSP: + //case Opcode::RISCV_C_SDSP: + instFormat_ = CIF_CSS; + isStore_ = true; + assert(metadata.operandCount==3 && + metadata.operands[0].type == RISCV_OP_REG && + metadata.operands[1].type == RISCV_OP_IMM && + metadata.operands[2].type == RISCV_OP_REG && + metadata.operands[2].reg == 0x3 && + "Invalid operand for CSS instructions"); + sourceRegisters[sourceRegisterCount] = csRegToRegister(metadata.operands[0].reg); + if (sourceRegisters[sourceRegisterCount] == + Instruction::ZERO_REGISTER) { + // Catch zero register references and pre-complete those operands + operands[sourceRegisterCount] = RegisterValue(0, 4); + } else { + operandsPending++; + } + sourceRegisterCount++; + c_imm = metadata.operands[1].imm; + sourceRegisters[sourceRegisterCount] = csRegToRegister(metadata.operands[2].reg); + sourceRegisterCount++; + operandsPending++; + break; + case Opcode::RISCV_C_J: + case Opcode::RISCV_C_JAL: + instFormat_ = CIF_CJ; + isBranch_ = true; + //Add assertion when first operand is not of type imm + assert(metadata.operandCount==1 && + metadata.operands[0].type == RISCV_OP_IMM && "Invalid operand for CJ 
instructions"); + c_imm = metadata.operands[0].imm; + if (metadata.opcode == Opcode::RISCV_C_JAL) { + destinationRegisters[destinationRegisterCount++] = Instruction::RA_REGISTER; + } + branchType_ = BranchType::Unconditional; + knownTarget_ = instructionAddress_ + metadata.operands[0].imm; + break; + case Opcode::RISCV_C_UNIMP: + break; + } + + assert(instFormat_!= CIF_INVALID && "Invalid format defined for a RISCV compressed instruction"); + return true; +} + +bool Instruction::decodeCsr() { + //CSR operations + if (!(metadata.opcode >= Opcode::RISCV_CSRRC && + metadata.opcode <= Opcode::RISCV_CSRRWI)) { + return false; + } + + isCsr_ = true; + uint32_t sysRegTag = architecture_.getSystemRegisterTag(metadata.csr); + if (sysRegTag == -1) { + exceptionEncountered_ = true; + exception_ = InstructionException::UnmappedSysReg; + sourceRegisterCount = 0; + destinationRegisterCount = 0; + return true; + } + + // CSR becomes first source and destination + sourceRegisters[sourceRegisterCount++] = { + RegisterType::SYSTEM, static_cast(sysRegTag)}; + operandsPending++; + destinationRegisters[destinationRegisterCount++] = { + RegisterType::SYSTEM, static_cast(sysRegTag)}; + + // First operand from metadata is rd, second operand from metadata is rs1 + if (csRegToRegister(metadata.operands[1].reg) != Instruction::ZERO_REGISTER) { + destinationRegisters[destinationRegisterCount++] = + csRegToRegister(metadata.operands[1].reg); + } + + if(metadata.operands[0].type == RISCV_OP_IMM) { + c_imm = metadata.operands[0].imm; + } else if (metadata.operands[0].type == RISCV_OP_REG) { + sourceRegisters[sourceRegisterCount] = csRegToRegister(metadata.operands[0].reg); + if (sourceRegisters[sourceRegisterCount] == + Instruction::ZERO_REGISTER) { + // Catch zero register references and pre-complete those operands + operands[sourceRegisterCount] = RegisterValue(0, 4); + } else { + operandsPending++; + } + sourceRegisterCount++; + } else { + exceptionEncountered_ = true; + exception_ = InstructionException::EncodingNotYetImplemented; + sourceRegisterCount = 0; + destinationRegisterCount = 0; + } + + return true; +} + } // namespace riscv } // namespace arch } // namespace simeng \ No newline at end of file diff --git a/src/lib/arch/riscv/Instruction_execute.cc b/src/lib/arch/riscv/Instruction_execute.cc index 005982a9fc..b7a4a822b4 100644 --- a/src/lib/arch/riscv/Instruction_execute.cc +++ b/src/lib/arch/riscv/Instruction_execute.cc @@ -62,6 +62,10 @@ uint64_t zeroExtend(uint64_t bits, uint64_t msb) { return rightShift; } +inline int64_t Instruction::getSignedInt(RegisterValue& value) const { + return (archRegWidth_ == 4) ? 
(int64_t)value.get() : value.get(); +} + void Instruction::executionNYI() { exceptionEncountered_ = true; exception_ = InstructionException::ExecutionNotYetImplemented; @@ -79,32 +83,38 @@ void Instruction::execute() { executed_ = true; switch (metadata.opcode) { case Opcode::RISCV_LB: { // LB rd,rs1,imm - results[0] = RegisterValue(bitExtend(memoryData[0].get(), 8), 8); + results[0] = RegisterValue(bitExtend(memoryData[0].get(), 8), + archRegWidth_); break; } case Opcode::RISCV_LBU: { // LBU rd,rs1,imm results[0] = - RegisterValue(zeroExtend(memoryData[0].get(), 8), 8); + RegisterValue(zeroExtend(memoryData[0].get(), 8), + archRegWidth_); break; } case Opcode::RISCV_LH: { // LH rd,rs1,imm results[0] = - RegisterValue(bitExtend(memoryData[0].get(), 16), 8); + RegisterValue(bitExtend(memoryData[0].get(), 16), + archRegWidth_); break; } case Opcode::RISCV_LHU: { // LHU rd,rs1,imm results[0] = - RegisterValue(zeroExtend(memoryData[0].get(), 16), 8); + RegisterValue(zeroExtend(memoryData[0].get(), 16), + archRegWidth_); break; } case Opcode::RISCV_LW: { // LW rd,rs1,imm results[0] = - RegisterValue(bitExtend(memoryData[0].get(), 32), 8); + RegisterValue(bitExtend(memoryData[0].get(), 32), + archRegWidth_); break; } case Opcode::RISCV_LWU: { // LWU rd,rs1,imm results[0] = - RegisterValue(zeroExtend(memoryData[0].get(), 32), 8); + RegisterValue(zeroExtend(memoryData[0].get(), 32), + archRegWidth_); break; } case Opcode::RISCV_LD: { // LD rd,rs1,imm @@ -123,19 +133,19 @@ void Instruction::execute() { break; } case Opcode::RISCV_SLL: { // SLL rd,rs1,rs2 - const int64_t rs1 = operands[0].get(); + const int64_t rs1 = getSignedInt(operands[0]); const int64_t rs2 = - operands[1].get() & 63; // Only use lowest 6 bits + getSignedInt(operands[1]) & 63; // Only use lowest 6 bits int64_t out = static_cast(rs1 << rs2); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SLLI: { // SLLI rd,rs1,shamt - const int64_t rs1 = operands[0].get(); + const int64_t rs1 = getSignedInt(operands[0]); const int64_t shamt = metadata.operands[2].imm & 63; // Only use lowest 6 bits int64_t out = static_cast(rs1 << shamt); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SLLW: { // SLLW rd,rs1,rs2 @@ -143,7 +153,7 @@ void Instruction::execute() { const int32_t rs2 = operands[1].get() & 63; // Only use lowest 6 bits int64_t out = signExtendW(static_cast(rs1 << rs2)); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SLLIW: { // SLLIW rd,rs1,shamt @@ -151,7 +161,7 @@ void Instruction::execute() { const int32_t shamt = metadata.operands[2].imm & 63; // Only use lowest 6 bits uint64_t out = signExtendW(static_cast(rs1 << shamt)); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SRL: { // SRL rd,rs1,rs2 @@ -159,7 +169,7 @@ void Instruction::execute() { const uint64_t rs2 = operands[1].get() & 63; // Only use lowest 6 bits uint64_t out = static_cast(rs1 >> rs2); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SRLI: { // SRLI rd,rs1,shamt @@ -167,7 +177,7 @@ void Instruction::execute() { const uint64_t shamt = metadata.operands[2].imm & 63; // Only use lowest 6 bits uint64_t out = static_cast(rs1 >> shamt); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case 
Opcode::RISCV_SRLW: { // SRLW rd,rs1,rs2 @@ -175,7 +185,7 @@ void Instruction::execute() { const uint32_t rs2 = operands[1].get() & 63; // Only use lowest 6 bits uint64_t out = signExtendW(static_cast(rs1 >> rs2)); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SRLIW: { // SRLIW rd,rs1,shamt @@ -183,23 +193,23 @@ void Instruction::execute() { const uint32_t shamt = metadata.operands[2].imm & 63; // Only use lowest 6 bits uint64_t out = signExtendW(static_cast(rs1 >> shamt)); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SRA: { // SRA rd,rs1,rs2 - const int64_t rs1 = operands[0].get(); + const int64_t rs1 = getSignedInt(operands[0]); const int64_t rs2 = - operands[1].get() & 63; // Only use lowest 6 bits + getSignedInt(operands[1]) & 63; // Only use lowest 6 bits int64_t out = static_cast(rs1 >> rs2); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SRAI: { // SRAI rd,rs1,shamt - const int64_t rs1 = operands[0].get(); + const int64_t rs1 = getSignedInt(operands[0]); const int64_t shamt = metadata.operands[2].imm & 63; // Only use lowest 6 bits int64_t out = static_cast(rs1 >> shamt); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SRAW: { // SRAW rd,rs1,rs2 @@ -207,7 +217,7 @@ void Instruction::execute() { const int32_t rs2 = operands[1].get() & 63; // Only use lowest 6 bits int64_t out = static_cast(rs1 >> rs2); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SRAIW: { // SRAIW rd,rs1,shamt @@ -215,55 +225,55 @@ void Instruction::execute() { const int32_t shamt = metadata.operands[2].imm & 63; // Only use lowest 6 bits int64_t out = static_cast(rs1 >> shamt); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_ADD: { // ADD rd,rs1,rs2 const uint64_t rs1 = operands[0].get(); const uint64_t rs2 = operands[1].get(); uint64_t out = static_cast(rs1 + rs2); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_ADDW: { // ADDW rd,rs1,rs2 const int32_t rs1 = operands[0].get(); const int32_t rs2 = operands[1].get(); int64_t out = static_cast(static_cast(rs1 + rs2)); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_ADDI: { // ADDI rd,rs1,imm const uint64_t rs1 = operands[0].get(); const uint64_t rs2 = metadata.operands[2].imm; uint64_t out = static_cast(rs1 + rs2); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_ADDIW: { // ADDIW rd,rs1,imm const int32_t rs1 = operands[0].get(); const int32_t imm = metadata.operands[2].imm; uint64_t out = signExtendW(rs1 + imm); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SUB: { // SUB rd,rs1,rs2 const uint64_t rs1 = operands[0].get(); const uint64_t rs2 = operands[1].get(); uint64_t out = static_cast(rs1 - rs2); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SUBW: { // SUBW rd,rs1,rs2 const int32_t rs1 = operands[0].get(); const int32_t rs2 = operands[1].get(); int64_t out = static_cast(static_cast(rs1 - rs2)); - results[0] = 
RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_LUI: { // LUI rd,imm uint64_t out = signExtendW(metadata.operands[1].imm << 12); // Shift into upper 20 bits - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_AUIPC: { // AUIPC rd,imm @@ -271,58 +281,58 @@ void Instruction::execute() { const int64_t uimm = signExtendW(metadata.operands[1].imm << 12); // Shift into upper 20 bits uint64_t out = static_cast(pc + uimm); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_XOR: { // XOR rd,rs1,rs2 const uint64_t rs1 = operands[0].get(); const uint64_t rs2 = operands[1].get(); uint64_t out = static_cast(rs1 ^ rs2); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_XORI: { // XORI rd,rs1,imm const uint64_t rs1 = operands[0].get(); const uint64_t imm = metadata.operands[2].imm; uint64_t out = static_cast(rs1 ^ imm); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_OR: { // OR rd,rs1,rs2 const uint64_t rs1 = operands[0].get(); const uint64_t rs2 = operands[1].get(); uint64_t out = static_cast(rs1 | rs2); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_ORI: { // ORI rd,rs1,imm const uint64_t rs1 = operands[0].get(); const uint64_t imm = metadata.operands[2].imm; uint64_t out = static_cast(rs1 | imm); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_AND: { // AND rd,rs1,rs2 const uint64_t rs1 = operands[0].get(); const uint64_t rs2 = operands[1].get(); uint64_t out = static_cast(rs1 & rs2); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_ANDI: { // ANDI rd,rs1,imm const uint64_t rs1 = operands[0].get(); const uint64_t imm = metadata.operands[2].imm; uint64_t out = static_cast(rs1 & imm); - results[0] = RegisterValue(out, 8); + results[0] = RegisterValue(out, archRegWidth_); break; } case Opcode::RISCV_SLT: { // SLT rd,rs1,rs2 - const int64_t rs1 = operands[0].get(); - const int64_t rs2 = operands[1].get(); + const int64_t rs1 = getSignedInt(operands[0]); + const int64_t rs2 = getSignedInt(operands[1]); if (rs1 < rs2) { - results[0] = RegisterValue(static_cast(1), 8); + results[0] = RegisterValue(static_cast(1), archRegWidth_); } else { - results[0] = RegisterValue(static_cast(0), 8); + results[0] = RegisterValue(static_cast(0), archRegWidth_); } break; } @@ -330,19 +340,19 @@ void Instruction::execute() { const uint64_t rs1 = operands[0].get(); const uint64_t rs2 = operands[1].get(); if (rs1 < rs2) { - results[0] = RegisterValue(static_cast(1), 8); + results[0] = RegisterValue(static_cast(1), archRegWidth_); } else { - results[0] = RegisterValue(static_cast(0), 8); + results[0] = RegisterValue(static_cast(0), archRegWidth_); } break; } case Opcode::RISCV_SLTI: { // SLTI rd,rs1,imm - const int64_t rs1 = operands[0].get(); + const int64_t rs1 = getSignedInt(operands[0]); const int64_t imm = metadata.operands[2].imm; if (rs1 < imm) { - results[0] = RegisterValue(static_cast(1), 8); + results[0] = RegisterValue(static_cast(1), archRegWidth_); } else { - results[0] = RegisterValue(static_cast(0), 8); + results[0] = RegisterValue(static_cast(0), archRegWidth_); } break; } @@ -350,9 +360,9 @@ void 
Instruction::execute() { const uint64_t rs1 = operands[0].get(); const uint64_t imm = static_cast(metadata.operands[2].imm); if (rs1 < imm) { - results[0] = RegisterValue(static_cast(1), 8); + results[0] = RegisterValue(static_cast(1), archRegWidth_); } else { - results[0] = RegisterValue(static_cast(0), 8); + results[0] = RegisterValue(static_cast(0), archRegWidth_); } break; } @@ -383,8 +393,8 @@ void Instruction::execute() { break; } case Opcode::RISCV_BLT: { // BLT rs1,rs2,imm - const int64_t rs1 = operands[0].get(); - const int64_t rs2 = operands[1].get(); + const int64_t rs1 = getSignedInt(operands[0]); + const int64_t rs2 = getSignedInt(operands[1]); if (rs1 < rs2) { branchAddress_ = instructionAddress_ + metadata.operands[2].imm; // Set LSB of result to 0 @@ -409,8 +419,9 @@ void Instruction::execute() { break; } case Opcode::RISCV_BGE: { // BGE rs1,rs2,imm - const int64_t rs1 = operands[0].get(); - const int64_t rs2 = operands[1].get(); + const int64_t rs1 = getSignedInt(operands[0]); + const int64_t rs2 = getSignedInt(operands[1]); + if (rs1 >= rs2) { branchAddress_ = instructionAddress_ + metadata.operands[2].imm; // Set LSB of result to 0 @@ -438,7 +449,7 @@ void Instruction::execute() { branchAddress_ = instructionAddress_ + metadata.operands[1].imm; // Set LSB of result to 0 branchTaken_ = true; - results[0] = RegisterValue(instructionAddress_ + 4, 8); + results[0] = RegisterValue(instructionAddress_ + 4, archRegWidth_); break; } case Opcode::RISCV_JALR: { // JALR rd,rs1,imm @@ -446,7 +457,7 @@ void Instruction::execute() { (operands[0].get() + metadata.operands[2].imm) & ~1; // Set LSB of result to 0 branchTaken_ = true; - results[0] = RegisterValue(instructionAddress_ + 4, 8); + results[0] = RegisterValue(instructionAddress_ + 4, archRegWidth_); break; } // TODO EBREAK @@ -481,14 +492,15 @@ void Instruction::execute() { // TODO use aq and rl bits to prevent reordering with other memory // operations results[0] = - RegisterValue(bitExtend(memoryData[0].get(), 32), 8); + RegisterValue(bitExtend(memoryData[0].get(), 32), + archRegWidth_); break; } case Opcode::RISCV_LR_D: // LR.D rd,rs1 case Opcode::RISCV_LR_D_AQ: case Opcode::RISCV_LR_D_RL: case Opcode::RISCV_LR_D_AQ_RL: { - results[0] = RegisterValue(memoryData[0].get(), 8); + results[0] = RegisterValue(memoryData[0].get(), archRegWidth_); break; } case Opcode::RISCV_SC_W: // SC.W rd,rs1,rs2 @@ -507,7 +519,7 @@ void Instruction::execute() { // TODO use aq and rl bits to prevent reordering with other memory // operations memoryData[0] = operands[0]; - results[0] = RegisterValue(static_cast(0), 8); + results[0] = RegisterValue(static_cast(0), archRegWidth_); break; } case Opcode::RISCV_AMOSWAP_W: // AMOSWAP.W rd,rs1,rs2 @@ -521,7 +533,7 @@ void Instruction::execute() { // TODO account for AQ and RL bits int64_t rd = signExtendW(memoryData[0].get()); int32_t rs2 = operands[0].get(); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = rs2; break; } @@ -531,7 +543,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOSWAP_D_AQ_RL: { uint64_t rd = memoryData[0].get(); uint64_t rs2 = operands[0].get(); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = rs2; break; } @@ -540,7 +552,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOADD_W_RL: case Opcode::RISCV_AMOADD_W_AQ_RL: { int64_t rd = signExtendW(memoryData[0].get()); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = 
static_cast(rd + operands[0].get()); break; } @@ -549,7 +561,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOADD_D_RL: case Opcode::RISCV_AMOADD_D_AQ_RL: { int64_t rd = memoryData[0].get(); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = static_cast(rd + operands[0].get()); break; } @@ -558,7 +570,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOAND_W_RL: case Opcode::RISCV_AMOAND_W_AQ_RL: { int64_t rd = signExtendW(memoryData[0].get()); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = static_cast(rd & operands[0].get()); break; } @@ -567,7 +579,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOAND_D_RL: case Opcode::RISCV_AMOAND_D_AQ_RL: { int64_t rd = memoryData[0].get(); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = static_cast(rd & operands[0].get()); break; } @@ -576,7 +588,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOOR_W_RL: case Opcode::RISCV_AMOOR_W_AQ_RL: { int64_t rd = signExtendW(memoryData[0].get()); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = static_cast(rd | operands[0].get()); break; } @@ -585,7 +597,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOOR_D_RL: case Opcode::RISCV_AMOOR_D_AQ_RL: { int64_t rd = memoryData[0].get(); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = static_cast(rd | operands[0].get()); break; } @@ -594,7 +606,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOXOR_W_RL: case Opcode::RISCV_AMOXOR_W_AQ_RL: { int64_t rd = signExtendW(memoryData[0].get()); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = static_cast(rd ^ operands[0].get()); break; } @@ -603,7 +615,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOXOR_D_RL: case Opcode::RISCV_AMOXOR_D_AQ_RL: { int64_t rd = memoryData[0].get(); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = static_cast(rd ^ operands[0].get()); break; } @@ -612,7 +624,8 @@ void Instruction::execute() { case Opcode::RISCV_AMOMIN_W_AQ: case Opcode::RISCV_AMOMIN_W_RL: case Opcode::RISCV_AMOMIN_W_AQ_RL: { - results[0] = RegisterValue(signExtendW(memoryData[0].get()), 8); + results[0] = RegisterValue(signExtendW(memoryData[0].get()), + archRegWidth_); memoryData[0] = std::min(memoryData[0].get(), operands[0].get()); break; @@ -622,7 +635,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOMIN_D_RL: case Opcode::RISCV_AMOMIN_D_AQ_RL: { int64_t rd = memoryData[0].get(); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = static_cast(std::min(rd, operands[0].get())); break; @@ -631,7 +644,8 @@ void Instruction::execute() { case Opcode::RISCV_AMOMINU_W_AQ: case Opcode::RISCV_AMOMINU_W_RL: case Opcode::RISCV_AMOMINU_W_AQ_RL: { - results[0] = RegisterValue(signExtendW(memoryData[0].get()), 8); + results[0] = RegisterValue(signExtendW(memoryData[0].get()), + archRegWidth_); memoryData[0] = std::min(memoryData[0].get(), operands[0].get()); break; @@ -641,7 +655,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOMINU_D_RL: case Opcode::RISCV_AMOMINU_D_AQ_RL: { uint64_t rd = memoryData[0].get(); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = static_cast(std::min(rd, operands[0].get())); break; 
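// The hunks above and below replace every hard-coded 8-byte result width with
// archRegWidth_ and read signed operands through getSignedInt(). A minimal
// illustration of why that narrowing matters (a sketch only; it assumes
// archRegWidth_ is 4 when running in rv32 mode, as this patch configures):
//
//   uint64_t raw = 0xFFFFFFFFull;  // an rv32 register holding -1
//   int64_t asIs = static_cast<int64_t>(raw);                           // 4294967295
//   int64_t resigned = static_cast<int64_t>(static_cast<int32_t>(raw)); // -1
//
// Without the int32_t step, rv32 SLT/BLT/DIV/REM would treat negative
// operands as large unsigned values.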
@@ -651,7 +665,8 @@ void Instruction::execute() { case Opcode::RISCV_AMOMAX_W_AQ: case Opcode::RISCV_AMOMAX_W_RL: case Opcode::RISCV_AMOMAX_W_AQ_RL: { - results[0] = RegisterValue(signExtendW(memoryData[0].get()), 8); + results[0] = RegisterValue(signExtendW(memoryData[0].get()), + archRegWidth_); memoryData[0] = std::max(memoryData[0].get(), operands[0].get()); break; @@ -661,7 +676,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOMAX_D_RL: case Opcode::RISCV_AMOMAX_D_AQ_RL: { int64_t rd = memoryData[0].get(); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = static_cast(std::max(rd, operands[0].get())); break; @@ -670,7 +685,8 @@ void Instruction::execute() { case Opcode::RISCV_AMOMAXU_W_AQ: case Opcode::RISCV_AMOMAXU_W_RL: case Opcode::RISCV_AMOMAXU_W_AQ_RL: { - results[0] = RegisterValue(signExtendW(memoryData[0].get()), 8); + results[0] = RegisterValue(signExtendW(memoryData[0].get()), + archRegWidth_); memoryData[0] = std::max(memoryData[0].get(), operands[0].get()); break; @@ -680,7 +696,7 @@ void Instruction::execute() { case Opcode::RISCV_AMOMAXU_D_RL: case Opcode::RISCV_AMOMAXU_D_AQ_RL: { uint64_t rd = memoryData[0].get(); - results[0] = RegisterValue(rd, 8); + results[0] = RegisterValue(rd, archRegWidth_); memoryData[0] = static_cast(std::max(rd, operands[0].get())); break; @@ -688,9 +704,9 @@ void Instruction::execute() { // Integer multiplication division extension (M) case Opcode::RISCV_MUL: { // MUL rd,rs1,rs2 - const int64_t rs1 = operands[0].get(); - const int64_t rs2 = operands[1].get(); - results[0] = RegisterValue(static_cast(rs1 * rs2), 8); + const int64_t rs1 = getSignedInt(operands[0]); + const int64_t rs2 = getSignedInt(operands[1]); + results[0] = RegisterValue(static_cast(rs1 * rs2), archRegWidth_); break; } // case Opcode::RISCV_MULH: {//MULH rd,rs1,rs2 @@ -704,7 +720,7 @@ void Instruction::execute() { case Opcode::RISCV_MULHU: { // MULHU rd,rs1,rs2 const uint64_t rs1 = operands[0].get(); const uint64_t rs2 = operands[1].get(); - results[0] = RegisterValue(mulhiuu(rs1, rs2), 8); + results[0] = RegisterValue(mulhiuu(rs1, rs2), archRegWidth_); break; } // case Opcode::RISCV_MULHSU: {//MULHSU rd,rs1,rs2 @@ -718,21 +734,21 @@ void Instruction::execute() { case Opcode::RISCV_MULW: { // MULW rd,rs1,rs2 const uint32_t rs1 = operands[0].get(); const uint32_t rs2 = operands[1].get(); - results[0] = RegisterValue(signExtendW(rs1 * rs2), 8); + results[0] = RegisterValue(signExtendW(rs1 * rs2), archRegWidth_); break; } case Opcode::RISCV_DIV: { // DIV rd,rs1,rs2 - const int64_t rs1 = operands[0].get(); - const int64_t rs2 = operands[1].get(); + const int64_t rs1 = getSignedInt(operands[0]); + const int64_t rs2 = getSignedInt(operands[1]); if (rs2 == 0) { // divide by zero - results[0] = RegisterValue(static_cast(-1), 8); + results[0] = RegisterValue(static_cast(-1), archRegWidth_); } else if (rs1 == static_cast(0x8000000000000000) && rs2 == -1) { // division overflow - results[0] = RegisterValue(rs1, 8); + results[0] = RegisterValue(rs1, archRegWidth_); } else { - results[0] = RegisterValue(static_cast(rs1 / rs2), 8); + results[0] = RegisterValue(static_cast(rs1 / rs2), archRegWidth_); } break; } @@ -741,13 +757,15 @@ void Instruction::execute() { const int32_t rs2 = operands[1].get(); if (rs2 == 0) { // divide by zero - results[0] = RegisterValue(static_cast(-1), 8); + results[0] = RegisterValue(static_cast(-1), archRegWidth_); } else if (rs1 == static_cast(0x80000000) && rs2 == -1) { // division overflow - 
results[0] = RegisterValue(static_cast(signExtendW(rs1)), 8); + results[0] = RegisterValue(static_cast(signExtendW(rs1)), + archRegWidth_); } else { results[0] = - RegisterValue(static_cast(signExtendW(rs1 / rs2)), 8); + RegisterValue(static_cast(signExtendW(rs1 / rs2)), + archRegWidth_); } break; } @@ -756,9 +774,9 @@ void Instruction::execute() { const uint64_t rs2 = operands[1].get(); if (rs2 == 0) { // divide by zero - results[0] = RegisterValue(static_cast(-1), 8); + results[0] = RegisterValue(static_cast(-1), archRegWidth_); } else { - results[0] = RegisterValue(static_cast(rs1 / rs2), 8); + results[0] = RegisterValue(static_cast(rs1 / rs2), archRegWidth_); } break; } @@ -767,24 +785,24 @@ void Instruction::execute() { const uint32_t rs2 = operands[1].get(); if (rs2 == 0) { // divide by zero - results[0] = RegisterValue(static_cast(-1), 8); + results[0] = RegisterValue(static_cast(-1), archRegWidth_); } else { results[0] = - RegisterValue(static_cast(signExtendW(rs1 / rs2)), 8); + RegisterValue(static_cast(signExtendW(rs1 / rs2)), archRegWidth_); } break; } case Opcode::RISCV_REM: { // REM rd,rs1,rs2 - const int64_t rs1 = operands[0].get(); - const int64_t rs2 = operands[1].get(); + const int64_t rs1 = getSignedInt(operands[0]); + const int64_t rs2 = getSignedInt(operands[1]); if (rs2 == 0) { // divide by zero - results[0] = RegisterValue(static_cast(rs1), 8); + results[0] = RegisterValue(static_cast(rs1), archRegWidth_); } else if (rs1 == static_cast(0x8000000000000000) && rs2 == -1) { // division overflow - results[0] = RegisterValue(static_cast(0), 8); + results[0] = RegisterValue(static_cast(0), archRegWidth_); } else { - results[0] = RegisterValue(static_cast(rs1 % rs2), 8); + results[0] = RegisterValue(static_cast(rs1 % rs2), archRegWidth_); } break; } @@ -793,13 +811,15 @@ void Instruction::execute() { const int32_t rs2 = operands[1].get(); if (rs2 == 0) { // divide by zero - results[0] = RegisterValue(static_cast(signExtendW(rs1)), 8); + results[0] = RegisterValue(static_cast(signExtendW(rs1)), + archRegWidth_); } else if (rs1 == static_cast(0x80000000) && rs2 == -1) { // division overflow - results[0] = RegisterValue(static_cast(0), 8); + results[0] = RegisterValue(static_cast(0), archRegWidth_); } else { results[0] = - RegisterValue(static_cast(signExtendW(rs1 % rs2)), 8); + RegisterValue(static_cast(signExtendW(rs1 % rs2)), + archRegWidth_); } break; } @@ -808,9 +828,9 @@ void Instruction::execute() { const uint64_t rs2 = operands[1].get(); if (rs2 == 0) { // divide by zero - results[0] = RegisterValue(rs1, 8); + results[0] = RegisterValue(rs1, archRegWidth_); } else { - results[0] = RegisterValue(static_cast(rs1 % rs2), 8); + results[0] = RegisterValue(static_cast(rs1 % rs2), archRegWidth_); } break; } @@ -819,13 +839,214 @@ void Instruction::execute() { const uint32_t rs2 = operands[1].get(); if (rs2 == 0) { // divide by zero - results[0] = RegisterValue(static_cast(signExtendW(rs1)), 8); + results[0] = RegisterValue(static_cast(signExtendW(rs1)), + archRegWidth_); } else { results[0] = - RegisterValue(static_cast(signExtendW(rs1 % rs2)), 8); + RegisterValue(static_cast(signExtendW(rs1 % rs2)), archRegWidth_); } break; } + case Opcode::RISCV_CSRRC: { + uint32_t old_csr_value = operands[0].get(); + uint32_t new_csr_value = old_csr_value & ~(operands[1].get()); + results[0] = RegisterValue(new_csr_value, 4); + results[1] = RegisterValue(old_csr_value, 4); + } + case Opcode::RISCV_CSRRCI: { + uint32_t old_csr_value = operands[0].get(); + uint32_t new_csr_value = 
old_csr_value & ~(c_imm); + results[0] = RegisterValue(new_csr_value, 4); + results[1] = RegisterValue(old_csr_value, 4); + } + case Opcode::RISCV_CSRRS: { + uint32_t old_csr_value = operands[0].get(); + uint32_t new_csr_value = old_csr_value | (operands[1].get()); + results[0] = RegisterValue(new_csr_value, 4); + results[1] = RegisterValue(old_csr_value, 4); + break; + } + case Opcode::RISCV_CSRRSI: { + uint32_t old_csr_value = operands[0].get(); + uint32_t new_csr_value = old_csr_value | (c_imm); + results[0] = RegisterValue(new_csr_value, 4); + results[1] = RegisterValue(old_csr_value, 4); + break; + } + case Opcode::RISCV_CSRRW: { + uint32_t old_csr_value = operands[0].get(); + uint32_t new_csr_value = operands[1].get(); + results[0] = RegisterValue(new_csr_value, 4); + results[1] = RegisterValue(old_csr_value, 4); + break; + } + case Opcode::RISCV_CSRRWI: { + uint32_t old_csr_value = operands[0].get(); + uint32_t new_csr_value = c_imm; + results[0] = RegisterValue(new_csr_value, 4); + results[1] = RegisterValue(old_csr_value, 4); + break; + } + case Opcode::RISCV_C_ADD: { + const uint32_t rs1 = operands[0].get(); + const uint32_t rs2 = operands[1].get(); + uint32_t out = (rs1 + rs2); + results[0] = RegisterValue(out, 4); + break; + } + case Opcode::RISCV_C_ADDI4SPN: + case Opcode::RISCV_C_ADDI16SP: + case Opcode::RISCV_C_ADDI: { + uint32_t out = (operands[0].get() + c_imm); + results[0] = RegisterValue(out, 4); + break; + } + //case Opcode::RISCV_C_ADDIW: + //case Opcode::RISCV_C_ADDW: + case Opcode::RISCV_C_AND: { + const uint32_t rs1 = operands[0].get(); + const uint32_t rs2 = operands[1].get(); + uint32_t out = (rs1 & rs2); + results[0] = RegisterValue(out, 4); + break; + } + case Opcode::RISCV_C_ANDI: { + const uint32_t rs1 = operands[0].get(); + uint32_t out = (rs1 & c_imm); + results[0] = RegisterValue(out, 4); + break; + } + case Opcode::RISCV_C_BEQZ: { + const uint32_t rs1 = operands[0].get(); + if (rs1 == 0) { + branchAddress_ = (uint32_t)(instructionAddress_ + c_imm); // Set LSB of result to 0 + branchTaken_ = true; + } else { + branchAddress_ = instructionAddress_ + 2; + branchTaken_ = false; + } + break; + } + case Opcode::RISCV_C_BNEZ: { + const uint32_t rs1 = operands[0].get(); + if (rs1 != 0) { + branchAddress_ = (uint32_t)(instructionAddress_ + c_imm); + branchTaken_ = true; + } else { + branchAddress_ = instructionAddress_ + 2; + branchTaken_ = false; + } + break; + } + case Opcode::RISCV_C_EBREAK: + break; + case Opcode::RISCV_C_FLD: + break; + case Opcode::RISCV_C_FLDSP: + break; + case Opcode::RISCV_C_FLW: + break; + case Opcode::RISCV_C_FLWSP: + break; + case Opcode::RISCV_C_FSD: + break; + case Opcode::RISCV_C_FSDSP: + break; + case Opcode::RISCV_C_FSW: + break; + case Opcode::RISCV_C_J: + case Opcode::RISCV_C_JAL: { + branchAddress_ = (uint32_t)(instructionAddress_ + c_imm); + branchTaken_ = true; + results[0] = RegisterValue(static_cast(instructionAddress_ + 2), 4); + break; + } + case Opcode::RISCV_C_JR: + case Opcode::RISCV_C_JALR: { + branchAddress_ = (operands[0].get()) & ~1;// Set LSB of result to 0 + branchTaken_ = true; + results[0] = RegisterValue(static_cast(instructionAddress_ + 2), 4); + break; + } + case Opcode::RISCV_C_LD: + case Opcode::RISCV_C_LDSP: + break; + case Opcode::RISCV_C_LI: { + uint32_t out = signExtendW(metadata.operands[1].imm); + results[0] = RegisterValue(static_cast(out), 4); + break; + } + case Opcode::RISCV_C_LUI: { + uint32_t out = signExtendW(metadata.operands[1].imm + << 12); // Shift into upper 20 bits + results[0] = 
RegisterValue(static_cast(out), 4); + break; + } + case Opcode::RISCV_C_LW: + case Opcode::RISCV_C_LWSP: { + results[0] = RegisterValue(bitExtend(memoryData[0].get(), 32), 4); + break; + } + case Opcode::RISCV_C_MV: { + results[0] = RegisterValue(operands[0].get(), 4); + break; + } + case Opcode::RISCV_C_NOP: + break; + case Opcode::RISCV_C_OR: { + const uint32_t rs1 = operands[0].get(); + const uint32_t rs2 = operands[1].get(); + uint32_t out = (rs1 | rs2); + results[0] = RegisterValue(out, 4); + break; + } + case Opcode::RISCV_C_SD: + break; + case Opcode::RISCV_C_SDSP: + break; + case Opcode::RISCV_C_SLLI: { + const int32_t rs1 = operands[0].get(); + const int32_t shamt = c_imm & 63; // Only use lowest 6 bits + int32_t out = (rs1 << shamt); + results[0] = RegisterValue(out, 4); + break; + } + case Opcode::RISCV_C_SRAI: { + const int32_t rs1 = operands[0].get(); + int32_t out = (rs1 >> (c_imm & 63)); + results[0] = RegisterValue(out, 4); + break; + } + case Opcode::RISCV_C_SRLI: { + const uint32_t rs1 = operands[0].get(); + uint32_t out = (rs1 >> (c_imm & 63)); + results[0] = RegisterValue(out, 4); + break; + } + case Opcode::RISCV_C_SUB: { + const uint32_t rs1 = operands[0].get(); + const uint32_t rs2 = operands[1].get(); + uint32_t out = (rs1 - rs2); + results[0] = RegisterValue(out, 4); + break; + } + case Opcode::RISCV_C_SUBW: + break; + case Opcode::RISCV_C_SW: + case Opcode::RISCV_C_FSWSP: + case Opcode::RISCV_C_SWSP: { + memoryData[0] = operands[0]; + break; + } + case Opcode::RISCV_C_UNIMP: + break; + case Opcode::RISCV_C_XOR: { + const uint32_t rs1 = operands[0].get(); + const uint32_t rs2 = operands[1].get(); + uint32_t out = (rs1 ^ rs2); + results[0] = RegisterValue(out, 4); + break; + } default: return executionNYI(); diff --git a/src/lib/models/emulation/Core.cc b/src/lib/models/emulation/Core.cc index 1d572ee160..0eff31d5a5 100644 --- a/src/lib/models/emulation/Core.cc +++ b/src/lib/models/emulation/Core.cc @@ -44,27 +44,6 @@ void Core::tick() { return; } - if (pendingReads_ > 0) { - // Handle pending reads to a uop - auto& uop = microOps_.front(); - - const auto& completedReads = dataMemory_.getCompletedReads(); - for (const auto& response : completedReads) { - assert(pendingReads_ > 0); - uop->supplyData(response.target.address, response.data); - pendingReads_--; - } - dataMemory_.clearCompletedReads(); - - if (pendingReads_ == 0) { - // Load complete: resume execution - execute(uop); - } - - // More data pending, end cycle early - return; - } - // Fetch // Determine if new uops are needed to be fetched @@ -130,7 +109,13 @@ void Core::tick() { previousAddresses_.push_back(target); } pendingReads_ = addresses.size(); - return; + const auto& completedReads = dataMemory_.getCompletedReads(); + for (const auto& response : completedReads) { + assert(pendingReads_ > 0); + uop->supplyData(response.target.address, response.data); + pendingReads_--; + } + dataMemory_.clearCompletedReads(); } else { // Early execution due to lacking addresses execute(uop); @@ -166,6 +151,8 @@ void Core::execute(std::shared_ptr& uop) { uop->execute(); if (uop->exceptionEncountered()) { + instructionsExecuted_++; + isa_.updateInstrTrace(uop, ®isterFileSet_, ticks_); // Handle ECALL into trace here handleException(uop); return; } @@ -197,7 +184,19 @@ void Core::execute(std::shared_ptr& uop) { } } - if (uop->isLastMicroOp()) instructionsExecuted_++; + if (uop->isLastMicroOp()) { + instructionsExecuted_++; + // TODO: This is architecture-specific. 
It's here for the reference and should(will) be refactored later + uint16_t sysreg_instrret = isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_INSTRRET); + uint16_t sysreg_cycle = isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_CYCLE); + // NOTE: 64-bit system registers are not implemented yet + //TODO: Maybe make use of byteLength and remove is32BitMode() function? + if (isa_.is32BitMode()) { + registerFileSet_.set(Register{0x2, sysreg_instrret}, RegisterValue(instructionsExecuted_, 4)); + registerFileSet_.set(Register{0x2, sysreg_cycle}, RegisterValue(ticks_, 4)); + } + isa_.updateInstrTrace(uop, ®isterFileSet_, ticks_); + } // Fetch memory for next cycle instructionMemory_.requestRead({pc_, FETCH_SIZE}); From 1e2ab32367f115e131a25de077a54f60eeb23671 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Fri, 12 May 2023 14:01:07 +0100 Subject: [PATCH 2/5] Clang format --- src/include/simeng/models/emulation/Core.hh | 3 ++- .../simeng/pipeline/PipelineBuffer1.hh | 19 +++++++++------ src/lib/Elf.cc | 17 ++++++++------ src/lib/arch/riscv/ExceptionHandler.cc | 23 ++++++++++--------- src/lib/arch/riscv/InstructionMetadata.hh | 6 +---- src/lib/models/emulation/Core.cc | 20 ++++++++++------ 6 files changed, 50 insertions(+), 38 deletions(-) diff --git a/src/include/simeng/models/emulation/Core.hh b/src/include/simeng/models/emulation/Core.hh index c4a4acc453..2c94356d72 100644 --- a/src/include/simeng/models/emulation/Core.hh +++ b/src/include/simeng/models/emulation/Core.hh @@ -11,7 +11,8 @@ #include "simeng/arch/Architecture.hh" #include "simeng/span.hh" -// TODO: This is architecture-specific, need to be refactored later. See comments in Core.cc +// TODO: This is architecture-specific, need to be refactored later. See +// comments in Core.cc #include "simeng/arch/riscv/Architecture.hh" namespace simeng { diff --git a/src/include/simeng/pipeline/PipelineBuffer1.hh b/src/include/simeng/pipeline/PipelineBuffer1.hh index dd2ed70ce7..e677645fdf 100644 --- a/src/include/simeng/pipeline/PipelineBuffer1.hh +++ b/src/include/simeng/pipeline/PipelineBuffer1.hh @@ -15,13 +15,18 @@ class PipelineBuffer { /** Construct a pipeline buffer of width `width`, and fill all slots with * `initialValue`. */ PipelineBuffer(int width, const T& initialValue) - : width(width), buffer(width * defaultLength_, initialValue), - length_(defaultLength_), headIndex_(defaultLength_-1), + : width(width), + buffer(width * defaultLength_, initialValue), + length_(defaultLength_), + headIndex_(defaultLength_ - 1), tailIndex_(0) {} PipelineBuffer(int width, const T& initialValue, int length) - : width(width), buffer(width * length, initialValue), length_(length), - headIndex_(length_-1), tailIndex_(0) { + : width(width), + buffer(width * length, initialValue), + length_(length), + headIndex_(length_ - 1), + tailIndex_(0) { assert(length_ != 0 && "Pipeline buffer length cannot be 0"); } @@ -30,14 +35,14 @@ class PipelineBuffer { void tick() { if (isStalled_) return; - //length ==1 shortcut? condition check cost + // length ==1 shortcut? 
condition check cost - if (headIndex_) { // when headIndex != 0 + if (headIndex_) { // when headIndex != 0 headIndex_--; } else { headIndex_ = length_ - 1; } - if (tailIndex_) { // when tailIndex != 0 + if (tailIndex_) { // when tailIndex != 0 tailIndex_--; } else { tailIndex_ = length_ - 1; diff --git a/src/lib/Elf.cc b/src/lib/Elf.cc index 6281598403..be11a2c753 100644 --- a/src/lib/Elf.cc +++ b/src/lib/Elf.cc @@ -47,7 +47,8 @@ Elf::Elf(std::string path, char** imagePointer) { // Check whether this is a 32 or 64-bit executable char bitFormat; file.read(&bitFormat, sizeof(bitFormat)); - if (bitFormat != ElfBitFormat::Format32 && bitFormat != ElfBitFormat::Format64) { + if (bitFormat != ElfBitFormat::Format32 && + bitFormat != ElfBitFormat::Format64) { return; } @@ -92,7 +93,8 @@ Elf::Elf(std::string path, char** imagePointer) { // Seek to the byte representing header entry size. file.seekg(0x36); uint16_t headerEntrySize; - file.read(reinterpret_cast(&headerEntrySize), sizeof(headerEntrySize)); + file.read(reinterpret_cast(&headerEntrySize), + sizeof(headerEntrySize)); uint16_t headerEntries; file.read(reinterpret_cast(&headerEntries), sizeof(headerEntries)); @@ -168,8 +170,8 @@ Elf::Elf(std::string path, char** imagePointer) { for (const auto& header : headers_) { if (header.type == 1) { // LOAD file.seekg(header.offset); - // Read `fileSize` bytes from `file` into the appropriate place in process - // memory + // Read `fileSize` bytes from `file` into the appropriate place in + // process memory file.read(*imagePointer + header.virtualAddress, header.fileSize); } } @@ -210,7 +212,8 @@ Elf::Elf(std::string path, char** imagePointer) { // Seek to the byte representing header entry size. file.seekg(0x2a); uint16_t headerEntrySize; - file.read(reinterpret_cast(&headerEntrySize), sizeof(headerEntrySize)); + file.read(reinterpret_cast(&headerEntrySize), + sizeof(headerEntrySize)); uint16_t headerEntries; file.read(reinterpret_cast(&headerEntries), sizeof(headerEntries)); @@ -285,8 +288,8 @@ Elf::Elf(std::string path, char** imagePointer) { for (const auto& header : headers32_) { if (header.type == 1) { // LOAD file.seekg(header.offset); - // Read `fileSize` bytes from `file` into the appropriate place in process - // memory + // Read `fileSize` bytes from `file` into the appropriate place in + // process memory file.read(*imagePointer + header.virtualAddress, header.fileSize); } } diff --git a/src/lib/arch/riscv/ExceptionHandler.cc b/src/lib/arch/riscv/ExceptionHandler.cc index c88448048d..ffd7895233 100644 --- a/src/lib/arch/riscv/ExceptionHandler.cc +++ b/src/lib/arch/riscv/ExceptionHandler.cc @@ -98,7 +98,8 @@ bool ExceptionHandler::init() { case 57: { // close int64_t fd = registerFileSet.get(R0).get(); stateChange = {ChangeType::REPLACEMENT, {R0}}; - stateChange.modifiedRegisterValues.push_back(RegisterValue(linux_.close(fd), instruction_.getArchRegWidth())); + stateChange.modifiedRegisterValues.push_back( + RegisterValue(linux_.close(fd), instruction_.getArchRegWidth())); break; } case 61: { // getdents64 @@ -185,9 +186,9 @@ bool ExceptionHandler::init() { uint64_t count = registerFileSet.get(R2).get(); return readBufferThen(bufPtr, count, [=]() { int64_t retval = linux_.write(fd, dataBuffer.data(), count); - ProcessStateChange stateChange = { - ChangeType::REPLACEMENT, {R0}}; - stateChange.modifiedRegisterValues.push_back(RegisterValue(retval, instruction_.getArchRegWidth())); + ProcessStateChange stateChange = {ChangeType::REPLACEMENT, {R0}}; + 
stateChange.modifiedRegisterValues.push_back( + RegisterValue(retval, instruction_.getArchRegWidth())); return concludeSyscall(stateChange); }); } @@ -355,9 +356,9 @@ bool ExceptionHandler::init() { uint64_t statbufPtr = registerFileSet.get(R1).get(); kernel::stat statOut; - stateChange = { - ChangeType::REPLACEMENT, {R0}}; - stateChange.modifiedRegisterValues.push_back(RegisterValue(linux_.fstat(fd, statOut), instruction_.getArchRegWidth())); + stateChange = {ChangeType::REPLACEMENT, {R0}}; + stateChange.modifiedRegisterValues.push_back(RegisterValue( + linux_.fstat(fd, statOut), instruction_.getArchRegWidth())); stateChange.memoryAddresses.push_back({statbufPtr, sizeof(statOut)}); stateChange.memoryAddressValues.push_back(statOut); break; @@ -556,9 +557,9 @@ bool ExceptionHandler::init() { } case 214: { // brk auto result = linux_.brk(registerFileSet.get(R0).get()); - stateChange = { - ChangeType::REPLACEMENT, {R0}}; - stateChange.modifiedRegisterValues.push_back(RegisterValue(static_cast(result), instruction_.getArchRegWidth())); + stateChange = {ChangeType::REPLACEMENT, {R0}}; + stateChange.modifiedRegisterValues.push_back(RegisterValue( + static_cast(result), instruction_.getArchRegWidth())); break; } case 215: { // munmap @@ -844,7 +845,7 @@ void ExceptionHandler::printException(const Instruction& insn) const { auto& metadata = insn.getMetadata(); for (int8_t i = metadata.lenBytes; i > 0; i--) { std::cout << std::setfill('0') << std::setw(2) - << static_cast(metadata.encoding[i-1]); + << static_cast(metadata.encoding[i - 1]); } std::cout << std::dec << " "; if (exception == InstructionException::EncodingUnallocated) { diff --git a/src/lib/arch/riscv/InstructionMetadata.hh b/src/lib/arch/riscv/InstructionMetadata.hh index 4ce164a346..796afc96c2 100644 --- a/src/lib/arch/riscv/InstructionMetadata.hh +++ b/src/lib/arch/riscv/InstructionMetadata.hh @@ -14,11 +14,7 @@ namespace Opcode { #include "RISCVGenInstrInfo.inc" } // namespace Opcode -enum INSTR_LENGTH { - IL_16B, - IL_32B, - IL_INVALID -}; +enum INSTR_LENGTH { IL_16B, IL_32B, IL_INVALID }; /** A simplified RISC-V-only version of the Capstone instruction structure. */ struct InstructionMetadata { diff --git a/src/lib/models/emulation/Core.cc b/src/lib/models/emulation/Core.cc index 0eff31d5a5..6357c898d3 100644 --- a/src/lib/models/emulation/Core.cc +++ b/src/lib/models/emulation/Core.cc @@ -152,7 +152,8 @@ void Core::execute(std::shared_ptr& uop) { if (uop->exceptionEncountered()) { instructionsExecuted_++; - isa_.updateInstrTrace(uop, ®isterFileSet_, ticks_); // Handle ECALL into trace here + isa_.updateInstrTrace(uop, ®isterFileSet_, + ticks_); // Handle ECALL into trace here handleException(uop); return; } @@ -186,14 +187,19 @@ void Core::execute(std::shared_ptr& uop) { if (uop->isLastMicroOp()) { instructionsExecuted_++; - // TODO: This is architecture-specific. It's here for the reference and should(will) be refactored later - uint16_t sysreg_instrret = isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_INSTRRET); - uint16_t sysreg_cycle = isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_CYCLE); + // TODO: This is architecture-specific. 
It's here for the reference and + // should(will) be refactored later + uint16_t sysreg_instrret = + isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_INSTRRET); + uint16_t sysreg_cycle = + isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_CYCLE); // NOTE: 64-bit system registers are not implemented yet - //TODO: Maybe make use of byteLength and remove is32BitMode() function? + // TODO: Maybe make use of byteLength and remove is32BitMode() function? if (isa_.is32BitMode()) { - registerFileSet_.set(Register{0x2, sysreg_instrret}, RegisterValue(instructionsExecuted_, 4)); - registerFileSet_.set(Register{0x2, sysreg_cycle}, RegisterValue(ticks_, 4)); + registerFileSet_.set(Register{0x2, sysreg_instrret}, + RegisterValue(instructionsExecuted_, 4)); + registerFileSet_.set(Register{0x2, sysreg_cycle}, + RegisterValue(ticks_, 4)); } isa_.updateInstrTrace(uop, ®isterFileSet_, ticks_); } From 20e5236c95f8cfad63556833d4a35cc35918fb8a Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Mon, 15 May 2023 15:49:04 +0100 Subject: [PATCH 3/5] Add Trace config option to node checker --- configs/DEMO_RISCV.yaml | 1 + src/lib/ModelConfig.cc | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/configs/DEMO_RISCV.yaml b/configs/DEMO_RISCV.yaml index e5a11d3c54..d00531d2a2 100644 --- a/configs/DEMO_RISCV.yaml +++ b/configs/DEMO_RISCV.yaml @@ -6,6 +6,7 @@ Core: ISA: rv64 Simulation-Mode: outoforder + Trace: false Clock-Frequency: 2.5 Fetch-Block-Size: 32 Fetch: diff --git a/src/lib/ModelConfig.cc b/src/lib/ModelConfig.cc index 88cc1f7d59..ee804d9afb 100644 --- a/src/lib/ModelConfig.cc +++ b/src/lib/ModelConfig.cc @@ -64,10 +64,12 @@ void ModelConfig::validate() { "Timer-Frequency", "Micro-Operations", "Vector-Length", - "Streaming-Vector-Length"}; + "Streaming-Vector-Length", + "Trace"}; validISA = nodeChecker( configFile_[root][subFields[0]], subFields[0], - std::vector({"AArch64", "rv64", "rv32"}), ExpectedValue::String); + std::vector({"AArch64", "rv64", "rv32"}), + ExpectedValue::String); nodeChecker(configFile_[root][subFields[1]], subFields[1], {"emulation", "inorderpipelined", "outoforder"}, ExpectedValue::String); @@ -86,6 +88,8 @@ void ModelConfig::validate() { {128, 256, 384, 512, 640, 768, 896, 1024, 1152, 1280, 1408, 1536, 1664, 1792, 1920, 2048}, ExpectedValue::UInteger, 512); + nodeChecker(configFile_[root][subFields[7]], subFields[7], + std::vector{false, true}, ExpectedValue::Bool, false); subFields.clear(); // First check that the ISA config option is valid, this protects reads from From 6f0c692ab0719ea508ee0b648e9f9860aecb13a6 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Wed, 6 Sep 2023 11:44:35 +0100 Subject: [PATCH 4/5] MCU patch --- CMakeLists.txt | 4 +- Makefile | 33 + README_RV32.md | 18 + configs/DEMO_RISCV.yaml | 5 +- configs/DEMO_RISCV32_mcu.yaml | 145 + configs/DEMO_RISCV32_mcu_sst.yaml | 145 + share_ext/share_sample_mcu_model.patch | 6824 +++++++++++++++++ src/include/simeng/BranchPredictor.hh | 5 + src/include/simeng/CoreInstance.hh | 3 +- src/include/simeng/Elf.hh | 93 +- src/include/simeng/GenericPredictor.hh | 3 + src/include/simeng/Instruction.hh | 23 +- src/include/simeng/arch/Architecture.hh | 5 +- .../simeng/arch/aarch64/Architecture.hh | 5 +- .../simeng/arch/aarch64/Instruction.hh | 2 +- src/include/simeng/arch/riscv/Architecture.hh | 41 +- .../simeng/arch/riscv/ExceptionHandler.hh | 3 + src/include/simeng/arch/riscv/Instruction.hh | 25 +- .../simeng/arch/riscv/SystemRegister.hh | 229 + src/include/simeng/kernel/Linux.hh | 5 + 
src/include/simeng/kernel/LinuxProcess.hh | 5 + src/include/simeng/models/emulation/Core.hh | 3 + src/include/simeng/models/mcu/Core.hh | 181 + src/include/simeng/pipeline_hi/DecodeUnit.hh | 66 + .../simeng/pipeline_hi/DispatchIssueUnit.hh | 150 + src/include/simeng/pipeline_hi/ExecuteUnit.hh | 147 + src/include/simeng/pipeline_hi/FetchUnit.hh | 127 + .../simeng/pipeline_hi/LoadStoreQueue.hh | 235 + .../simeng/pipeline_hi/PipelineBuffer.hh | 107 + .../simeng/pipeline_hi/PipelineBuffer1.hh | 133 + .../simeng/pipeline_hi/PortAllocator.hh | 43 + src/include/simeng/pipeline_hi/RegDepMap.hh | 57 + .../simeng/pipeline_hi/RegisterAliasTable.hh | 69 + .../simeng/pipeline_hi/ReorderBuffer.hh | 136 + .../simeng/pipeline_hi/StaticPredictor.hh | 53 + .../simeng/pipeline_hi/WritebackUnit.hh | 62 + src/lib/CMakeLists.txt | 12 + src/lib/CoreInstance.cc | 13 +- src/lib/Elf.cc | 161 +- src/lib/GenericPredictor.cc | 7 + src/lib/Instruction.cc | 3 + src/lib/ModelConfig.cc | 2 +- src/lib/arch/aarch64/Architecture.cc | 6 +- src/lib/arch/aarch64/Instruction.cc | 4 +- src/lib/arch/aarch64/Instruction_decode.cc | 10 +- src/lib/arch/riscv/Architecture.cc | 78 +- src/lib/arch/riscv/ExceptionHandler.cc | 56 +- src/lib/arch/riscv/Instruction.cc | 6 +- src/lib/arch/riscv/InstructionMetadata.cc | 5 +- src/lib/arch/riscv/Instruction_decode.cc | 78 +- src/lib/arch/riscv/Instruction_execute.cc | 85 +- src/lib/arch/riscv/SystemRegister.cc | 124 + src/lib/kernel/Linux.cc | 10 +- src/lib/kernel/LinuxProcess.cc | 14 +- src/lib/models/emulation/Core.cc | 12 +- src/lib/models/mcu/Core.cc | 515 ++ src/lib/pipeline/FetchUnit.cc | 2 +- src/lib/pipeline_hi/DecodeUnit.cc | 117 + src/lib/pipeline_hi/DispatchIssueUnit.cc | 269 + src/lib/pipeline_hi/ExecuteUnit.cc | 255 + src/lib/pipeline_hi/FetchUnit.cc | 265 + src/lib/pipeline_hi/LoadStoreQueue.cc | 315 + src/lib/pipeline_hi/RegDepMap.cc | 143 + src/lib/pipeline_hi/RegisterAliasTable.cc | 110 + src/lib/pipeline_hi/ReorderBuffer.cc | 206 + src/lib/pipeline_hi/StaticPredictor.cc | 120 + src/lib/pipeline_hi/WritebackUnit.cc | 74 + src/tools/simeng/main.cc | 4 +- sst/SimEngCoreWrapper.cc | 94 +- sst/SimEngMemInterface.cc | 13 +- sst/config/mcu_int_example_config.py | 74 + sst/include/SimEngCoreWrapper.hh | 4 +- sst/include/SimEngMemInterface.hh | 2 +- 73 files changed, 12224 insertions(+), 234 deletions(-) create mode 100644 Makefile create mode 100644 configs/DEMO_RISCV32_mcu.yaml create mode 100644 configs/DEMO_RISCV32_mcu_sst.yaml create mode 100644 share_ext/share_sample_mcu_model.patch create mode 100644 src/include/simeng/arch/riscv/SystemRegister.hh create mode 100644 src/include/simeng/models/mcu/Core.hh create mode 100644 src/include/simeng/pipeline_hi/DecodeUnit.hh create mode 100644 src/include/simeng/pipeline_hi/DispatchIssueUnit.hh create mode 100644 src/include/simeng/pipeline_hi/ExecuteUnit.hh create mode 100644 src/include/simeng/pipeline_hi/FetchUnit.hh create mode 100644 src/include/simeng/pipeline_hi/LoadStoreQueue.hh create mode 100644 src/include/simeng/pipeline_hi/PipelineBuffer.hh create mode 100644 src/include/simeng/pipeline_hi/PipelineBuffer1.hh create mode 100644 src/include/simeng/pipeline_hi/PortAllocator.hh create mode 100644 src/include/simeng/pipeline_hi/RegDepMap.hh create mode 100644 src/include/simeng/pipeline_hi/RegisterAliasTable.hh create mode 100644 src/include/simeng/pipeline_hi/ReorderBuffer.hh create mode 100644 src/include/simeng/pipeline_hi/StaticPredictor.hh create mode 100644 src/include/simeng/pipeline_hi/WritebackUnit.hh create mode 100644 
src/lib/arch/riscv/SystemRegister.cc create mode 100644 src/lib/models/mcu/Core.cc create mode 100644 src/lib/pipeline_hi/DecodeUnit.cc create mode 100644 src/lib/pipeline_hi/DispatchIssueUnit.cc create mode 100644 src/lib/pipeline_hi/ExecuteUnit.cc create mode 100644 src/lib/pipeline_hi/FetchUnit.cc create mode 100644 src/lib/pipeline_hi/LoadStoreQueue.cc create mode 100644 src/lib/pipeline_hi/RegDepMap.cc create mode 100644 src/lib/pipeline_hi/RegisterAliasTable.cc create mode 100644 src/lib/pipeline_hi/ReorderBuffer.cc create mode 100644 src/lib/pipeline_hi/StaticPredictor.cc create mode 100644 src/lib/pipeline_hi/WritebackUnit.cc create mode 100644 sst/config/mcu_int_example_config.py diff --git a/CMakeLists.txt b/CMakeLists.txt index ccbc9074a0..0a95e01796 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -50,8 +50,8 @@ FetchContent_Declare( FetchContent_Declare( capstone-lib GIT_REPOSITORY https://github.com/UoB-HPC/capstone.git - GIT_TAG next - GIT_PROGRESS TRUE + GIT_TAG next + GIT_PROGRESS TRUE # Old Git tag pre-Armv9.2 # GIT_TAG e7be7d99e718ef9741026b80fc6f5e100fdf4f94 # trunk diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..0029c8cc61 --- /dev/null +++ b/Makefile @@ -0,0 +1,33 @@ +# Helper setup to build simeng binary + +NPROC ?= 4 +CMAKE ?= /data/tools/cmake/cmake-3.21.0-linux-x86_64/bin/cmake +BUILD_DIR ?= build +TYPE ?= Debug +INSTALLDIR ?= $(shell pwd)/install +TEST_FLAG ?= OFF +SST_FLAG ?= ON +SST_CORE_INSTALLDIR ?= $(SST_CORE_HOME) + +all: configure build install + +configure: clean + $(CMAKE) -B $(BUILD_DIR) -S . -DCMAKE_BUILD_TYPE=$(TYPE) -DCMAKE_INSTALL_PREFIX=$(INSTALLDIR) -DSIMENG_ENABLE_TESTS=$(TEST_FLAG) -DSIMENG_USE_EXTERNAL_LLVM=ON -DSIMENG_ENABLE_SST=$(SST_FLAG) -DSST_INSTALL_DIR=$(SST_CORE_INSTALLDIR) -DLLVM_DIR=/usr/lib/llvm-12/lib/ + +build: + $(CMAKE) --build $(BUILD_DIR) -j $(NPROC) + +test: + $(CMAKE) --build $(BUILD_DIR) -j $(NPROC) --target test + +install: + $(CMAKE) --build $(BUILD_DIR) -j $(NPROC) --target install + +run_sst_example: + sst sst/config/eacf_int_example_config.py + +clean: + rm -rf build + +#.PHONY : all configure build test install run_sst_example clean +.PHONY : * diff --git a/README_RV32.md b/README_RV32.md index fe5942068e..f9587791c2 100644 --- a/README_RV32.md +++ b/README_RV32.md @@ -11,3 +11,21 @@ - Added an alternative implementation of pipeline buffer with variable latency support. - Supports 0 delay that is benefitial for merging pipeline stages if required. - Supports more than 1 cycle delay between pipeline stages. 
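The pipeline-buffer bullets above only describe the behaviour; the variable-latency buffer implementation itself (PipelineBuffer1) is not shown in this excerpt. As a minimal illustrative sketch only (a simple tick-based model; the class name, members and methods below are hypothetical and are not SimEng's PipelineBuffer1 API), a buffer with a configurable visible latency can keep in-flight entries with a per-entry countdown:

```cpp
#include <cstdint>
#include <deque>
#include <utility>

// Illustrative sketch: a pipeline buffer whose visible latency is configurable.
// Latency 0 makes a write visible in the same cycle (useful when merging two
// stages); latency N > 1 models a longer gap between pipeline stages.
template <typename T>
class VariableLatencyBuffer {
 public:
  VariableLatencyBuffer(uint64_t latency, const T& initial)
      : latency_(latency), head_(initial) {}

  // Producer side: submit a value during the current cycle.
  void write(const T& value) {
    if (latency_ == 0) {
      head_ = value;  // Zero delay: the consumer sees the value this cycle.
    } else {
      inFlight_.push_back({value, latency_});
    }
  }

  // Consumer side: the value currently visible at the head of the buffer.
  const T& read() const { return head_; }

  // Advance one cycle: age all in-flight entries and promote those that mature.
  void tick() {
    for (auto& entry : inFlight_) {
      --entry.second;
    }
    while (!inFlight_.empty() && inFlight_.front().second == 0) {
      head_ = inFlight_.front().first;
      inFlight_.pop_front();
    }
  }

 private:
  uint64_t latency_;  // Cycles before a written value becomes visible.
  T head_;            // Value currently visible to the consumer.
  std::deque<std::pair<T, uint64_t>> inFlight_;  // Pending writes + remaining delay.
};
```

In this sketch the zero-latency configuration bypasses the in-flight queue entirely, which is what makes merging two adjacent stages effectively free, while any latency greater than one simply leaves entries queued for more ticks.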
+
+# SimEng Update to share the sample implementation of the MicroController (MCU) class core model using 32-bit RISC-V ISA
+- Small MCU-like three-stage pipeline core model
+- Additional fixed memory support for the LSU in the MCU core
+- Some updates to the ELF loader and to SST image loading into SST memory
+- Makefile to build and run
+- Added support for memory-mapped system registers
+- Used to add a HostTargetInterface for I/O and termination so that spike binaries can run on SimEng
+- Added interrupt support
+- Fixed csrc handling
+- Fixed the 32-bit sltiu instruction
+- Fixed the 32-bit mulh, mulhu and mulhsu instructions
+- Added support for interrupts by flushing the pipe at the execution stage when an interrupt is visible; fixed the iteration count being an int in main.cc
+- Some bug fixes
+
+# Capstone change required for RV32 compressed instruction usage in file include/capstone/capstone.h
+CS_MODE_RISV32GC = CS_MODE_RISCV32 | CS_MODE_RISCVC, ///< RISCV RV32GC
+-
diff --git a/configs/DEMO_RISCV.yaml b/configs/DEMO_RISCV.yaml
index e5a11d3c54..15b61ed5e2 100644
--- a/configs/DEMO_RISCV.yaml
+++ b/configs/DEMO_RISCV.yaml
@@ -5,9 +5,10 @@
 Core:
 ISA: rv64
- Simulation-Mode: outoforder
+ Simulation-Mode: emulation
 Clock-Frequency: 2.5
 Fetch-Block-Size: 32
+ Trace: True
 Fetch:
 Fetch-Block-Size: 32
 Loop-Buffer-Size: 0
@@ -36,7 +37,7 @@ Branch-Predictor:
 Branch-Predictor:
 BTB-bitlength: 16
 L1-Data-Memory:
- Interface-Type: Fixed
+ Interface-Type: Flat
 L1-Instruction-Memory:
 Interface-Type: Flat
 LSQ-L1-Interface:
diff --git a/configs/DEMO_RISCV32_mcu.yaml b/configs/DEMO_RISCV32_mcu.yaml
new file mode 100644
index 0000000000..2e7983e178
--- /dev/null
+++ b/configs/DEMO_RISCV32_mcu.yaml
@@ -0,0 +1,145 @@
+---
+# The following resources where utilised to create the config file and naming schemes:
+# https://en.wikichip.org/wiki/cavium/microarchitectures/vulcan
+
+Core:
+ ISA: rv32
+ Simulation-Mode: mcu
+ Clock-Frequency: 2.5
+ Fetch-Block-Size: 32
+ Trace: True
+ EnableHaltCheck: True
+ MaxStallCycleTimeout: 10000
+ MaxSimCycleTimeout: 1000000000
+ MaxInstrTimeout: 1000000000
+Fetch:
+ Fetch-Block-Size: 32
+ Loop-Buffer-Size: 0
+ Loop-Detection-Threshold: 0
+Process-Image:
+ Heap-Size: 1073741824
+ Stack-Size: 1048576
+Register-Set:
+ GeneralPurpose-Count: 32
+ FloatingPoint-Count: 32
+Pipeline-Widths:
+ Commit: 4
+ Dispatch-Rate: 4
+ FrontEnd: 4
+ LSQ-Completion: 2
+Queue-Sizes:
+ ROB: 180
+ Load: 64
+ Store: 36
+Branch-Predictor:
+ BTB-Tag-Bits: 11
+ Saturating-Count-Bits: 2
+ Global-History-Length: 10
+ RAS-entries: 1 # need change; tmp solution: staticPred header file
+ Fallback-Static-Predictor: "Always-Taken"
+ BTB-bitlength: 16
+ Static-Type: "Always-Taken"
+L1-Data-Memory:
+ Interface-Type: Fixed
+L1-Instruction-Memory:
+ Interface-Type: Flat
+LSQ-L1-Interface:
+ Access-Latency: 1
+ Exclusive: False
+ Load-Bandwidth: 32
+ Store-Bandwidth: 16
+ Permitted-Requests-Per-Cycle: 2
+ Permitted-Loads-Per-Cycle: 2
+ Permitted-Stores-Per-Cycle: 1
+Ports:
+ 0:
+ Portname: Port 0
+ Instruction-Support:
+ - INT_SIMPLE
+ - INT_MUL
+ 1:
+ Portname: Port 1
+ Instruction-Support:
+ - INT
+ 2:
+ Portname: Port 2
+ Instruction-Support:
+ - INT_SIMPLE
+ - INT_MUL
+ - BRANCH
+ 3:
+ Portname: Port 4
+ Instruction-Support:
+ - LOAD
+ 4:
+ Portname: Port 5
+ Instruction-Support:
+ - LOAD
+ 5:
+ Portname: Port 3
+ Instruction-Support:
+ - STORE
+Reservation-Stations:
+ 0:
+ Size: 60
+ Dispatch-Rate: 4
+ Ports:
+ - Port 0
+ - Port 1
+ - Port 2
+ - Port 4
+ - Port 5
+ - Port 3
+Execution-Units:
+ 0:
+ Pipelined: True
+ 1:
+ Pipelined: True
+ 2: + Pipelined: True + 3: + Pipelined: True + 4: + Pipelined: True + 5: + Pipelined: True +Latencies: + 0: + Instruction-Groups: + - INT_SIMPLE_ARTH + - INT_SIMPLE_LOGICAL + Execution-Latency: 1 + Execution-Throughput: 1 + 1: + Instruction-Groups: + - INT_MUL + Execution-Latency: 1 + Execution-Throughput: 1 + 2: + Instruction-Groups: + - INT_DIV + Execution-Latency: 4 + Execution-Throughput: 4 +# CPU-Info mainly used to generate a replica of the special (or system) file directory +# structure +CPU-Info: + # Set Generate-Special-Dir to 'T' to generate the special files directory, or to 'F' to not. + # (Not generating the special files directory may require the user to copy over files manually) + Generate-Special-Dir: true + # Core-Count MUST be 1 as multi-core is not supported at this time. (TX2 true value is 32) + Core-Count: 1 + # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (TX2 true value is 2) + Socket-Count: 1 + # SMT MUST be 1 as Simultanious-Multi-Threading is not supported at this time. (TX2 true value is 4) + SMT: 1 + # Below are the values needed to generate /proc/cpuinfo + BogoMIPS: 400.00 + Features: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics cpuid asimdrdm + CPU-Implementer: "0x43" + CPU-Architecture: 8 + CPU-Variant: "0x1" + CPU-Part: "0x0af" + CPU-Revision: 2 + # Package-Count is used to generate + # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id} + Package-Count: 1 \ No newline at end of file diff --git a/configs/DEMO_RISCV32_mcu_sst.yaml b/configs/DEMO_RISCV32_mcu_sst.yaml new file mode 100644 index 0000000000..42e4c5d87f --- /dev/null +++ b/configs/DEMO_RISCV32_mcu_sst.yaml @@ -0,0 +1,145 @@ +--- +# The following resources where utilised to create the config file and naming schemes: +# https://en.wikichip.org/wiki/cavium/microarchitectures/vulcan + +Core: + ISA: rv32 + Simulation-Mode: mcu + Clock-Frequency: 2.5 + Fetch-Block-Size: 32 + Trace: True + EnableHaltCheck: False + MaxStallCycleTimeout: 10000 + MaxSimCycleTimeout: 1000000000 + MaxInstrTimeout: 1000000000 +Fetch: + Fetch-Block-Size: 32 + Loop-Buffer-Size: 0 + Loop-Detection-Threshold: 0 +Process-Image: + Heap-Size: 1073741824 + Stack-Size: 1048576 +Register-Set: + GeneralPurpose-Count: 32 + FloatingPoint-Count: 32 +Pipeline-Widths: + Commit: 4 + Dispatch-Rate: 4 + FrontEnd: 4 + LSQ-Completion: 2 +Queue-Sizes: + ROB: 180 + Load: 64 + Store: 36 +Branch-Predictor: + BTB-Tag-Bits: 11 + Saturating-Count-Bits: 2 + Global-History-Length: 10 + RAS-entries: 1 # need change; tmp solution: staticPred header file + Fallback-Static-Predictor: "Always-Taken" + BTB-bitlength: 16 + Static-Type: "Always-Taken" +L1-Data-Memory: + Interface-Type: External +L1-Instruction-Memory: + Interface-Type: Flat +LSQ-L1-Interface: + Access-Latency: 1 + Exclusive: False + Load-Bandwidth: 32 + Store-Bandwidth: 16 + Permitted-Requests-Per-Cycle: 2 + Permitted-Loads-Per-Cycle: 2 + Permitted-Stores-Per-Cycle: 1 +Ports: + 0: + Portname: Port 0 + Instruction-Support: + - INT_SIMPLE + - INT_MUL + 1: + Portname: Port 1 + Instruction-Support: + - INT + 2: + Portname: Port 2 + Instruction-Support: + - INT_SIMPLE + - INT_MUL + - BRANCH + 3: + Portname: Port 4 + Instruction-Support: + - LOAD + 4: + Portname: Port 5 + Instruction-Support: + - LOAD + 5: + Portname: Port 3 + Instruction-Support: + - STORE +Reservation-Stations: + 0: + Size: 60 + Dispatch-Rate: 4 + Ports: + - Port 0 + - Port 1 + - Port 2 + - Port 4 + - Port 5 + - Port 3 +Execution-Units: + 0: + Pipelined: True 
+ 1: + Pipelined: True + 2: + Pipelined: True + 3: + Pipelined: True + 4: + Pipelined: True + 5: + Pipelined: True +Latencies: + 0: + Instruction-Groups: + - INT_SIMPLE_ARTH + - INT_SIMPLE_LOGICAL + Execution-Latency: 1 + Execution-Throughput: 1 + 1: + Instruction-Groups: + - INT_MUL + Execution-Latency: 1 + Execution-Throughput: 1 + 2: + Instruction-Groups: + - INT_DIV + Execution-Latency: 4 + Execution-Throughput: 4 +# CPU-Info mainly used to generate a replica of the special (or system) file directory +# structure +CPU-Info: + # Set Generate-Special-Dir to 'T' to generate the special files directory, or to 'F' to not. + # (Not generating the special files directory may require the user to copy over files manually) + Generate-Special-Dir: true + # Core-Count MUST be 1 as multi-core is not supported at this time. (TX2 true value is 32) + Core-Count: 1 + # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (TX2 true value is 2) + Socket-Count: 1 + # SMT MUST be 1 as Simultanious-Multi-Threading is not supported at this time. (TX2 true value is 4) + SMT: 1 + # Below are the values needed to generate /proc/cpuinfo + BogoMIPS: 400.00 + Features: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics cpuid asimdrdm + CPU-Implementer: "0x43" + CPU-Architecture: 8 + CPU-Variant: "0x1" + CPU-Part: "0x0af" + CPU-Revision: 2 + # Package-Count is used to generate + # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id} + Package-Count: 1 diff --git a/share_ext/share_sample_mcu_model.patch b/share_ext/share_sample_mcu_model.patch new file mode 100644 index 0000000000..f6cc3acb43 --- /dev/null +++ b/share_ext/share_sample_mcu_model.patch @@ -0,0 +1,6824 @@ +diff --git a/CMakeLists.txt b/CMakeLists.txt +index ccbc9074..0a95e017 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -50,8 +50,8 @@ FetchContent_Declare( + FetchContent_Declare( + capstone-lib + GIT_REPOSITORY https://github.com/UoB-HPC/capstone.git +- GIT_TAG next +- GIT_PROGRESS TRUE ++ GIT_TAG next ++ GIT_PROGRESS TRUE + + # Old Git tag pre-Armv9.2 + # GIT_TAG e7be7d99e718ef9741026b80fc6f5e100fdf4f94 # trunk +diff --git a/Makefile b/Makefile +new file mode 100644 +index 00000000..0029c8cc +--- /dev/null ++++ b/Makefile +@@ -0,0 +1,33 @@ ++# Helper setup to build simeng binary ++ ++NPROC ?= 4 ++CMAKE ?= /data/tools/cmake/cmake-3.21.0-linux-x86_64/bin/cmake ++BUILD_DIR ?= build ++TYPE ?= Debug ++INSTALLDIR ?= $(shell pwd)/install ++TEST_FLAG ?= OFF ++SST_FLAG ?= ON ++SST_CORE_INSTALLDIR ?= $(SST_CORE_HOME) ++ ++all: configure build install ++ ++configure: clean ++ $(CMAKE) -B $(BUILD_DIR) -S . 
-DCMAKE_BUILD_TYPE=$(TYPE) -DCMAKE_INSTALL_PREFIX=$(INSTALLDIR) -DSIMENG_ENABLE_TESTS=$(TEST_FLAG) -DSIMENG_USE_EXTERNAL_LLVM=ON -DSIMENG_ENABLE_SST=$(SST_FLAG) -DSST_INSTALL_DIR=$(SST_CORE_INSTALLDIR) -DLLVM_DIR=/usr/lib/llvm-12/lib/ ++ ++build: ++ $(CMAKE) --build $(BUILD_DIR) -j $(NPROC) ++ ++test: ++ $(CMAKE) --build $(BUILD_DIR) -j $(NPROC) --target test ++ ++install: ++ $(CMAKE) --build $(BUILD_DIR) -j $(NPROC) --target install ++ ++run_sst_example: ++ sst sst/config/eacf_int_example_config.py ++ ++clean: ++ rm -rf build ++ ++#.PHONY : all configure build test install run_sst_example clean ++.PHONY : * +diff --git a/configs/DEMO_RISCV.yaml b/configs/DEMO_RISCV.yaml +index e5a11d3c..15b61ed5 100644 +--- a/configs/DEMO_RISCV.yaml ++++ b/configs/DEMO_RISCV.yaml +@@ -5,9 +5,10 @@ + + Core: + ISA: rv64 +- Simulation-Mode: outoforder ++ Simulation-Mode: emulation + Clock-Frequency: 2.5 + Fetch-Block-Size: 32 ++ Trace: True + Fetch: + Fetch-Block-Size: 32 + Loop-Buffer-Size: 0 +@@ -36,7 +37,7 @@ Branch-Predictor: + Branch-Predictor: + BTB-bitlength: 16 + L1-Data-Memory: +- Interface-Type: Fixed ++ Interface-Type: Flat + L1-Instruction-Memory: + Interface-Type: Flat + LSQ-L1-Interface: +diff --git a/configs/DEMO_RISCV32_mcu.yaml b/configs/DEMO_RISCV32_mcu.yaml +new file mode 100644 +index 00000000..2e7983e1 +--- /dev/null ++++ b/configs/DEMO_RISCV32_mcu.yaml +@@ -0,0 +1,145 @@ ++--- ++# The following resources where utilised to create the config file and naming schemes: ++# https://en.wikichip.org/wiki/cavium/microarchitectures/vulcan ++ ++Core: ++ ISA: rv32 ++ Simulation-Mode: mcu ++ Clock-Frequency: 2.5 ++ Fetch-Block-Size: 32 ++ Trace: True ++ EnableHaltCheck: True ++ MaxStallCycleTimeout: 10000 ++ MaxSimCycleTimeout: 1000000000 ++ MaxInstrTimeout: 1000000000 ++Fetch: ++ Fetch-Block-Size: 32 ++ Loop-Buffer-Size: 0 ++ Loop-Detection-Threshold: 0 ++Process-Image: ++ Heap-Size: 1073741824 ++ Stack-Size: 1048576 ++Register-Set: ++ GeneralPurpose-Count: 32 ++ FloatingPoint-Count: 32 ++Pipeline-Widths: ++ Commit: 4 ++ Dispatch-Rate: 4 ++ FrontEnd: 4 ++ LSQ-Completion: 2 ++Queue-Sizes: ++ ROB: 180 ++ Load: 64 ++ Store: 36 ++Branch-Predictor: ++ BTB-Tag-Bits: 11 ++ Saturating-Count-Bits: 2 ++ Global-History-Length: 10 ++ RAS-entries: 1 # need change; tmp solution: staticPred header file ++ Fallback-Static-Predictor: "Always-Taken" ++ BTB-bitlength: 16 ++ Static-Type: "Always-Taken" ++L1-Data-Memory: ++ Interface-Type: Fixed ++L1-Instruction-Memory: ++ Interface-Type: Flat ++LSQ-L1-Interface: ++ Access-Latency: 1 ++ Exclusive: False ++ Load-Bandwidth: 32 ++ Store-Bandwidth: 16 ++ Permitted-Requests-Per-Cycle: 2 ++ Permitted-Loads-Per-Cycle: 2 ++ Permitted-Stores-Per-Cycle: 1 ++Ports: ++ 0: ++ Portname: Port 0 ++ Instruction-Support: ++ - INT_SIMPLE ++ - INT_MUL ++ 1: ++ Portname: Port 1 ++ Instruction-Support: ++ - INT ++ 2: ++ Portname: Port 2 ++ Instruction-Support: ++ - INT_SIMPLE ++ - INT_MUL ++ - BRANCH ++ 3: ++ Portname: Port 4 ++ Instruction-Support: ++ - LOAD ++ 4: ++ Portname: Port 5 ++ Instruction-Support: ++ - LOAD ++ 5: ++ Portname: Port 3 ++ Instruction-Support: ++ - STORE ++Reservation-Stations: ++ 0: ++ Size: 60 ++ Dispatch-Rate: 4 ++ Ports: ++ - Port 0 ++ - Port 1 ++ - Port 2 ++ - Port 4 ++ - Port 5 ++ - Port 3 ++Execution-Units: ++ 0: ++ Pipelined: True ++ 1: ++ Pipelined: True ++ 2: ++ Pipelined: True ++ 3: ++ Pipelined: True ++ 4: ++ Pipelined: True ++ 5: ++ Pipelined: True ++Latencies: ++ 0: ++ Instruction-Groups: ++ - INT_SIMPLE_ARTH ++ - INT_SIMPLE_LOGICAL ++ 
Execution-Latency: 1 ++ Execution-Throughput: 1 ++ 1: ++ Instruction-Groups: ++ - INT_MUL ++ Execution-Latency: 1 ++ Execution-Throughput: 1 ++ 2: ++ Instruction-Groups: ++ - INT_DIV ++ Execution-Latency: 4 ++ Execution-Throughput: 4 ++# CPU-Info mainly used to generate a replica of the special (or system) file directory ++# structure ++CPU-Info: ++ # Set Generate-Special-Dir to 'T' to generate the special files directory, or to 'F' to not. ++ # (Not generating the special files directory may require the user to copy over files manually) ++ Generate-Special-Dir: true ++ # Core-Count MUST be 1 as multi-core is not supported at this time. (TX2 true value is 32) ++ Core-Count: 1 ++ # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (TX2 true value is 2) ++ Socket-Count: 1 ++ # SMT MUST be 1 as Simultanious-Multi-Threading is not supported at this time. (TX2 true value is 4) ++ SMT: 1 ++ # Below are the values needed to generate /proc/cpuinfo ++ BogoMIPS: 400.00 ++ Features: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics cpuid asimdrdm ++ CPU-Implementer: "0x43" ++ CPU-Architecture: 8 ++ CPU-Variant: "0x1" ++ CPU-Part: "0x0af" ++ CPU-Revision: 2 ++ # Package-Count is used to generate ++ # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id} ++ Package-Count: 1 +\ No newline at end of file +diff --git a/configs/DEMO_RISCV32_mcu_sst.yaml b/configs/DEMO_RISCV32_mcu_sst.yaml +new file mode 100644 +index 00000000..42e4c5d8 +--- /dev/null ++++ b/configs/DEMO_RISCV32_mcu_sst.yaml +@@ -0,0 +1,145 @@ ++--- ++# The following resources where utilised to create the config file and naming schemes: ++# https://en.wikichip.org/wiki/cavium/microarchitectures/vulcan ++ ++Core: ++ ISA: rv32 ++ Simulation-Mode: mcu ++ Clock-Frequency: 2.5 ++ Fetch-Block-Size: 32 ++ Trace: True ++ EnableHaltCheck: False ++ MaxStallCycleTimeout: 10000 ++ MaxSimCycleTimeout: 1000000000 ++ MaxInstrTimeout: 1000000000 ++Fetch: ++ Fetch-Block-Size: 32 ++ Loop-Buffer-Size: 0 ++ Loop-Detection-Threshold: 0 ++Process-Image: ++ Heap-Size: 1073741824 ++ Stack-Size: 1048576 ++Register-Set: ++ GeneralPurpose-Count: 32 ++ FloatingPoint-Count: 32 ++Pipeline-Widths: ++ Commit: 4 ++ Dispatch-Rate: 4 ++ FrontEnd: 4 ++ LSQ-Completion: 2 ++Queue-Sizes: ++ ROB: 180 ++ Load: 64 ++ Store: 36 ++Branch-Predictor: ++ BTB-Tag-Bits: 11 ++ Saturating-Count-Bits: 2 ++ Global-History-Length: 10 ++ RAS-entries: 1 # need change; tmp solution: staticPred header file ++ Fallback-Static-Predictor: "Always-Taken" ++ BTB-bitlength: 16 ++ Static-Type: "Always-Taken" ++L1-Data-Memory: ++ Interface-Type: External ++L1-Instruction-Memory: ++ Interface-Type: Flat ++LSQ-L1-Interface: ++ Access-Latency: 1 ++ Exclusive: False ++ Load-Bandwidth: 32 ++ Store-Bandwidth: 16 ++ Permitted-Requests-Per-Cycle: 2 ++ Permitted-Loads-Per-Cycle: 2 ++ Permitted-Stores-Per-Cycle: 1 ++Ports: ++ 0: ++ Portname: Port 0 ++ Instruction-Support: ++ - INT_SIMPLE ++ - INT_MUL ++ 1: ++ Portname: Port 1 ++ Instruction-Support: ++ - INT ++ 2: ++ Portname: Port 2 ++ Instruction-Support: ++ - INT_SIMPLE ++ - INT_MUL ++ - BRANCH ++ 3: ++ Portname: Port 4 ++ Instruction-Support: ++ - LOAD ++ 4: ++ Portname: Port 5 ++ Instruction-Support: ++ - LOAD ++ 5: ++ Portname: Port 3 ++ Instruction-Support: ++ - STORE ++Reservation-Stations: ++ 0: ++ Size: 60 ++ Dispatch-Rate: 4 ++ Ports: ++ - Port 0 ++ - Port 1 ++ - Port 2 ++ - Port 4 ++ - Port 5 ++ - Port 3 ++Execution-Units: ++ 0: ++ Pipelined: True ++ 1: ++ Pipelined: True ++ 2: ++ Pipelined: 
True ++ 3: ++ Pipelined: True ++ 4: ++ Pipelined: True ++ 5: ++ Pipelined: True ++Latencies: ++ 0: ++ Instruction-Groups: ++ - INT_SIMPLE_ARTH ++ - INT_SIMPLE_LOGICAL ++ Execution-Latency: 1 ++ Execution-Throughput: 1 ++ 1: ++ Instruction-Groups: ++ - INT_MUL ++ Execution-Latency: 1 ++ Execution-Throughput: 1 ++ 2: ++ Instruction-Groups: ++ - INT_DIV ++ Execution-Latency: 4 ++ Execution-Throughput: 4 ++# CPU-Info mainly used to generate a replica of the special (or system) file directory ++# structure ++CPU-Info: ++ # Set Generate-Special-Dir to 'T' to generate the special files directory, or to 'F' to not. ++ # (Not generating the special files directory may require the user to copy over files manually) ++ Generate-Special-Dir: true ++ # Core-Count MUST be 1 as multi-core is not supported at this time. (TX2 true value is 32) ++ Core-Count: 1 ++ # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (TX2 true value is 2) ++ Socket-Count: 1 ++ # SMT MUST be 1 as Simultanious-Multi-Threading is not supported at this time. (TX2 true value is 4) ++ SMT: 1 ++ # Below are the values needed to generate /proc/cpuinfo ++ BogoMIPS: 400.00 ++ Features: fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics cpuid asimdrdm ++ CPU-Implementer: "0x43" ++ CPU-Architecture: 8 ++ CPU-Variant: "0x1" ++ CPU-Part: "0x0af" ++ CPU-Revision: 2 ++ # Package-Count is used to generate ++ # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id} ++ Package-Count: 1 +diff --git a/src/include/simeng/BranchPredictor.hh b/src/include/simeng/BranchPredictor.hh +index 88be07dd..8d76f087 100644 +--- a/src/include/simeng/BranchPredictor.hh ++++ b/src/include/simeng/BranchPredictor.hh +@@ -46,6 +46,11 @@ class BranchPredictor { + public: + virtual ~BranchPredictor(){}; + ++ /** Overload predict() with more information in parameters */ ++ virtual BranchPrediction predict(uint64_t address, BranchType type, ++ uint64_t knownTarget, uint8_t instByteLength) ++ = 0; ++ + /** Generate a branch prediction for the specified instruction address with a + * branch type and possible known target. */ + virtual BranchPrediction predict(uint64_t address, BranchType type, +diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh +index c8e151e8..e4d5b232 100644 +--- a/src/include/simeng/CoreInstance.hh ++++ b/src/include/simeng/CoreInstance.hh +@@ -16,6 +16,7 @@ + #include "simeng/kernel/Linux.hh" + #include "simeng/models/emulation/Core.hh" + #include "simeng/models/inorder/Core.hh" ++#include "simeng/models/mcu/Core.hh" + #include "simeng/models/outoforder/Core.hh" + #include "simeng/pipeline/A64FXPortAllocator.hh" + #include "simeng/pipeline/BalancedPortAllocator.hh" +@@ -37,7 +38,7 @@ uint32_t hex_[] = { + namespace simeng { + + /** The available modes of simulation. */ +-enum class SimulationMode { Emulation, InOrderPipelined, OutOfOrder }; ++enum class SimulationMode { Emulation, InOrderPipelined, MCU, OutOfOrder }; + + /** A class to create a SimEng core instance from a supplied config. 
*/ + class CoreInstance { +diff --git a/src/include/simeng/Elf.hh b/src/include/simeng/Elf.hh +index 14bcddcb..485debea 100644 +--- a/src/include/simeng/Elf.hh ++++ b/src/include/simeng/Elf.hh +@@ -2,6 +2,7 @@ + + #include + #include ++#include + + #include "simeng/span.hh" + +@@ -30,23 +31,85 @@ struct Elf32Header { + uint32_t memorySize; + }; + ++typedef struct { ++ unsigned char e_ident[16]; ++ uint16_t e_type; ++ uint16_t e_machine; ++ uint32_t e_version; ++ uint32_t e_entry; ++ uint32_t e_phoff; ++ uint32_t e_shoff; ++ uint32_t e_flags; ++ uint16_t e_ehsize; ++ uint16_t e_phentsize; ++ uint16_t e_phnum; ++ uint16_t e_shentsize; ++ uint16_t e_shnum; ++ uint16_t e_shstrndx; ++} Elf32_Ehdr; ++ ++typedef struct { ++ uint32_t p_type; ++ uint32_t p_offset; ++ uint32_t p_vaddr; ++ uint32_t p_paddr; ++ uint32_t p_filesz; ++ uint32_t p_memsz; ++ uint32_t p_flags; ++ uint32_t p_align; ++} Elf32_Phdr; ++ ++typedef struct { ++ uint32_t sh_name; ++ uint32_t sh_type; ++ uint32_t sh_flags; ++ uint32_t sh_addr; ++ uint32_t sh_offset; ++ uint32_t sh_size; ++ uint32_t sh_link; ++ uint32_t sh_info; ++ uint32_t sh_addralign; ++ uint32_t sh_entsize; ++} Elf32_Shdr; ++ ++typedef struct { ++ uint32_t st_name; ++ uint32_t st_value; ++ uint32_t st_size; ++ unsigned char st_info; ++ unsigned char st_other; ++ uint16_t st_shndx; ++} Elf32_Sym; ++ ++enum ElfPhType { ++ PT_NULL, ++ PT_LOAD ++}; ++ ++enum ElfShType { ++ SHT_NULL, ++ SHT_PROGBITS, ++ SHT_SYMTAB, ++ SHT_STRTAB ++}; ++ + /** A processed Executable and Linkable Format (ELF) file. */ + class Elf { +- public: +- Elf(std::string path, char** imagePointer); +- ~Elf(); +- uint64_t getProcessImageSize() const; +- bool isValid() const; +- uint64_t getEntryPoint() const; +- +- private: +- uint64_t entryPoint_; +- std::vector headers_; +- uint32_t entryPoint32_; +- std::vector headers32_; +- bool isValid_ = false; +- uint64_t processImageSize_; +- bool mode32bit_; ++ public: ++ Elf(std::string path, char** imagePointer, std::unordered_map& symbols); ++ ~Elf(); ++ uint64_t getProcessImageSize() const; ++ bool isValid() const; ++ uint64_t getEntryPoint() const; ++ ++ private: ++ uint64_t entryPoint_; ++ std::vector headers_; ++ uint32_t entryPoint32_; ++ std::vector headers32_; ++ bool isValid_ = false; ++ uint64_t processImageSize_; ++ bool mode32bit_; + }; + + } // namespace simeng +diff --git a/src/include/simeng/GenericPredictor.hh b/src/include/simeng/GenericPredictor.hh +index 21df57a4..aff5ade8 100644 +--- a/src/include/simeng/GenericPredictor.hh ++++ b/src/include/simeng/GenericPredictor.hh +@@ -26,6 +26,9 @@ class GenericPredictor : public BranchPredictor { + GenericPredictor(YAML::Node config); + ~GenericPredictor(); + ++ BranchPrediction predict(uint64_t address, BranchType type, ++ uint64_t knownTarget, uint8_t byteLength) override; ++ + /** Generate a branch prediction for the supplied instruction address, a + * branch type, and a known target if not 0. Returns a branch direction and + * branch target address. */ +diff --git a/src/include/simeng/Instruction.hh b/src/include/simeng/Instruction.hh +index 8b1cf2f9..9ffc4a8d 100644 +--- a/src/include/simeng/Instruction.hh ++++ b/src/include/simeng/Instruction.hh +@@ -23,6 +23,9 @@ class Instruction { + * instruction. */ + bool exceptionEncountered() const; + ++ /** Binds an interrupt to this instruction */ ++ virtual void raiseInterrupt(int16_t& interruptId) {} ++ + /** Retrieve the source registers this instruction reads. 
*/ + virtual const span getOperandRegisters() const = 0; + +@@ -99,8 +102,8 @@ class Instruction { + /** Retrieve branch type. */ + virtual BranchType getBranchType() const = 0; + +- /** Retrieve a branch target from the instruction's metadata if known. */ +- virtual uint64_t getKnownTarget() const = 0; ++ /** Retrieve an offset of branch target from the instruction's metadata if known. */ ++ virtual uint64_t getKnownOffset() const = 0; + + /** Is this a store address operation (a subcategory of store operations which + * deal with the generation of store addresses to store data at)? */ +@@ -178,6 +181,12 @@ class Instruction { + /** Get arbitrary micro-operation index. */ + int getMicroOpIndex() const; + ++ bool isDiv() const; ++ ++ bool isMul() const; ++ ++ bool isSysCall() const; ++ + protected: + /** Whether an exception has been encountered. */ + bool exceptionEncountered_ = false; +@@ -208,8 +217,8 @@ class Instruction { + /** What type of branch this instruction is. */ + BranchType branchType_ = BranchType::Unknown; + +- /** If the branch target is known at the time of decode, store it. */ +- uint64_t knownTarget_ = 0; ++ /** If the offset of branch target is known at the time of decode, store it. */ ++ uint64_t knownOffset_ = 0; + + // Flushing + /** This instruction's sequence ID; a higher ID represents a chronologically +@@ -252,6 +261,12 @@ class Instruction { + /** An arbitrary index value for the micro-operation. Its use is based on the + * implementation of specific micro-operations. */ + int microOpIndex_; ++ ++ bool isMul_ = false; ++ ++ bool isDiv_ = false; ++ ++ bool isSysCall_ = false; + }; + + } // namespace simeng +\ No newline at end of file +diff --git a/src/include/simeng/arch/Architecture.hh b/src/include/simeng/arch/Architecture.hh +index edd404c8..29874c6d 100644 +--- a/src/include/simeng/arch/Architecture.hh ++++ b/src/include/simeng/arch/Architecture.hh +@@ -101,6 +101,9 @@ class Architecture { + /** Returns the maximum size of a valid instruction in bytes. */ + virtual uint8_t getMaxInstructionSize() const = 0; + ++ /** Returns the minimum size of a valid instruction in bytes. */ ++ virtual uint8_t getMinInstructionSize() const = 0; ++ + /** Returns the physical register structure as defined within the config + * file + */ +@@ -113,7 +116,7 @@ class Architecture { + YAML::Node config) const = 0; + + /** Updates System registers of any system-based timers. */ +- virtual void updateSystemTimerRegisters(RegisterFileSet* regFile, ++ virtual int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, + const uint64_t iterations) const = 0; + + /** Update trace file */ +diff --git a/src/include/simeng/arch/aarch64/Architecture.hh b/src/include/simeng/arch/aarch64/Architecture.hh +index ad14dc1c..3c1ce27f 100644 +--- a/src/include/simeng/arch/aarch64/Architecture.hh ++++ b/src/include/simeng/arch/aarch64/Architecture.hh +@@ -51,6 +51,9 @@ class Architecture : public arch::Architecture { + /** Returns the maximum size of a valid instruction in bytes. */ + uint8_t getMaxInstructionSize() const override; + ++ /** Returns the minimum size of a valid instruction in bytes. */ ++ uint8_t getMinInstructionSize() const override; ++ + /** Returns the current vector length set by the provided configuration. */ + uint64_t getVectorLength() const; + +@@ -59,7 +62,7 @@ class Architecture : public arch::Architecture { + uint64_t getStreamingVectorLength() const; + + /** Updates System registers of any system-based timers. 
*/ +- void updateSystemTimerRegisters(RegisterFileSet* regFile, ++ int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, + const uint64_t iterations) const override; + + /** Returns the physical register structure as defined within the config file +diff --git a/src/include/simeng/arch/aarch64/Instruction.hh b/src/include/simeng/arch/aarch64/Instruction.hh +index 43d1bd49..bffa3c62 100644 +--- a/src/include/simeng/arch/aarch64/Instruction.hh ++++ b/src/include/simeng/arch/aarch64/Instruction.hh +@@ -301,7 +301,7 @@ class Instruction : public simeng::Instruction { + BranchType getBranchType() const override; + + /** Retrieve a branch target from the instruction's metadata if known. */ +- uint64_t getKnownTarget() const override; ++ uint64_t getKnownOffset() const override; + + /** Is this a store address operation (a subcategory of store operations which + * deal with the generation of store addresses to store data at)? */ +diff --git a/src/include/simeng/arch/riscv/Architecture.hh b/src/include/simeng/arch/riscv/Architecture.hh +index de6c76c7..3bdb6287 100644 +--- a/src/include/simeng/arch/riscv/Architecture.hh ++++ b/src/include/simeng/arch/riscv/Architecture.hh +@@ -6,27 +6,18 @@ + #include + + #include "simeng/arch/Architecture.hh" +-#include "simeng/arch/riscv/ExceptionHandler.hh" ++ + #include "simeng/arch/riscv/Instruction.hh" + #include "simeng/kernel/Linux.hh" + + using csh = size_t; + ++#include "simeng/arch/riscv/SystemRegister.hh" ++#include "simeng/arch/riscv/ExceptionHandler.hh" ++ + namespace simeng { + namespace arch { + namespace riscv { +- +-enum riscv_sysreg { +- SYSREG_MSTATUS = 0x300, +- SYSREG_MSTATUSH = 0x310, +- SYSREG_MEPC = 0x341, +- SYSREG_MCAUSE = 0x342, +- SYSREG_MHARTID = 0xF14, +- SYSREG_CYCLE = 0xC00, +- SYSREG_TIME = 0xC01, +- SYSREG_INSTRRET = 0xC02 +-}; +- + struct constantsPool { + const uint8_t alignMask = 0x3; + const uint8_t alignMaskCompressed = 0x1; +@@ -45,7 +36,7 @@ struct archConstants { + /* A basic RISC-V implementation of the `Architecture` interface. */ + class Architecture : public arch::Architecture { + public: +- Architecture(kernel::Linux& kernel, YAML::Node config); ++ Architecture(kernel::Linux& kernel, YAML::Node config, std::shared_ptr& dataMemory); + ~Architecture(); + /** Pre-decode instruction memory into a macro-op of `Instruction` + * instances. Returns the number of bytes consumed to produce it (always 4), +@@ -60,6 +51,9 @@ class Architecture : public arch::Architecture { + /** Returns a zero-indexed register tag for a system register encoding. */ + int32_t getSystemRegisterTag(uint16_t reg) const override; + ++ /** Returns a System Register index from a system register tag. */ ++ uint16_t getSystemRegisterIdFromTag(int32_t tag) const; ++ + /** Returns the number of system registers that have a mapping. */ + uint16_t getNumSystemRegisters() const override; + +@@ -77,8 +71,11 @@ class Architecture : public arch::Architecture { + /** Returns the maximum size of a valid instruction in bytes. */ + uint8_t getMaxInstructionSize() const override; + +- /** Updates System registers of any system-based timers. */ +- void updateSystemTimerRegisters(RegisterFileSet* regFile, ++ /** Returns the minimum size of a valid instruction in bytes. */ ++ uint8_t getMinInstructionSize() const override; ++ ++ /** Updates System registers of any system-based timers. 
Return +ve id if interrupt occurs */ ++ int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, + const uint64_t iterations) const override; + + /** Returns the physical register structure as defined within the config file +@@ -117,6 +114,18 @@ class Architecture : public arch::Architecture { + /** A mapping from system register encoding to a zero-indexed tag. */ + std::unordered_map systemRegisterMap_; + ++ /** Ordered map of memory mapped system regsiters banks **/ ++ std::map memoryMappedSystemRegisterBlocks; ++ ++ /* Memory Interface through which memory mapped system registers are accessed */ ++ std::shared_ptr systemRegisterMemoryInterface; ++ ++ /* Optional Clint block which replicates that functionality in spike */ ++ std::shared_ptr clint; ++ ++ /* Optional Host Target Interface block which replicates that functionality in spike */ ++ std::shared_ptr htif; ++ + /** A map to hold the relationship between aarch64 instruction groups and + * user-defined execution information. */ + std::unordered_map groupExecutionInfo_; +diff --git a/src/include/simeng/arch/riscv/ExceptionHandler.hh b/src/include/simeng/arch/riscv/ExceptionHandler.hh +index 02d29c93..36cfd5d1 100644 +--- a/src/include/simeng/arch/riscv/ExceptionHandler.hh ++++ b/src/include/simeng/arch/riscv/ExceptionHandler.hh +@@ -57,6 +57,9 @@ class ExceptionHandler : public simeng::arch::ExceptionHandler { + */ + bool readBufferThen(uint64_t ptr, uint64_t length, std::function then, + bool firstCall = true); ++ ++ /** generate system register changes associated with taking an exception **/ ++ void takeException(uint64_t causecode); + + /** A data buffer used for reading data from memory. */ + std::vector dataBuffer; +diff --git a/src/include/simeng/arch/riscv/Instruction.hh b/src/include/simeng/arch/riscv/Instruction.hh +index 3f023d28..60966ce0 100644 +--- a/src/include/simeng/arch/riscv/Instruction.hh ++++ b/src/include/simeng/arch/riscv/Instruction.hh +@@ -48,7 +48,8 @@ enum class InstructionException { + HypervisorCall, + SecureMonitorCall, + UnmappedSysReg, +- NoAvailablePort ++ NoAvailablePort, ++ Interrupt + }; + + enum CInstructionFormat { +@@ -87,6 +88,18 @@ class Instruction : public simeng::Instruction { + * processing this instruction. */ + virtual InstructionException getException() const; + ++ /** Raise an interrupt. */ ++ void raiseInterrupt(int16_t& interruptId) ++ { ++ interruptId_ = interruptId; ++ exceptionEncountered_ = true; ++ exception_ = InstructionException::Interrupt; ++ interruptId = -1; ++ } ++ ++ /** Get Id of this interrupr */ ++ int16_t getInterruptId() const { return interruptId_; } ++ + /** Retrieve the source registers this instruction reads. */ + const span getOperandRegisters() const override; + +@@ -139,8 +152,8 @@ class Instruction : public simeng::Instruction { + /** Retrieve branch type. */ + BranchType getBranchType() const override; + +- /** Retrieve a branch target from the instruction's metadata if known. */ +- uint64_t getKnownTarget() const override; ++ /** Retrieve an offset of branch target from the instruction's metadata if known. */ ++ uint64_t getKnownOffset() const override; + + /** Is this a store address operation (a subcategory of store operations which + * deal with the generation of store addresses to store data at)? */ +@@ -186,6 +199,8 @@ class Instruction : public simeng::Instruction { + /** ONLY valid after decode. 
Return regByteWidth */ + uint8_t getArchRegWidth() const; + ++ const Architecture& getArchitecture() const; ++ + private: + /** The maximum number of source registers any supported RISC-V instruction + * can have. */ +@@ -292,7 +307,9 @@ class Instruction : public simeng::Instruction { + std::vector memoryData; + + /** Return integer register value, to support both 32-bit and 64-bit mode */ +- int64_t getSignedInt(RegisterValue& value) const; ++ int64_t getSignedInt(RegisterValue& value) const; ++ ++ int16_t interruptId_; + }; + + } // namespace riscv +diff --git a/src/include/simeng/arch/riscv/SystemRegister.hh b/src/include/simeng/arch/riscv/SystemRegister.hh +new file mode 100644 +index 00000000..0556156e +--- /dev/null ++++ b/src/include/simeng/arch/riscv/SystemRegister.hh +@@ -0,0 +1,229 @@ ++#pragma once ++ ++#include ++#include ++#include ++#include ++ ++#include "simeng/arch/Architecture.hh" ++ ++#include "simeng/arch/riscv/Instruction.hh" ++#include "simeng/kernel/Linux.hh" ++ ++namespace simeng { ++namespace arch { ++namespace riscv { ++ ++// Should probably move to Capstone ++ ++enum riscv_sysreg { ++ SYSREG_MSTATUS = 0x300, ++ SYSREG_MIE = 0x304, ++ SYSREG_MTVEC = 0x305, ++ SYSREG_MSTATUSH = 0x310, ++ SYSREG_MSCRATCH = 0x340, ++ SYSREG_MEPC = 0x341, ++ SYSREG_MCAUSE = 0x342, ++ SYSREG_MHARTID = 0xF14, ++ SYSREG_MXCPTSC = 0xFC2, ++ SYSREG_CYCLE = 0xC00, ++ SYSREG_TIME = 0xC01, ++ SYSREG_INSTRRET = 0xC02 ++}; ++ ++enum riscv_causecode_enum { ++ CAUSE_IADDRESS_MISALIGN = 0, ++ CAUSE_IACCESS_FAULT = 1, ++ CAUSE_ILLEGAL_INSTRUCTION = 2, ++ CAUSE_BREAKPOINT = 3, ++ CAUSE_LDADDRESS_MISALIGN = 4, ++ CAUSE_LDACCESS_FAULT = 5, ++ CAUSE_STADDRESS_MISALIGN = 6, ++ CAUSE_STACCESS_FAULT = 7, ++ CAUSE_ECALL_FROM_M = 11 ++}; ++ ++enum class InterruptId { ++ HALT = 1, ++ TIMER = 7 ++}; ++ ++enum riscv_sysreg_masks { ++ MSTATUS_MIE_MASK = 0x8, ++ MSTATUS_MPIE_MASK = 0x80 ++}; ++ ++typedef uint16_t riscv_causecode; ++ ++class MemoryMappedSystemRegister { ++ public: ++ MemoryMappedSystemRegister(const RegisterValue& val) : state(val) {} ++ bool size() { return state.size(); } ++ virtual void put(const RegisterValue& val) { state = val; } ++ virtual const RegisterValue& get() { return state; } ++ private: ++ RegisterValue state; ++}; ++ ++class MemoryMappedSystemRegisterBlock { ++ public: ++ MemoryMappedSystemRegisterBlock(size_t sz) : size_(sz) {} ++ size_t size() { return size_; } ++ virtual bool put(uint16_t, const RegisterValue&); ++ virtual bool get(uint16_t, RegisterValue&); ++ virtual void tick() {} ++ protected: ++ /** Ordered map of memory mapped system regsiters **/ ++ std::map memoryMappedSystemRegisters; ++ size_t size_; ++}; ++ ++class SystemRegisterMemoryInterface : public MemoryInterface { ++ public: ++ SystemRegisterMemoryInterface( ++ std::shared_ptr& dataMemory, ++ std::map& memoryMappedSystemRegisterBlocks ++ ) : ++ dataMemory_(dataMemory), ++ memoryMappedSystemRegisterBlocks_(memoryMappedSystemRegisterBlocks) ++ {} ++ ++ /** Request a read from the supplied target location. */ ++ virtual void requestRead(const MemoryAccessTarget& target, ++ uint64_t requestId = 0) ++ { ++ RegisterValue data(0,target.size); ++ if (getMemoryMappedSystemRegister(target.address, data)) ++ completedReads_.push_back({target, data, requestId}); ++ else ++ dataMemory_.get()->requestRead(target,requestId); ++ } ++ ++ /** Request a write of `data` to the target location. 
*/ ++ virtual void requestWrite(const MemoryAccessTarget& target, ++ const RegisterValue& data) ++ { ++ if (!putMemoryMappedSystemRegister(target.address, data)) ++ dataMemory_.get()->requestWrite(target,data); ++ } ++ ++ /** Retrieve all completed read requests. */ ++ virtual const span getCompletedReads() const ++ { ++ if (completedReads_.empty()) ++ return dataMemory_.get()->getCompletedReads(); ++ else ++ return {const_cast(completedReads_.data()), completedReads_.size()}; ++ } ++ ++ /** Clear the completed reads. */ ++ virtual void clearCompletedReads() ++ { ++ if (completedReads_.empty()) ++ dataMemory_.get()->clearCompletedReads(); ++ else ++ completedReads_.clear(); ++ } ++ ++ /** Returns true if there are any oustanding memory requests in-flight. */ ++ virtual bool hasPendingRequests() const ++ { ++ return dataMemory_.get()->hasPendingRequests(); ++ } ++ ++ /** Tick the memory interface to allow it to process internal tasks. ++ * ++ * TODO: Move ticking out of the memory interface and into a central "memory ++ * system" covering a set of related interfaces. ++ */ ++ virtual void tick() ++ { ++ dataMemory_.get()->tick(); ++ } ++ ++ private : ++ /** Put/Get Memory Mapped Registers */ ++ bool putMemoryMappedSystemRegister(uint64_t address, const RegisterValue& value); ++ bool getMemoryMappedSystemRegister(uint64_t address, RegisterValue& value); ++ ++ std::shared_ptr dataMemory_; ++ ++ /** Address map of all system register blocks */ ++ std::map& memoryMappedSystemRegisterBlocks_; ++ ++ /** A vector containing all completed read requests. */ ++ std::vector completedReads_; ++}; ++ ++class Architecture; ++ ++class HostTargetInterface : public MemoryMappedSystemRegisterBlock { ++ public: ++ enum { ++ PAYLOAD_OFFSET = 0, ++ DEVICEID_OFFSET = 4 ++ }; ++ ++ HostTargetInterface(Architecture& architecture) ++ : ++ MemoryMappedSystemRegisterBlock(8), ++ architecture_(architecture), ++ isHalted_(false) ++ { ++ memoryMappedSystemRegisters[PAYLOAD_OFFSET] = new MemoryMappedSystemRegister(static_cast(0)); ++ memoryMappedSystemRegisters[DEVICEID_OFFSET] = new MemoryMappedSystemRegister(static_cast(0)); ++ } ++ ++ bool put(uint16_t offset, const RegisterValue&value); ++ ++ int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) { ++ if (isHalted_) ++ return static_cast(InterruptId::HALT); ++ return -1; ++ } ++ ++ private : ++ Architecture& architecture_; ++ bool isHalted_; ++}; ++ ++class Clint : public MemoryMappedSystemRegisterBlock { ++ public: ++ enum { ++ CLINT_BASE = 0x02000000, ++ CLINT_SIZE = 0x0000c000, ++ MTIMECMP_OFFSET = 0x4000, ++ MTIME_OFFSET = 0xbff8 ++ }; ++ ++ Clint(Architecture& architecture) ++ : ++ MemoryMappedSystemRegisterBlock(CLINT_SIZE), ++ architecture_(architecture), ++ mtime_(static_cast(0)), ++ mtimecmp_(static_cast(0)), ++ mtime_freq(100), ++ mtime_count(0), ++ last_tick(0) ++ { ++ memoryMappedSystemRegisters[MTIME_OFFSET] = &mtime_; ++ memoryMappedSystemRegisters[MTIMECMP_OFFSET] = &mtimecmp_; ++ } ++ ++ int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations); ++ ++ private : ++ Architecture& architecture_; ++ ++ MemoryMappedSystemRegister mtime_; ++ MemoryMappedSystemRegister mtimecmp_; ++ ++ uint32_t mtime_freq; ++ uint32_t mtime_count; ++ uint64_t last_tick; ++}; ++ ++ ++} // namespace riscv ++} // namespace arch ++} // namespace simeng +diff --git a/src/include/simeng/kernel/Linux.hh b/src/include/simeng/kernel/Linux.hh +index 0908d590..635bd427 100644 +--- a/src/include/simeng/kernel/Linux.hh ++++ 
b/src/include/simeng/kernel/Linux.hh +@@ -93,6 +93,8 @@ struct LinuxProcessState { + std::vector fileDescriptorTable; + /** Set of deallocated virtual file descriptors available for reuse. */ + std::set freeFileDescriptors; ++ /** Pointer to LinuxProcess from which ProcessState derived*/ ++ const LinuxProcess* process; + }; + + /** Fixed-width definition of 'rusage' (from ). */ +@@ -236,6 +238,9 @@ class Linux { + /** The maximum size of a filesystem path. */ + static const size_t LINUX_PATH_MAX = 4096; + ++ /** Lookup symbol value from table in elf file. */ ++ bool lookupSymbolValue(const std::string symbol, uint64_t& value); ++ + private: + /** Resturn correct Dirfd depending on given pathname abd dirfd given to + * syscall. */ +diff --git a/src/include/simeng/kernel/LinuxProcess.hh b/src/include/simeng/kernel/LinuxProcess.hh +index 9796b529..d6b2c4a9 100644 +--- a/src/include/simeng/kernel/LinuxProcess.hh ++++ b/src/include/simeng/kernel/LinuxProcess.hh +@@ -77,6 +77,9 @@ class LinuxProcess { + /** Check whether the process image was created successfully. */ + bool isValid() const; + ++ /** Lookup symbol value from table in elf file. */ ++ bool lookupSymbolValue(const std::string symbol, uint64_t& value) const; ++ + private: + /** The size of the stack, in bytes. */ + const uint64_t STACK_SIZE; +@@ -113,6 +116,8 @@ class LinuxProcess { + + /** Shared pointer to processImage. */ + std::shared_ptr processImage_; ++ ++ std::unordered_map symbols_; + }; + + } // namespace kernel +diff --git a/src/include/simeng/models/emulation/Core.hh b/src/include/simeng/models/emulation/Core.hh +index c4a4acc4..1db10d23 100644 +--- a/src/include/simeng/models/emulation/Core.hh ++++ b/src/include/simeng/models/emulation/Core.hh +@@ -108,6 +108,9 @@ class Core : public simeng::Core { + + /** The number of branches executed. */ + uint64_t branchesExecuted_ = 0; ++ ++ /** Set to interruptId when interrupt occurs, otherwise -1 */ ++ int16_t interruptId_; + }; + + } // namespace emulation +diff --git a/src/include/simeng/models/mcu/Core.hh b/src/include/simeng/models/mcu/Core.hh +new file mode 100644 +index 00000000..de6a53d3 +--- /dev/null ++++ b/src/include/simeng/models/mcu/Core.hh +@@ -0,0 +1,181 @@ ++#pragma once ++ ++#include ++ ++#include "simeng/ArchitecturalRegisterFileSet.hh" ++#include "simeng/Core.hh" ++#include "simeng/FlatMemoryInterface.hh" ++#include "simeng/pipeline_hi/DecodeUnit.hh" ++#include "simeng/pipeline_hi/ExecuteUnit.hh" ++#include "simeng/pipeline_hi/FetchUnit.hh" ++#include "simeng/pipeline_hi/WritebackUnit.hh" ++#include "simeng/pipeline_hi/StaticPredictor.hh" ++#include "simeng/pipeline_hi/LoadStoreQueue.hh" ++#include "simeng/pipeline_hi/RegDepMap.hh" ++ ++#include "simeng/arch/riscv/Architecture.hh" ++ ++namespace simeng { ++namespace models { ++namespace mcu { ++ ++/** An entry in the reservation station. */ ++struct dependencyEntry1 { ++ /** The instruction to execute. */ ++ std::shared_ptr uop; ++ ++ /** The operand waiting on a value. */ ++ uint16_t operandIndex; ++}; ++ ++/** A simple scalar in-order pipelined core model. */ ++class Core : public simeng::Core { ++ public: ++ /** Construct a core model, providing an ISA and branch predictor to use, ++ * along with a pointer and size of instruction memory, and a pointer to ++ * process memory. 
*/ ++ Core(MemoryInterface& instructionMemory, MemoryInterface& dataMemory, ++ uint64_t processMemorySize, uint64_t entryPoint, ++ const arch::Architecture& isa, BranchPredictor& branchPredictor, YAML::Node config); ++ ++ /** Tick the core. Ticks each of the pipeline stages sequentially, then ticks ++ * the buffers between them. Checks for and executes pipeline flushes at the ++ * end of each cycle. */ ++ void tick() override; ++ ++ /** Check whether the program has halted. */ ++ bool hasHalted() const override; ++ ++ /** Retrieve the architectural register file set. */ ++ const ArchitecturalRegisterFileSet& getArchitecturalRegisterFileSet() ++ const override; ++ ++ /** Retrieve the number of instructions retired. */ ++ uint64_t getInstructionsRetiredCount() const override; ++ ++ /** Retrieve the simulated nanoseconds elapsed since the core started. */ ++ uint64_t getSystemTimer() const override; ++ ++ /** Generate a map of statistics to report. */ ++ std::map getStats() const override; ++ ++ private: ++ /** Raise an exception to the core, providing the generating instruction. */ ++ void raiseException(const std::shared_ptr& instruction); ++ ++ /** Handle an exception raised during the cycle. */ ++ void handleException(); ++ ++ /** Load and supply memory data requested by an instruction. */ ++ void loadData(const std::shared_ptr& instruction); ++ /** Store data supplied by an instruction to memory. */ ++ void storeData(const std::shared_ptr& instruction); ++ ++ /** Forward operands to the most recently decoded instruction. */ ++ void forwardOperands(const span& destinations, ++ const span& values); ++ ++ bool canIssue(const std::shared_ptr& instruction); ++ void removeDep(const std::shared_ptr& instruction); ++ ++ /** Read pending registers for the most recently decoded instruction. */ ++ void readRegisters(); ++ ++ /** Process the active exception handler. */ ++ void processExceptionHandler(); ++ ++ /** Apply changes to the process state. */ ++ void applyStateChange(const arch::ProcessStateChange& change); ++ ++ /** Handle requesting/execution of a load instruction. */ ++ void handleLoad(const std::shared_ptr& instruction); ++ ++ void addInstrOrderQ(const std::shared_ptr& instruction); ++ bool removeInstrOrderQ(const std::shared_ptr& instruction); ++ ++ /** The process memory. */ ++ MemoryInterface& dataMemory_; ++ ++ /** A reference to the core's architecture. */ ++ const arch::Architecture& isa_; ++ ++ /** The core's register file set. */ ++ RegisterFileSet registerFileSet_; ++ ++ /** An architectural register file set, serving as a simple wrapper around the ++ * register file set. */ ++ ArchitecturalRegisterFileSet architecturalRegisterFileSet_; ++ ++ /** The process memory. */ ++ span processMemory; ++ ++ /** The buffer between fetch and decode. */ ++ pipeline_hi::PipelineBuffer fetchToDecodeBuffer_; ++ ++ /** The buffer between decode and execute. */ ++ pipeline_hi::PipelineBuffer> decodeToExecuteBuffer_; ++ ++ /** The buffer between execute and writeback. */ ++ std::vector>> ++ completionSlots_; ++ ++ /** The previously generated addresses. */ ++ std::queue previousAddresses_; ++ ++ /** The register dependency map. */ ++ pipeline_hi::RegDepMap regDepMap_; ++ ++ /** The fetch unit; fetches instructions from memory. */ ++ pipeline_hi::FetchUnit fetchUnit_; ++ ++ /** The decode unit; decodes instructions into uops and reads operands. */ ++ pipeline_hi::DecodeUnit decodeUnit_; ++ ++ /** The execute unit; executes uops and sends to writeback, also forwarding ++ * results. 
*/ ++ pipeline_hi::ExecuteUnit executeUnit_; ++ ++ /** The writeback unit; writes uop results to the register files. */ ++ pipeline_hi::WritebackUnit writebackUnit_; ++ ++ pipeline_hi::LoadStoreQueue loadStoreQueue_; ++ ++ /** The number of times the pipeline has been flushed. */ ++ uint64_t flushes_ = 0; ++ ++ /** The number of times this core has been ticked. */ ++ uint64_t ticks_ = 0; ++ ++ uint64_t lastCommitTick_ = 0; ++ ++ /** Whether an exception was generated during the cycle. */ ++ bool exceptionGenerated_ = false; ++ ++ /** A pointer to the instruction responsible for generating the exception. */ ++ std::shared_ptr exceptionGeneratingInstruction_; ++ ++ /** Whether the core has halted. */ ++ bool hasHalted_ = false; ++ ++ /** The active exception handler. */ ++ std::shared_ptr exceptionHandler_; ++ ++ std::deque> inorderIQ_; ++ ++ void checkHalting(); ++ bool enableHaltCheck = false; ++ uint64_t maxStallCycleTimeout; ++ uint64_t maxSimCycleTimeout; ++ uint64_t maxInstrTimeout; ++ ++ /** Set to interruptId when interrupt occurs, otherwise -1 */ ++ int16_t interruptId_; ++ ++ /** Return interrupt id of the pending interrupt*/ ++ int16_t isInterruptPending(); ++ ++}; ++ ++} // namespace mcu ++} // namespace models ++} // namespace simeng +diff --git a/src/include/simeng/pipeline_hi/DecodeUnit.hh b/src/include/simeng/pipeline_hi/DecodeUnit.hh +new file mode 100644 +index 00000000..728dff88 +--- /dev/null ++++ b/src/include/simeng/pipeline_hi/DecodeUnit.hh +@@ -0,0 +1,66 @@ ++#pragma once ++ ++#include ++#include ++ ++#include "simeng/arch/Architecture.hh" ++#include "simeng/pipeline_hi/PipelineBuffer.hh" ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++/** A decode unit for a pipelined processor. Splits pre-decoded macro-ops into ++ * uops. */ ++class DecodeUnit { ++ public: ++ /** Constructs a decode unit with references to input/output buffers and the ++ * current branch predictor. */ ++ DecodeUnit(PipelineBuffer& input, ++ PipelineBuffer>& output, ++ BranchPredictor& predictor, ++ std::function&)> canIssue); ++ ++ /** Ticks the decode unit. Breaks macro-ops into uops, and performs early ++ * branch misprediction checks. */ ++ void tick(); ++ ++ /** Check whether the core should be flushed this cycle. */ ++ bool shouldFlush() const; ++ ++ /** Retrieve the target instruction address associated with the most recently ++ * discovered misprediction. */ ++ uint64_t getFlushAddress() const; ++ ++ /** Retrieve the number of times that the decode unit requested a flush due to ++ * discovering a branch misprediction early. */ ++ uint64_t getEarlyFlushes() const; ++ ++ /** Clear the microOps_ queue. */ ++ void purgeFlushed(); ++ ++ private: ++ /** A buffer of macro-ops to split into uops. */ ++ PipelineBuffer& input_; ++ /** An internal buffer for storing one or more uops. */ ++ std::deque> microOps_; ++ /** A buffer for writing decoded uops into. */ ++ PipelineBuffer>& output_; ++ ++ /** A reference to the current branch predictor. */ ++ BranchPredictor& predictor_; ++ ++ /** Whether the core should be flushed after this cycle. */ ++ bool shouldFlush_; ++ ++ /** The target instruction address the PC should be updated to upon flush. */ ++ uint64_t pc_; ++ ++ /** The number of times that the decode unit requested a flush due to ++ * discovering a branch misprediction early. 
*/ ++ uint64_t earlyFlushes_ = 0; ++ ++ std::function&)> canIssue_; ++}; ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/include/simeng/pipeline_hi/DispatchIssueUnit.hh b/src/include/simeng/pipeline_hi/DispatchIssueUnit.hh +new file mode 100644 +index 00000000..132358fd +--- /dev/null ++++ b/src/include/simeng/pipeline_hi/DispatchIssueUnit.hh +@@ -0,0 +1,150 @@ ++#pragma once ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "simeng/Instruction.hh" ++#include "simeng/pipeline_hi/PipelineBuffer.hh" ++#include "simeng/pipeline_hi/PortAllocator.hh" ++#include "yaml-cpp/yaml.h" ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++/** A reservation station issue port */ ++struct ReservationStationPort { ++ /** Issue port this port maps to */ ++ uint16_t issuePort; ++ /** Queue of instructions that are ready to be ++ * issued */ ++ std::deque> ready; ++}; ++ ++/** A reservation station */ ++struct ReservationStation { ++ /** Size of reservation station */ ++ uint16_t capacity; ++ /** Number of instructions that can be dispatched to this unit per cycle. */ ++ uint16_t dispatchRate; ++ /** Current number of non-stalled instructions ++ * in reservation station */ ++ uint16_t currentSize; ++ /** Issue ports belonging to reservation station */ ++ std::vector ports; ++}; ++ ++/** An entry in the reservation station. */ ++struct dependencyEntry { ++ /** The instruction to execute. */ ++ std::shared_ptr uop; ++ /** The port to issue to. */ ++ uint16_t port; ++ /** The operand waiting on a value. */ ++ uint16_t operandIndex; ++}; ++ ++/** A dispatch/issue unit for an out-of-order pipelined processor. Reads ++ * instruction operand and performs scoreboarding. Issues instructions to the ++ * execution unit once ready. */ ++class DispatchIssueUnit { ++ public: ++ /** Construct a dispatch/issue unit with references to input/output buffers, ++ * the register file, the port allocator, and a description of the number of ++ * physical registers the scoreboard needs to reflect. */ ++ DispatchIssueUnit( ++ PipelineBuffer>& fromRename, ++ std::vector>>& issuePorts, ++ const RegisterFileSet& registerFileSet, PortAllocator& portAllocator, ++ const std::vector& physicalRegisterStructure, ++ YAML::Node config); ++ ++ /** Ticks the dispatch/issue unit. Reads available input operands for ++ * instructions and sets scoreboard flags for destination registers. */ ++ void tick(); ++ ++ /** Identify the oldest ready instruction in the reservation station and issue ++ * it. */ ++ void issue(); ++ ++ /** Forwards operands and performs register reads for the currently queued ++ * instruction. */ ++ void forwardOperands(const span& destinations, ++ const span& values); ++ ++ /** Set the scoreboard entry for the provided register as ready. */ ++ void setRegisterReady(Register reg); ++ ++ /** Clear the RS of all flushed instructions. */ ++ void purgeFlushed(); ++ ++ /** Retrieve the number of cycles this unit stalled due to insufficient RS ++ * space. */ ++ uint64_t getRSStalls() const; ++ ++ /** Retrieve the number of cycles no instructions were issued due to an empty ++ * RS. */ ++ uint64_t getFrontendStalls() const; ++ ++ /** Retrieve the number of cycles no instructions were issued due to ++ * dependencies or a lack of available ports. */ ++ uint64_t getBackendStalls() const; ++ ++ /** Retrieve the number of times an instruction was unable to issue due to a ++ * busy port. 
*/ ++ uint64_t getPortBusyStalls() const; ++ ++ /** Retrieve the current sizes and capacities of the reservation stations*/ ++ void getRSSizes(std::vector&) const; ++ ++ private: ++ /** A buffer of instructions to dispatch and read operands for. */ ++ PipelineBuffer>& input_; ++ ++ /** Ports to the execution units, for writing ready instructions to. */ ++ std::vector>>& issuePorts_; ++ ++ /** A reference to the physical register file set. */ ++ const RegisterFileSet& registerFileSet_; ++ ++ /** The register availability scoreboard. */ ++ std::vector> scoreboard_; ++ ++ /** Reservation stations */ ++ std::vector reservationStations_; ++ ++ /** A mapping from port to RS port */ ++ std::vector> portMapping_; ++ ++ /** A dependency matrix, containing all the instructions waiting on an ++ * operand. For a register `{type,tag}`, the vector of dependents may be found ++ * at `dependencyMatrix[type][tag]`. */ ++ std::vector>> dependencyMatrix_; ++ ++ /** A map to collect flushed instructions for each reservation station. */ ++ std::unordered_map>> ++ flushed_; ++ ++ /** A reference to the execution port allocator. */ ++ PortAllocator& portAllocator_; ++ ++ /** The number of cycles stalled due to a full reservation station. */ ++ uint64_t rsStalls_ = 0; ++ ++ /** The number of cycles no instructions were issued due to an empty RS. */ ++ uint64_t frontendStalls_ = 0; ++ ++ /** The number of cycles no instructions were issued due to dependencies or a ++ * lack of available ports. */ ++ uint64_t backendStalls_ = 0; ++ ++ /** The number of times an instruction was unable to issue due to a busy port. ++ */ ++ uint64_t portBusyStalls_ = 0; ++}; ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/include/simeng/pipeline_hi/ExecuteUnit.hh b/src/include/simeng/pipeline_hi/ExecuteUnit.hh +new file mode 100644 +index 00000000..da51db34 +--- /dev/null ++++ b/src/include/simeng/pipeline_hi/ExecuteUnit.hh +@@ -0,0 +1,147 @@ ++#pragma once ++ ++#include ++#include ++ ++#include "simeng/BranchPredictor.hh" ++#include "simeng/Instruction.hh" ++#include "simeng/pipeline_hi/PipelineBuffer.hh" ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++/** An execution unit pipeline entry, containing an instruction, and an ++ * indication of when it's reached the front of the execution pipeline. */ ++struct ExecutionUnitPipelineEntry { ++ /** The instruction queued for execution. */ ++ std::shared_ptr insn; ++ /** The tick number this instruction will reach the front of the queue at. */ ++ uint64_t readyAt; ++}; ++ ++/** An execute unit for a pipelined processor. Executes instructions and ++ * forwards results. */ ++class ExecuteUnit { ++ public: ++ /** Constructs an execute unit with references to an input and output buffer, ++ * the currently used branch predictor, and handlers for forwarding operands, ++ * loads/stores, and exceptions. */ ++ ExecuteUnit( ++ PipelineBuffer>& input, ++ PipelineBuffer>& output, ++ std::function, span)> forwardOperands, ++ std::function&)> handleLoad, ++ std::function&)> handleStore, ++ std::function&)> raiseException, ++ std::function&)> addInstrOrderQ, ++ std::function isInterruptPending, ++ BranchPredictor& predictor, bool pipelined = true, ++ const std::vector& blockingGroups = {}); ++ ++ /** Tick the execute unit. Places incoming instructions into the pipeline and ++ * executes an instruction that has reached the head of the pipeline, if ++ * present. */ ++ void tick(); ++ ++ /** Query whether a branch misprediction was discovered this cycle. 
*/ ++ bool shouldFlush() const; ++ ++ /** Retrieve the target instruction address associated with the most recently ++ * discovered misprediction. */ ++ uint64_t getFlushAddress() const; ++ ++ /** Retrieve the sequence ID associated with the most recently discovered ++ * misprediction. */ ++ uint64_t getFlushSeqId() const; ++ ++ /** Purge flushed instructions from the internal pipeline and clear any active ++ * stall, if applicable. */ ++ void purgeFlushed(); ++ ++ /** Retrieve the number of branch instructions that have been executed. */ ++ uint64_t getBranchExecutedCount() const; ++ ++ /** Retrieve the number of branch mispredictions. */ ++ uint64_t getBranchMispredictedCount() const; ++ ++ /** Retrieve the number of active execution cycles. */ ++ uint64_t getCycles() const; ++ ++ private: ++ /** Execute the supplied uop, write it into the output buffer, and forward ++ * results back to dispatch/issue. */ ++ void execute(std::shared_ptr& uop); ++ ++ /** A buffer of instructions to execute. */ ++ PipelineBuffer>& input_; ++ ++ /** A buffer for writing executed instructions into. */ ++ PipelineBuffer>& output_; ++ ++ /** A function handle called when forwarding operands. */ ++ std::function, span)> forwardOperands_; ++ ++ /** A function handle called after generating the addresses for a load. */ ++ std::function&)> handleLoad_; ++ /** A function handle called after acquiring the data for a store. */ ++ std::function&)> handleStore_; ++ ++ /** A function handle called upon exception generation. */ ++ std::function&)> raiseException_; ++ ++ /** A function to add the executed instruction into an ordering queue. */ ++ std::function&)> addInstrOrderQ_; ++ ++ /** Check if any interrupts are pending */ ++ std::function isInterruptPending_; ++ ++ /** A reference to the branch predictor, for updating with prediction results. ++ */ ++ BranchPredictor& predictor_; ++ ++ /** Whether this unit is pipelined, or if all instructions should stall until ++ * complete. */ ++ bool pipelined_; ++ ++ /** The execution unit's internal pipeline, holding instructions until their ++ * execution latency has expired and they are ready for their final results to ++ * be calculated and forwarded. */ ++ std::deque pipeline_; ++ ++ /** A group of operation types that are blocked whilst a similar operation ++ * is being executed. */ ++ std::vector blockingGroups_; ++ ++ /** A queue to hold blocked instructions of a similar group type to ++ * blockingGroup_. */ ++ std::deque> operationsStalled_; ++ ++ /** Whether the core should be flushed after this cycle. */ ++ bool shouldFlush_ = false; ++ ++ /** The target instruction address the PC should be reset to after this cycle. ++ */ ++ uint64_t pc_; ++ ++ /** The sequence ID of the youngest instruction that should remain after the ++ * current flush. */ ++ uint64_t flushAfter_; ++ ++ /** The number of times this unit has been ticked. */ ++ uint64_t tickCounter_ = 0; ++ ++ /** The cycle this unit will become unstalled. */ ++ uint64_t stallUntil_ = 0; ++ ++ /** The number of branch instructions that were executed. */ ++ uint64_t branchesExecuted_ = 0; ++ ++ /** The number of branch mispredictions that were observed. */ ++ uint64_t branchMispredicts_ = 0; ++ ++ /** The number of active execution cycles that were observed. 
*/ ++ uint64_t cycles_ = 0; ++}; ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/include/simeng/pipeline_hi/FetchUnit.hh b/src/include/simeng/pipeline_hi/FetchUnit.hh +new file mode 100644 +index 00000000..1c8f40c2 +--- /dev/null ++++ b/src/include/simeng/pipeline_hi/FetchUnit.hh +@@ -0,0 +1,127 @@ ++#pragma once ++ ++#include ++ ++#include "simeng/MemoryInterface.hh" ++#include "simeng/arch/Architecture.hh" ++#include "simeng/pipeline_hi/PipelineBuffer.hh" ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++/** The various states of the loop buffer. */ ++enum class LoopBufferState { ++ IDLE = 0, // No operations ++ WAITING, // Waiting to find boundary instruction in fetch stream ++ FILLING, // Filling loop buffer with loop body ++ SUPPLYING // Feeding loop buffer content to output buffer ++}; ++ ++// Struct to hold information about a fetched instruction ++struct loopBufferEntry { ++ // Encoding of the instruction ++ const uint64_t encoding; ++ ++ // Size of the instruction ++ const uint16_t instructionSize; ++ ++ // PC of the instruction ++ const uint64_t address; ++ ++ // Branch prediction made for instruction ++ const BranchPrediction prediction; ++}; ++ ++/** A fetch and pre-decode unit for a pipelined processor. Responsible for ++ * reading instruction memory and maintaining the program counter. */ ++class FetchUnit { ++ public: ++ /** Construct a fetch unit with a reference to an output buffer, the ISA, and ++ * the current branch predictor, and information on the instruction memory. */ ++ FetchUnit(PipelineBuffer& output, MemoryInterface& instructionMemory, ++ uint64_t programByteLength, uint64_t entryPoint, uint8_t blockSize, ++ const arch::Architecture& isa, BranchPredictor& branchPredictor); ++ ++ ~FetchUnit(); ++ ++ /** Tick the fetch unit. Retrieves and pre-decodes the instruction at the ++ * current program counter. */ ++ void tick(); ++ ++ /** Function handle to retrieve branch that represents loop boundary. */ ++ void registerLoopBoundary(uint64_t branchAddress); ++ ++ /** Check whether the program has ended. Returns `true` if the current PC is ++ * outside of instruction memory. */ ++ bool hasHalted() const; ++ ++ /** Update the program counter to the specified address. */ ++ void updatePC(uint64_t address); ++ ++ /** Request instructions at the current program counter for a future cycle. */ ++ void requestFromPC(); ++ ++ /** Retrieve the number of cycles fetch terminated early due to a predicted ++ * branch. */ ++ uint64_t getBranchStalls() const; ++ ++ /** Clear the loop buffer. */ ++ void flushLoopBuffer(); ++ ++ /** */ ++ void flushPredictor(uint64_t address); ++ ++ private: ++ /** An output buffer connecting this unit to the decode unit. */ ++ PipelineBuffer& output_; ++ ++ /** The current program counter. */ ++ uint64_t pc_ = 0; ++ ++ /** An interface to the instruction memory. */ ++ MemoryInterface& instructionMemory_; ++ ++ /** The length of the available instruction memory. */ ++ uint64_t programByteLength_; ++ ++ /** Reference to the currently used ISA. */ ++ const arch::Architecture& isa_; ++ ++ /** Reference to the current branch predictor. */ ++ BranchPredictor& branchPredictor_; ++ ++ /** A loop buffer to supply a detected loop instruction stream. */ ++ std::deque loopBuffer_; ++ ++ /** State of the loop buffer. */ ++ LoopBufferState loopBufferState_ = LoopBufferState::IDLE; ++ ++ /** The branch instruction that forms the loop. */ ++ uint64_t loopBoundaryAddress_ = 0; ++ ++ /** The current program halt state. 
Set to `true` when the PC leaves the ++ * instruction memory region, and set back to `false` if the PC is returned to ++ * the instruction region. */ ++ bool hasHalted_ = false; ++ ++ bool waitSCEval_ = false; ++ ++ /** The number of cycles fetch terminated early due to a predicted branch. */ ++ uint64_t branchStalls_ = 0; ++ ++ /** The size of a fetch block, in bytes. */ ++ uint8_t blockSize_; ++ ++ /** A mask of the bits of the program counter to use for obtaining the block ++ * address to fetch. */ ++ uint64_t blockMask_; ++ ++ /** The buffer used to hold fetched instruction data. */ ++ uint8_t* fetchBuffer_; ++ ++ /** The amount of data currently in the fetch buffer. */ ++ uint8_t bufferedBytes_ = 0; ++}; ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/include/simeng/pipeline_hi/LoadStoreQueue.hh b/src/include/simeng/pipeline_hi/LoadStoreQueue.hh +new file mode 100644 +index 00000000..211b1ef7 +--- /dev/null ++++ b/src/include/simeng/pipeline_hi/LoadStoreQueue.hh +@@ -0,0 +1,235 @@ ++#pragma once ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "simeng/Instruction.hh" ++#include "simeng/MemoryInterface.hh" ++#include "simeng/pipeline_hi/PipelineBuffer.hh" ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++/** The memory access types which are processed. */ ++enum accessType { LOAD = 0, STORE }; ++ ++/** A requestQueue_ entry. */ ++struct requestEntry { ++ /** The memory address(es) to be accessed. */ ++ std::queue reqAddresses; ++ /** The instruction sending the request(s). */ ++ std::shared_ptr insn; ++}; ++/** A requestQueue_ entry. */ ++struct requestEntry1 { ++ /** The memory address(es) to be accessed. */ ++ std::queue reqAddresses; ++ /** The memory address(es) to be accessed. */ ++ std::queue data; ++ /** The instruction sending the request(s). */ ++ std::shared_ptr insn; ++ accessType type; ++ uint64_t reqtick; ++ bool isMisAligned; ++}; ++/** A load store queue (known as "load/store buffers" or "memory order buffer"). ++ * Holds in-flight memory access requests to ensure load/store consistency. */ ++class LoadStoreQueue { ++ public: ++ /** Constructs a combined load/store queue model, simulating a shared queue ++ * for both load and store instructions, supplying completion slots for loads ++ * and an operand forwarding handler. */ ++ LoadStoreQueue( ++ unsigned int maxCombinedSpace, MemoryInterface& memory, ++ span>> completionSlots, ++ std::function, span)> forwardOperands, ++ bool exclusive = false, uint16_t loadBandwidth = UINT16_MAX, ++ uint16_t storeBandwidth = UINT16_MAX, ++ uint16_t permittedRequests = UINT16_MAX, ++ uint16_t permittedLoads = UINT16_MAX, ++ uint16_t permittedStores = UINT16_MAX); ++ ++ /** Constructs a split load/store queue model, simulating discrete queues for ++ * load and store instructions, supplying completion slots for loads and an ++ * operand forwarding handler. */ ++ LoadStoreQueue( ++ unsigned int maxLoadQueueSpace, unsigned int maxStoreQueueSpace, ++ MemoryInterface& memory, ++ span>> completionSlots, ++ std::function, span)> forwardOperands, ++ bool exclusive = false, uint16_t loadBandwidth = UINT16_MAX, ++ uint16_t storeBandwidth = UINT16_MAX, ++ uint16_t permittedRequests = UINT16_MAX, ++ uint16_t permittedLoads = UINT16_MAX, ++ uint16_t permittedStores = UINT16_MAX); ++ ++ /** Retrieve the available space for load uops. For combined queue this is the ++ * total remaining space. */ ++ unsigned int getLoadQueueSpace() const; ++ ++ /** Retrieve the available space for store uops. 
For a combined queue this is ++ * the total remaining space. */ ++ unsigned int getStoreQueueSpace() const; ++ ++ /** Retrieve the available space for any memory uops. For a split queue this ++ * is the sum of the space in both queues. */ ++ unsigned int getTotalSpace() const; ++ ++ /** Add a load uop to the queue. */ ++ void addLoad(const std::shared_ptr& insn); ++ ++ /** Add a store uop to the queue. */ ++ void addStore(const std::shared_ptr& insn); ++ ++ /** Add the load instruction's memory requests to the requestQueue_. */ ++ void startLoad(const std::shared_ptr& insn); ++ ++ /** Supply the data to be stored by a store operation. */ ++ void supplyStoreData(const std::shared_ptr& insn); ++ ++ /** Commit and write the oldest store instruction to memory, removing it from ++ * the store queue. Returns `true` if memory disambiguation has discovered a ++ * memory order violation during the commit. */ ++ bool commitStore(const std::shared_ptr& uop); ++ ++ /** Remove the oldest load instruction from the load queue. */ ++ void commitLoad(const std::shared_ptr& uop); ++ ++ /** Remove all flushed instructions from the queues. */ ++ void purgeFlushed(); ++ ++ /** Whether this is a combined load/store queue. */ ++ bool isCombined() const; ++ ++ /** Process received load data and send any completed loads for writeback. */ ++ void tick(); ++ ++ /** Retrieve the load instruction associated with the most recently discovered ++ * memory order violation. */ ++ std::shared_ptr getViolatingLoad() const; ++ ++ void processResponse(); ++ ++ bool activeMisAlignedOpr() const; ++ ++ bool isBusy() const; ++ ++ float getAvgLdLat() const { return (totalLdLatency)/numLoads; }; ++ ++ uint32_t getMaxLdLat() const { return maxLdLatency; }; ++ uint32_t getMinLdLat() const { return minLdLatency; }; ++ ++ private: ++ /** The load queue: holds in-flight load instructions. */ ++ std::deque> loadQueue_; ++ ++ /** The store queue: holds in-flight store instructions with its associated ++ * data. */ ++ std::deque, ++ span>> ++ storeQueue_; ++ ++ /** Slots to write completed load instructions into for writeback. */ ++ span>> completionSlots_; ++ ++ /** Map of loads that have requested their data, keyed by sequence ID. */ ++ std::unordered_map> requestedLoads_; ++ ++ /** Map of loads that have requested their data, keyed by sequence ID. */ ++ std::unordered_map latencyLoads_; ++ ++ /** A function handler to call to forward the results of a completed load. */ ++ std::function, span)> forwardOperands_; ++ ++ /** The maximum number of loads that can be in-flight. Undefined if this ++ * is a combined queue. */ ++ unsigned int maxLoadQueueSpace_; ++ ++ /** The maximum number of stores that can be in-flight. Undefined if this is a ++ * combined queue. */ ++ unsigned int maxStoreQueueSpace_; ++ ++ /** The maximum number of memory ops that can be in-flight. Undefined if this ++ * is a split queue. */ ++ unsigned int maxCombinedSpace_; ++ ++ /** Whether this queue is combined or split. */ ++ bool combined_; ++ ++ /** Retrieve the load queue space for a split queue. */ ++ unsigned int getLoadQueueSplitSpace() const; ++ ++ /** Retrieve the store queue space for a split queue. */ ++ unsigned int getStoreQueueSplitSpace() const; ++ ++ /** Retrieve the total memory uop space available for a combined queue. */ ++ unsigned int getCombinedSpace() const; ++ ++ /** A pointer to process memory. */ ++ MemoryInterface& memory_; ++ ++ /** The load instruction associated with the most recently discovered memory ++ * order violation. 
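++   * Illustrative sketch of the assumed recovery path (not taken from this
++   * patch): when commitStore() reports a violation, the core is expected to
++   * resume from the offending load, e.g.
++   *
++   *   if (lsq.commitStore(storeUop)) {
++   *     auto load = lsq.getViolatingLoad();
++   *     fetchUnit.updatePC(load->getInstructionAddress());
++   *   }
++   *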
*/ ++ std::shared_ptr violatingLoad_ = nullptr; ++ ++ /** The number of times this unit has been ticked. */ ++ uint64_t tickCounter_ = 0; ++ ++ /** A map to hold load instructions that are stalled due to a detected ++ * memory reordering confliction. First key is a store's sequence id and the ++ * second key the conflicting address. The value takes the form of a vector of ++ * pairs containing a pointer to the conflicted load and the size of the data ++ * needed at that address by the load. */ ++ std::unordered_map< ++ uint64_t, ++ std::unordered_map< ++ uint64_t, ++ std::vector, uint16_t>>>> ++ conflictionMap_; ++ ++ /** A map between LSQ cycles and load requests ready on that cycle. */ ++ std::map> requestLoadQueue_; ++ ++ /** A map between LSQ cycles and store requests ready on that cycle. */ ++ std::map> requestStoreQueue_; ++ ++ /** A queue of completed loads ready for writeback. */ ++ std::queue> completedLoads_; ++ ++ /** Whether the LSQ can only process loads xor stores within a cycle. */ ++ bool exclusive_; ++ ++ /** The amount of data readable from the L1D cache per cycle. */ ++ uint16_t loadBandwidth_; ++ ++ /** The amount of data writable to the L1D cache per cycle. */ ++ uint16_t storeBandwidth_; ++ ++ /** The combined limit of loads and store requests permitted per cycle. */ ++ uint16_t totalLimit_; ++ ++ /** The number of loads and stores permitted per cycle. */ ++ std::array reqLimits_; ++ ++ /** A map between LSQ cycles and load or store requests ready on that cycle. */ ++ std::deque requestQueue_; ++ ++ /* Identifier for request to memory*/ ++ uint8_t busReqId = 0; ++ ++ //bool activeMisAlignedStore = false; ++ ++ //Stats ++ uint64_t numLoads = 0; ++ double totalLdLatency = 0; ++ uint32_t maxLdLatency = 0; ++ uint32_t minLdLatency = 0xFFFF; ++ float averageAccessLdLatency = 0.0; ++}; ++ ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/include/simeng/pipeline_hi/PipelineBuffer.hh b/src/include/simeng/pipeline_hi/PipelineBuffer.hh +new file mode 100644 +index 00000000..dd2ed70c +--- /dev/null ++++ b/src/include/simeng/pipeline_hi/PipelineBuffer.hh +@@ -0,0 +1,107 @@ ++#pragma once ++ ++#include ++#include ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++/** A tickable pipelined buffer. Values are shifted from the tail slot to the ++ * head slot each time `tick()` is called. */ ++template ++class PipelineBuffer { ++ public: ++ /** Construct a pipeline buffer of width `width`, and fill all slots with ++ * `initialValue`. */ ++ PipelineBuffer(int width, const T& initialValue) ++ : width(width), buffer(width * defaultLength_, initialValue), ++ length_(defaultLength_), headIndex_(defaultLength_-1), ++ tailIndex_(0) {} ++ ++ PipelineBuffer(int width, const T& initialValue, int length) ++ : width(width), buffer(width * length, initialValue), length_(length), ++ headIndex_(length_-1), tailIndex_(0) { ++ assert(length_ != 0 && "Pipeline buffer length cannot be 0"); ++ } ++ ++ /** Tick the buffer and move head/tail pointers, or do nothing if it's ++ * stalled. */ ++ void tick() { ++ if (isStalled_) return; ++ ++ //length ==1 shortcut? condition check cost ++ ++ if (headIndex_) { // when headIndex != 0 ++ headIndex_--; ++ } else { ++ headIndex_ = length_ - 1; ++ } ++ if (tailIndex_) { // when tailIndex != 0 ++ tailIndex_--; ++ } else { ++ tailIndex_ = length_ - 1; ++ } ++ } ++ ++ /** Get a tail slots pointer. */ ++ T* getTailSlots() { ++ T* ptr = buffer.data(); ++ return &ptr[tailIndex_ * width]; ++ } ++ ++ /** Get a const tail slots pointer. 
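++   * Editorial example (a sketch based on the head/tail index arithmetic in
++   * tick(), not taken from this patch): a buffer of length N keeps the head
++   * N-1 rows behind the tail, so a value written at the tail is visible at the
++   * head after N-1 ticks; a length of 1 makes head and tail alias the same row
++   * (zero-cycle delay).
++   *
++   *   PipelineBuffer<int> buf(1, 0, 3);  // width 1, initial value 0, length 3
++   *   buf.getTailSlots()[0] = 42;
++   *   buf.tick();
++   *   buf.tick();                        // length - 1 = 2 ticks
++   *   int head = buf.getHeadSlots()[0];  // now reads 42
++   *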
*/ ++ const T* getTailSlots() const { ++ const T* ptr = buffer.data(); ++ return &ptr[tailIndex_ * width]; ++ } ++ ++ /** Get a head slots pointer. */ ++ T* getHeadSlots() { ++ T* ptr = buffer.data(); ++ return &ptr[headIndex_ * width]; ++ } ++ ++ /** Get a const head slots pointer. */ ++ const T* getHeadSlots() const { ++ const T* ptr = buffer.data(); ++ return &ptr[headIndex_ * width]; ++ } ++ ++ /** Check if the buffer is stalled. */ ++ bool isStalled() const { return isStalled_; } ++ ++ /** Set the buffer's stall flag to `stalled`. */ ++ void stall(bool stalled) { isStalled_ = stalled; } ++ ++ /** Fill the buffer with a specified value. */ ++ void fill(const T& value) { std::fill(buffer.begin(), buffer.end(), value); } ++ ++ /** Get the width of the buffer slots. */ ++ unsigned short getWidth() const { return width; } ++ ++ private: ++ /** The width of each row of slots. */ ++ unsigned short width; ++ ++ /** The buffer. */ ++ std::vector buffer; ++ ++ /** Whether the buffer is stalled or not. */ ++ bool isStalled_ = false; ++ ++ /** Buffer length */ ++ const unsigned int length_; ++ ++ /** */ ++ unsigned int headIndex_; ++ ++ /** */ ++ unsigned int tailIndex_; ++ ++ /** The number of stages in the pipeline. */ ++ static const unsigned int defaultLength_ = 2; ++}; ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/include/simeng/pipeline_hi/PipelineBuffer1.hh b/src/include/simeng/pipeline_hi/PipelineBuffer1.hh +new file mode 100644 +index 00000000..dfb465a3 +--- /dev/null ++++ b/src/include/simeng/pipeline_hi/PipelineBuffer1.hh +@@ -0,0 +1,133 @@ ++#pragma once ++ ++#include ++#include ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++// TODO: Extend to allow specifying the number of cycles it will take for ++// information to move from tail to head (currently fixed at 1 by ++// implementation) ++ ++/** A tickable pipelined buffer. Values are shifted from the tail slot to the ++ * head slot each time `tick()` is called. */ ++template ++class PipelineBuffer { ++ public: ++ /** Construct a pipeline buffer of width `width`, and fill all slots with ++ * `initialValue`. */ ++ PipelineBuffer(int width, const T& initialValue) ++ : width(width), buffer(width * defaultLength_, initialValue), ++ length_(defaultLength_) {} ++ ++ //TODO:currently length > 2 is not working, oscillate between 0 and 1 ++ PipelineBuffer(int width, const T& initialValue, int length) ++ : width(width), buffer(width * length, initialValue), length_(length), ++ useDefaultLength_(false) { ++ assert(length_ != 0 && "Pipeline buffer length cannot be 0"); ++ } ++ ++ /** Tick the buffer and move head/tail pointers, or do nothing if it's ++ * stalled. */ ++ void tick() { ++ if (useDefaultLength_) { ++ if (isStalled_) return; ++ ++ headIsStart = !headIsStart; ++ } else { ++ if (length_ == 1) { ++ return; ++ } else if (length_ > 2) { ++ //TODO ++ } ++ } ++ } ++ ++ /** Get a tail slots pointer. */ ++ T* getTailSlots() { ++ T* ptr = buffer.data(); ++ if (useDefaultLength_) { ++ return &ptr[headIsStart * width]; ++ } else { ++ if (length_ == 1) { ++ return &ptr[0]; ++ } ++ } ++ } ++ ++ /** Get a const tail slots pointer. */ ++ const T* getTailSlots() const { ++ const T* ptr = buffer.data(); ++ if (useDefaultLength_) { ++ return &ptr[headIsStart * width]; ++ } else { ++ if (length_ == 1) { ++ return &ptr[0]; ++ } ++ } ++ } ++ ++ ++ /** Get a head slots pointer. 
*/ ++ T* getHeadSlots() { ++ T* ptr = buffer.data(); ++ if (useDefaultLength_) { ++ return &ptr[!headIsStart * width]; ++ } else { ++ if (length_ == 1) { ++ return &ptr[0]; ++ } ++ } ++ } ++ ++ /** Get a const head slots pointer. */ ++ const T* getHeadSlots() const { ++ const T* ptr = buffer.data(); ++ if (useDefaultLength_) { ++ return &ptr[!headIsStart * width]; ++ } else { ++ if (length_ == 1) { ++ return &ptr[0]; ++ } ++ } ++ } ++ ++ /** Check if the buffer is stalled. */ ++ bool isStalled() const { return isStalled_; } ++ ++ /** Set the buffer's stall flag to `stalled`. */ ++ void stall(bool stalled) { isStalled_ = stalled; } ++ ++ /** Fill the buffer with a specified value. */ ++ void fill(const T& value) { std::fill(buffer.begin(), buffer.end(), value); } ++ ++ /** Get the width of the buffer slots. */ ++ unsigned short getWidth() const { return width; } ++ ++ private: ++ /** The width of each row of slots. */ ++ unsigned short width; ++ ++ /** The buffer. */ ++ std::vector buffer; ++ ++ /** The offset of the head pointer; either 0 or 1. */ ++ bool headIsStart = 0; ++ ++ /** Whether the buffer is stalled or not. */ ++ bool isStalled_ = false; ++ ++ /** Buffer length */ ++ const unsigned int length_; ++ ++ /** True if using default length (== 2) */ ++ bool useDefaultLength_ = true; ++ ++ /** The number of stages in the pipeline. */ ++ static const unsigned int defaultLength_ = 2; ++}; ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/include/simeng/pipeline_hi/PortAllocator.hh b/src/include/simeng/pipeline_hi/PortAllocator.hh +new file mode 100644 +index 00000000..bc985c0a +--- /dev/null ++++ b/src/include/simeng/pipeline_hi/PortAllocator.hh +@@ -0,0 +1,43 @@ ++#pragma once ++ ++#include ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++namespace PortType { ++/** Instructions have to match the exact group(s) in set. */ ++const uint8_t COMPULSORY = 0; ++/** Instructions can optional match group(s) in set. */ ++const uint8_t OPTIONAL = 1; ++} // namespace PortType ++ ++/** An abstract execution port allocator interface. */ ++class PortAllocator { ++ public: ++ virtual ~PortAllocator(){}; ++ ++ /** Allocate a port for the specified instruction group; returns the allocated ++ * port. */ ++ virtual uint16_t allocate(const std::vector& ports) = 0; ++ ++ /** Inform the allocator that an instruction was issued to the specified port. ++ */ ++ virtual void issued(uint16_t port) = 0; ++ ++ /** Inform the allocator that an instruction will not issue to its ++ * allocated port. */ ++ virtual void deallocate(uint16_t port) = 0; ++ ++ /** Set function from DispatchIssueUnit to retrieve reservation ++ * station sizes during execution. */ ++ virtual void setRSSizeGetter( ++ std::function&)> rsSizes) = 0; ++ ++ /** Tick the port allocator to allow it to process internal tasks. 
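++   * Illustrative call sequence (assumed from this interface, not taken from
++   * the patch; `supportedPorts` is a hypothetical list of the issue ports the
++   * instruction may use):
++   *
++   *   uint16_t port = allocator.allocate(supportedPorts);
++   *   // ... when the instruction issues:
++   *   allocator.issued(port);
++   *   // ... or, if it is flushed before issuing:
++   *   allocator.deallocate(port);
++   *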
*/
++  virtual void tick() = 0;
++};
++
++}  // namespace pipeline_hi
++}  // namespace simeng
+diff --git a/src/include/simeng/pipeline_hi/RegDepMap.hh b/src/include/simeng/pipeline_hi/RegDepMap.hh
+new file mode 100644
+index 00000000..7145fd19
+--- /dev/null
++++ b/src/include/simeng/pipeline_hi/RegDepMap.hh
+@@ -0,0 +1,57 @@
++#pragma once
++
++#include 
++#include 
++#include 
++#include 
++
++#include "simeng/Instruction.hh"
++
++namespace simeng {
++namespace pipeline_hi {
++
++typedef std::shared_ptr InstrPtr;
++class RegDepMap
++{
++ public:
++  RegDepMap(const std::vector registerFileStructures,
++            const RegisterFileSet& registerFileSet);
++  ~RegDepMap();
++
++  /** Clear the entire map. */
++  void clear();
++
++  /** Insert all of an instruction's destination registers into the map. */
++  void insert(InstrPtr instr);
++
++  /** Remove all of an instruction's destination registers from the map. */
++  void remove(InstrPtr instr);
++
++  /** Is the current instruction able to read from this
++   * destination register?
++   */
++  bool canRead(InstrPtr instr);
++
++  /** Is the current instruction able to write to this
++   * destination register?
++   */
++  bool canWrite(InstrPtr instr);
++
++  /** Check whether any in-flight instruction can forward the data this
++   * instruction is waiting on; if so, supply the data. */
++  bool canForward(InstrPtr instr);
++
++  void purgeFlushed();
++
++  void dump();
++
++ private:
++  const std::vector registerFileStructures_;
++  const RegisterFileSet& registerFileSet_;
++  typedef std::vector > DepMap;
++  std::vector regMap_;
++  uint32_t outstandingDep_ = 0;
++};
++
++}  // namespace pipeline_hi
++}  // namespace simeng
+diff --git a/src/include/simeng/pipeline_hi/RegisterAliasTable.hh b/src/include/simeng/pipeline_hi/RegisterAliasTable.hh
+new file mode 100644
+index 00000000..1b2327fc
+--- /dev/null
++++ b/src/include/simeng/pipeline_hi/RegisterAliasTable.hh
+@@ -0,0 +1,69 @@
++#pragma once
++
++#include 
++
++#include "simeng/RegisterFileSet.hh"
++
++namespace simeng {
++namespace pipeline_hi {
++
++/** A Register Alias Table (RAT) implementation. Contains information on
++ * the current register renaming state. */
++class RegisterAliasTable {
++ public:
++  /** Construct a RAT, supplying a description of the architectural register
++   * structure, and the corresponding numbers of physical registers that should
++   * be available. */
++  RegisterAliasTable(std::vector architecturalStructure,
++                     std::vector physicalStructure);
++
++  /** Retrieve the current physical register assigned to the provided
++   * architectural register. */
++  Register getMapping(Register architectural) const;
++
++  /** Determine whether it's possible to allocate `quantity` physical registers
++   * of type `type` this cycle. */
++  bool canAllocate(uint8_t type, unsigned int quantity) const;
++
++  /** Check whether registers of type `type` can be renamed by this RAT. */
++  bool canRename(uint8_t type) const;
++
++  /** Allocate a physical register for the provided architectural register. */
++  Register allocate(Register architectural);
++
++  /** Get the number of free registers available for allocation this cycle. */
++  unsigned int freeRegistersAvailable(uint8_t type) const;
++
++  /** Commit the provided physical register. This register now holds the
++   * committed state of the corresponding architectural register, and the
++   * previous physical register is freed. */
++  void commit(Register physical);
++
++  /** Rewind the allocation of a physical register.
The former physical register ++ * is reinstated to the mapping table, and the provided register is freed. */ ++ void rewind(Register physical); ++ ++ /** Free the provided physical register. */ ++ void free(Register physical); ++ ++ private: ++ /** The register mapping tables. Holds a map of architectural -> physical ++ * register mappings for each register type. */ ++ std::vector> mappingTable_; ++ ++ /** The register history tables. Each table holds an entry for each physical ++ * register, recording the physical register formerly assigned to its ++ * architectural register; one table is available per register type. */ ++ std::vector> historyTable_; ++ ++ /** The register destination tables. Holds a map of physical -> architectural ++ * register mappings for each register type. Used for rewind behaviour. */ ++ std::vector> destinationTable_; ++ ++ /** The free register queues. Holds a list of unallocated physical registers ++ * for each register type. */ ++ std::vector> freeQueues_; ++}; ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/include/simeng/pipeline_hi/ReorderBuffer.hh b/src/include/simeng/pipeline_hi/ReorderBuffer.hh +new file mode 100644 +index 00000000..1e5fd840 +--- /dev/null ++++ b/src/include/simeng/pipeline_hi/ReorderBuffer.hh +@@ -0,0 +1,136 @@ ++#pragma once ++ ++#include ++#include ++ ++#include "simeng/Instruction.hh" ++#include "simeng/pipeline_hi/LoadStoreQueue.hh" ++#include "simeng/pipeline_hi/RegisterAliasTable.hh" ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++/** A branch prediction outcome with an associated instruction address. */ ++struct latestBranch { ++ /** Branch instruction address. */ ++ uint64_t address; ++ ++ /** Outcome of the branch. */ ++ BranchPrediction outcome; ++ ++ /** The related instructionsCommitted_ value that this instruction was ++ * committed on. */ ++ uint64_t commitNumber; ++}; ++ ++/** A Reorder Buffer (ROB) implementation. Contains an in-order queue of ++ * in-flight instructions. */ ++class ReorderBuffer { ++ public: ++ /** Constructs a reorder buffer of maximum size `maxSize`, supplying a ++ * reference to the register alias table. */ ++ ReorderBuffer( ++ unsigned int maxSize, RegisterAliasTable& rat, LoadStoreQueue& lsq, ++ std::function&)> raiseException, ++ std::function sendLoopBoundary, ++ BranchPredictor& predictor, uint16_t loopBufSize, ++ uint16_t loopDetectionThreshold); ++ ++ /** Add the provided instruction to the ROB. */ ++ void reserve(const std::shared_ptr& insn); ++ ++ void commitMicroOps(uint64_t insnId); ++ ++ /** Commit and remove up to `maxCommitSize` instructions. */ ++ unsigned int commit(unsigned int maxCommitSize); ++ ++ /** Flush all instructions with a sequence ID greater than `afterSeqId`. */ ++ void flush(uint64_t afterSeqId); ++ ++ /** Retrieve the current size of the ROB. */ ++ unsigned int size() const; ++ ++ /** Retrieve the current amount of free space in the ROB. */ ++ unsigned int getFreeSpace() const; ++ ++ /** Query whether a memory order violation was discovered in the most recent ++ * cycle. */ ++ bool shouldFlush() const; ++ ++ /** Retrieve the instruction address associated with the most recently ++ * discovered memory order violation. */ ++ uint64_t getFlushAddress() const; ++ ++ /** Retrieve the sequence ID associated with the most recently discovered ++ * memory order violation. */ ++ uint64_t getFlushSeqId() const; ++ ++ /** Get the number of instructions the ROB has committed. 
*/ ++ uint64_t getInstructionsCommittedCount() const; ++ ++ /** Get the number of speculated loads which violated load-store ordering. */ ++ uint64_t getViolatingLoadsCount() const; ++ ++ private: ++ /** A reference to the register alias table. */ ++ RegisterAliasTable& rat_; ++ ++ /** A reference to the load/store queue. */ ++ LoadStoreQueue& lsq_; ++ ++ /** The maximum size of the ROB. */ ++ unsigned int maxSize_; ++ ++ /** A function to call upon exception generation. */ ++ std::function)> raiseException_; ++ ++ /** A function to send an instruction at a detected loop boundary. */ ++ std::function sendLoopBoundary_; ++ ++ /** Whether or not a loop has been detected. */ ++ bool loopDetected_ = false; ++ ++ /** A reference to the current branch predictor. */ ++ BranchPredictor& predictor_; ++ ++ /** The buffer containing in-flight instructions. */ ++ std::deque> buffer_; ++ ++ /** Whether the core should be flushed after the most recent commit. */ ++ bool shouldFlush_ = false; ++ ++ /** The target instruction address the PC should be reset to after the most ++ * recent commit. ++ */ ++ uint64_t pc_; ++ ++ /** The sequence ID of the youngest instruction that should remain after the ++ * current flush. */ ++ uint64_t flushAfter_; ++ ++ /** Latest retired branch outcome with a counter. */ ++ std::pair branchCounter_ = {{0, {false, 0}, 0}, 0}; ++ ++ /** Loop buffer size. */ ++ uint16_t loopBufSize_; ++ ++ /** Amount of times a branch must be seen without interruption for it to be ++ * considered a loop. */ ++ uint16_t loopDetectionThreshold_; ++ ++ /** The next available sequence ID. */ ++ uint64_t seqId_ = 0; ++ ++ /** The next available instruction ID. Used to identify in-order groups of ++ * micro-operations. */ ++ uint64_t insnId_ = 0; ++ ++ /** The number of instructions committed. */ ++ uint64_t instructionsCommitted_ = 0; ++ ++ /** The number of speculatived loads which violated load-store ordering. */ ++ uint64_t loadViolations_ = 0; ++}; ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/include/simeng/pipeline_hi/StaticPredictor.hh b/src/include/simeng/pipeline_hi/StaticPredictor.hh +new file mode 100644 +index 00000000..d8923dc2 +--- /dev/null ++++ b/src/include/simeng/pipeline_hi/StaticPredictor.hh +@@ -0,0 +1,53 @@ ++#pragma once ++ ++#include ++ ++#include "simeng/BranchPredictor.hh" ++#include "yaml-cpp/yaml.h" ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++/** A static branch predictor; configurable in YAML config ++ */ ++class StaticPredictor : public BranchPredictor { ++ public: ++ StaticPredictor(uint8_t sType); //TODO: temp constructor, get rid of yaml, delete it later ++ StaticPredictor(YAML::Node config); ++ ~StaticPredictor(); ++ ++ BranchPrediction predict(uint64_t address, BranchType type, ++ uint64_t knownTarget, uint8_t byteLength) override; ++ ++ /** Generate a branch prediction for the specified instruction address; will ++ * behave based on the configuration */ ++ BranchPrediction predict(uint64_t address, BranchType type, ++ uint64_t knownTarget) override; ++ ++ /** Provide branch results to update the prediction model for the specified ++ * instruction address. As this model is static, this does nothing. */ ++ void update(uint64_t address, bool taken, uint64_t targetAddress, ++ BranchType type) override; ++ ++ /** Provide flush logic for branch prediction scheme. 
The behaviour will ++ * be based on the configuration */ ++ void flush(uint64_t address) override; ++ ++ private: ++ /** Decide which static predictor will be in use */ ++ uint8_t staticType_; ++ ++ /** A return address stack. */ ++ std::deque ras_; ++ ++ /** RAS history with instruction address as the keys. A non-zero value ++ * represents the target prediction for a return instruction and a 0 entry for ++ * a branch-and-link instruction. */ ++ std::map rasHistory_; ++ ++ /** The size of the RAS. */ ++ uint64_t rasSize_ = 1000; ++}; ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/include/simeng/pipeline_hi/WritebackUnit.hh b/src/include/simeng/pipeline_hi/WritebackUnit.hh +new file mode 100644 +index 00000000..0816d3b5 +--- /dev/null ++++ b/src/include/simeng/pipeline_hi/WritebackUnit.hh +@@ -0,0 +1,62 @@ ++#pragma once ++ ++#include ++ ++#include "simeng/Instruction.hh" ++#include "simeng/pipeline_hi/PipelineBuffer.hh" ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++/** A writeback pipeline unit. Responsible for writing instruction results to ++ * the register files. */ ++class WritebackUnit { ++ public: ++ /** Constructs a writeback unit with references to an input buffer and ++ * register file to write to. */ ++ WritebackUnit(std::vector>>& ++ completionSlots, ++ RegisterFileSet& registerFileSet, ++ std::function flagMicroOpCommits, ++ std::function&)> removeDep, ++ std::function&)> removeInstrOrderQ); ++ ++ /** Tick the writeback unit to perform its operation for this cycle. */ ++ void tick(); ++ ++ /** Retrieve a count of the number of instructions retired. */ ++ uint64_t getInstructionsWrittenCount() const; ++ ++ /** Retrieve instruction(s) to be printed out to the trace */ ++ std::vector> getInstsForTrace(); ++ ++ /** Clear the container for tracing */ ++ void traceFinished(); //Might be safer to update trace within WritebackUnit ++ ++ private: ++ /** Buffers of completed instructions to process. */ ++ std::vector>>& completionSlots_; ++ ++ /** The register file set to write results into. */ ++ RegisterFileSet& registerFileSet_; ++ ++ /** A function handle called to determine if uops associated to an instruction ++ * ID can now be committed. */ ++ std::function flagMicroOpCommits_; ++ ++ /** A function to remove the commited instruction from dependency queue. */ ++ std::function&)> removeDep_; ++ ++ /** A function to remove the commited instruction from ordering queue. */ ++ std::function&)> removeInstrOrderQ_; ++ ++ /** The number of instructions processed and retired by this stage. 
*/ ++ uint64_t instructionsWritten_ = 0; ++ ++ /** Instruction(s) to be printed out to the trace */ ++ std::deque> committedInstsForTrace_; ++}; ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt +index 1fbf2865..97de63eb 100644 +--- a/src/lib/CMakeLists.txt ++++ b/src/lib/CMakeLists.txt +@@ -9,6 +9,7 @@ set(SIMENG_SOURCES + arch/aarch64/MicroDecoder.cc + arch/riscv/Architecture.cc + arch/riscv/ExceptionHandler.cc ++ arch/riscv/SystemRegister.cc + arch/riscv/Instruction.cc + arch/riscv/Instruction_address.cc + arch/riscv/Instruction_decode.cc +@@ -18,6 +19,7 @@ set(SIMENG_SOURCES + kernel/LinuxProcess.cc + models/emulation/Core.cc + models/inorder/Core.cc ++ models/mcu/Core.cc + models/outoforder/Core.cc + pipeline/A64FXPortAllocator.cc + pipeline/BalancedPortAllocator.cc +@@ -32,6 +34,16 @@ set(SIMENG_SOURCES + pipeline/RenameUnit.cc + pipeline/ReorderBuffer.cc + pipeline/WritebackUnit.cc ++ pipeline_hi/DecodeUnit.cc ++ pipeline_hi/DispatchIssueUnit.cc ++ pipeline_hi/ExecuteUnit.cc ++ pipeline_hi/FetchUnit.cc ++ pipeline_hi/LoadStoreQueue.cc ++ pipeline_hi/RegDepMap.cc ++ pipeline_hi/RegisterAliasTable.cc ++ pipeline_hi/ReorderBuffer.cc ++ pipeline_hi/StaticPredictor.cc ++ pipeline_hi/WritebackUnit.cc + AlwaysNotTakenPredictor.cc + ArchitecturalRegisterFileSet.cc + CMakeLists.txt +diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc +index 8ba06c8e..e8f91d34 100644 +--- a/src/lib/CoreInstance.cc ++++ b/src/lib/CoreInstance.cc +@@ -90,6 +90,10 @@ void CoreInstance::setSimulationMode() { + "outoforder") { + mode_ = SimulationMode::OutOfOrder; + modeString_ = "Out-of-Order"; ++ } else if (config_["Core"]["Simulation-Mode"].as() == ++ "mcu") { ++ mode_ = SimulationMode::MCU; ++ modeString_ = "MCU"; + } + + return; +@@ -236,7 +240,7 @@ void CoreInstance::createCore() { + if (config_["Core"]["ISA"].as() == "rv64" || + config_["Core"]["ISA"].as() == "rv32") { + arch_ = +- std::make_unique(kernel_, config_); ++ std::make_unique(kernel_, config_,dataMemory_); + } else if (config_["Core"]["ISA"].as() == "AArch64") { + arch_ = + std::make_unique(kernel_, config_); +@@ -244,6 +248,9 @@ void CoreInstance::createCore() { + + // Construct branch predictor object + predictor_ = std::make_unique(config_); ++ if (mode_ == SimulationMode::MCU) { ++ predictor_ = std::make_unique(2); //config_ ++ } + + // Extract port arrangement from config file + auto config_ports = config_["Ports"]; +@@ -268,6 +275,10 @@ void CoreInstance::createCore() { + core_ = std::make_shared( + *instructionMemory_, *dataMemory_, processMemorySize_, entryPoint, + *arch_, *predictor_); ++ } else if (mode_ == SimulationMode::MCU) { ++ core_ = std::make_shared( ++ *instructionMemory_, *dataMemory_, processMemorySize_, entryPoint, ++ *arch_, *predictor_, config_); + } else if (mode_ == SimulationMode::OutOfOrder) { + core_ = std::make_shared( + *instructionMemory_, *dataMemory_, processMemorySize_, entryPoint, +diff --git a/src/lib/Elf.cc b/src/lib/Elf.cc +index 62815984..901f370e 100644 +--- a/src/lib/Elf.cc ++++ b/src/lib/Elf.cc +@@ -2,6 +2,7 @@ + + #include + #include ++#include + + namespace simeng { + +@@ -13,7 +14,8 @@ namespace simeng { + * https://man7.org/linux/man-pages/man5/elf.5.html + */ + +-Elf::Elf(std::string path, char** imagePointer) { ++Elf::Elf(std::string path, char** imagePointer, std::unordered_map& symbols) ++{ + std::ifstream file(path, std::ios::binary); + + if (!file.is_open()) { +@@ -174,120 +176,69 @@ Elf::Elf(std::string path, char** 
imagePointer) { + } + } + } else { +- /** +- * Starting from the 24th byte of the ELF header a 32-bit value +- * represents the virtual address to which the system first transfers +- * control, thus starting the process. +- * In `elf32_hdr` this value maps to the member `Elf32_Addr e_entry`. +- */ ++ file.seekg(0); + +- // Seek to the entry point of the file. +- // The information in between is discarded +- file.seekg(0x18); +- file.read(reinterpret_cast(&entryPoint32_), sizeof(entryPoint32_)); ++ Elf32_Ehdr eheader; ++ file.read(reinterpret_cast(&eheader), sizeof(eheader)); + +- /** +- * Starting from the 32nd byte of the ELF Header a 64-bit value +- * represents the offset of the ELF Program header or +- * Program header table in the ELF file. +- * In `elf32_hdr` this value maps to the member `Elf32_Addr e_phoff`. +- */ +- +- // Seek to the byte representing the start of the header offset table. +- uint32_t headerOffset; +- file.read(reinterpret_cast(&headerOffset), sizeof(headerOffset)); +- +- /** +- * Starting 42th byte of the ELF Header a 16-bit value indicates +- * the size of each entry in the ELF Program header. In the `elf32_hdr` +- * struct this value maps to the member `Elf32_Half e_phentsize`. All +- * header entries have the same size. +- * Starting from the 44th byte a 16-bit value represents the number +- * of header entries in the ELF Program header. In the `elf32_hdr` +- * struct this value maps to `Elf32_Half e_phnum`. +- */ +- +- // Seek to the byte representing header entry size. +- file.seekg(0x2a); +- uint16_t headerEntrySize; +- file.read(reinterpret_cast(&headerEntrySize), sizeof(headerEntrySize)); +- uint16_t headerEntries; +- file.read(reinterpret_cast(&headerEntries), sizeof(headerEntries)); +- +- // Resize the header to equal the number of header entries. +- headers32_.resize(headerEntries); ++ entryPoint32_ = eheader.e_entry; ++ + processImageSize_ = 0; + +- // Loop over all headers and extract them. +- for (size_t i = 0; i < headerEntries; i++) { +- // Since all headers entries have the same size. +- // We can extract the nth header using the header offset +- // and header entry size. +- file.seekg(headerOffset + (i * headerEntrySize)); +- auto& header = headers32_[i]; ++ // Loop over pheaders and extract them. ++ file.seekg(eheader.e_phoff); ++ std::vector pheaders(eheader.e_phnum); ++ for (auto &ph : pheaders) { ++ file.read(reinterpret_cast(&ph), sizeof(ph)); ++ if ((ph.p_type == PT_LOAD) && (ph.p_vaddr+ph.p_memsz > processImageSize_)) ++ processImageSize_ = ph.p_vaddr+ph.p_memsz; ++ } + +- /** +- * Like the ELF Header, the ELF Program header is also defined +- * using a struct: +- * typedef struct { +- * uint32_t p_type; +- * Elf32_Off p_offset; +- * Elf32_Addr p_vaddr; +- * Elf32_Addr p_paddr; +- * uint32_t p_filesz; +- * uint32_t p_memsz; +- * uint32_t p_flags; +- * uint32_t p_align; +- * } Elf32_Phdr; +- * +- * The ELF Program header table is an array of structures, +- * each describing a segment or other information the system +- * needs to prepare the program for execution. A segment +- * contains one or more sections (ELF Program Section). +- * +- * The `p_vaddr` field holds the virtual address at which the first +- * byte of the segment resides in memory and the `p_memsz` field +- * holds the number of bytes in the memory image of the segment. +- * It may be zero. The `p_offset` member holds the offset from the +- * beginning of the file at which the first byte of the segment resides. 
+- */ ++ *imagePointer = (char*)malloc(processImageSize_ * sizeof(char)); + +- // Each address-related field is 4 bytes in a 32-bit ELF file +- const int fieldBytes = 4; +- file.read(reinterpret_cast(&(header.type)), sizeof(header.type)); +- file.read(reinterpret_cast(&(header.offset)), fieldBytes); +- file.read(reinterpret_cast(&(header.virtualAddress)), fieldBytes); +- file.read(reinterpret_cast(&(header.physicalAddress)), fieldBytes); +- file.read(reinterpret_cast(&(header.fileSize)), fieldBytes); +- file.read(reinterpret_cast(&(header.memorySize)), fieldBytes); ++ for (const auto& ph : pheaders) { ++ if (ph.p_type == PT_LOAD) { ++ file.seekg(ph.p_offset); ++ // Read `fileSize` bytes from `file` into the appropriate place in process memory ++ file.read(*imagePointer+ph.p_vaddr, ph.p_filesz); + +- // To construct the process we look for the largest virtual address and +- // add it to the memory size of the header. This way we obtain a very +- // large array which can hold data at large virtual address. +- // However, this way we end up creating a sparse array, in which most +- // of the entries are unused. Also SimEng internally treats these +- // virtual address as physical addresses to index into this large array. +- if (header.virtualAddress + header.memorySize > processImageSize_) { +- processImageSize_ = header.virtualAddress + header.memorySize; ++ if (ph.p_memsz>ph.p_filesz) ++ // Need to padd the rest of the section memory with zeros ++ memset(*imagePointer+ph.p_vaddr+ph.p_filesz, 0, ph.p_memsz-ph.p_filesz); + } + } + +- *imagePointer = (char*)malloc(processImageSize_ * sizeof(char)); +- /** +- * The ELF Program header has a member called `p_type`, which represents +- * the kind of data or memory segments described by the program header. +- * The value PT_LOAD=1 represents a loadable segment. In other words, +- * it contains initialized data that contributes to the program's +- * memory image. +- */ +- +- // Process headers; only observe LOAD sections for this basic implementation +- for (const auto& header : headers32_) { +- if (header.type == 1) { // LOAD +- file.seekg(header.offset); +- // Read `fileSize` bytes from `file` into the appropriate place in process +- // memory +- file.read(*imagePointer + header.virtualAddress, header.fileSize); ++ // read section headers ++ Elf32_Shdr* sh_strtab = NULL; ++ Elf32_Shdr* sh_symtab = NULL; ++ file.seekg(eheader.e_shoff); ++ std::vector sheaders(eheader.e_shnum); ++ unsigned int sh_idx = 0; ++ for (auto &sh : sheaders) { ++ file.read(reinterpret_cast(&sh), sizeof(sh)); ++ ++ // find section header for strings to use for symbol table. 
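++    // (Editorial clarification, based on the ELF format rather than this
++    // patch's own text: SHT_SYMTAB marks the symbol table, and the matching
++    // name strings live in a SHT_STRTAB section other than the section-header
++    // string table at index e_shstrndx. Each symbol's st_name is an offset
++    // into that string table, so the loop below records
++    // symbols[name] = st_value; this is how the "tohost" symbol used for HTIF
++    // detection becomes visible to the Architecture.)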
++ if (sh.sh_type==SHT_SYMTAB) ++ sh_symtab = &sh; ++ else if (sh.sh_type==SHT_STRTAB && sh_idx!=eheader.e_shstrndx) ++ sh_strtab = &sh; ++ sh_idx++; ++ }; ++ ++ // Read strings table ++ file.seekg(sh_strtab->sh_offset); ++ std::vector strtab(sh_strtab->sh_size); ++ file.read(&strtab[0], sh_strtab->sh_size); ++ ++ // Read symbols tables ++ file.seekg(sh_symtab->sh_offset); ++ unsigned num_symbols = sh_symtab->sh_size/sh_symtab->sh_entsize; ++ Elf32_Sym sym; ++ while(num_symbols--) { ++ file.read(reinterpret_cast(&sym), sizeof(sym)); ++ if (strtab[sym.st_name]) { ++ std::string name(&strtab[sym.st_name]); ++ symbols[name] = sym.st_value; + } + } + } +diff --git a/src/lib/GenericPredictor.cc b/src/lib/GenericPredictor.cc +index 2539d7ae..4b93d832 100644 +--- a/src/lib/GenericPredictor.cc ++++ b/src/lib/GenericPredictor.cc +@@ -110,4 +110,11 @@ void GenericPredictor::flush(uint64_t address) { + } + } + ++ ++BranchPrediction GenericPredictor::predict(uint64_t address, BranchType type, ++ uint64_t knownTarget, ++ uint8_t byteLength) { ++ return predict(address, type, knownTarget); ++} ++ + } // namespace simeng +diff --git a/src/lib/Instruction.cc b/src/lib/Instruction.cc +index ac923c11..d1b7b112 100644 +--- a/src/lib/Instruction.cc ++++ b/src/lib/Instruction.cc +@@ -57,5 +57,8 @@ bool Instruction::isLastMicroOp() const { return isLastMicroOp_; } + void Instruction::setWaitingCommit() { waitingCommit_ = true; } + bool Instruction::isWaitingCommit() const { return waitingCommit_; } + int Instruction::getMicroOpIndex() const { return microOpIndex_; } ++bool Instruction::isDiv() const { return isDiv_; } ++bool Instruction::isMul() const { return isMul_; } ++bool Instruction::isSysCall() const { return isSysCall_; } + + } // namespace simeng +diff --git a/src/lib/ModelConfig.cc b/src/lib/ModelConfig.cc +index 88cc1f7d..34247634 100644 +--- a/src/lib/ModelConfig.cc ++++ b/src/lib/ModelConfig.cc +@@ -69,7 +69,7 @@ void ModelConfig::validate() { + configFile_[root][subFields[0]], subFields[0], + std::vector({"AArch64", "rv64", "rv32"}), ExpectedValue::String); + nodeChecker(configFile_[root][subFields[1]], subFields[1], +- {"emulation", "inorderpipelined", "outoforder"}, ++ {"emulation", "inorderpipelined", "mcu", "outoforder"}, + ExpectedValue::String); + nodeChecker(configFile_[root][subFields[2]], subFields[2], + std::make_pair(0.f, 10.f), ExpectedValue::Float); +diff --git a/src/lib/arch/aarch64/Architecture.cc b/src/lib/arch/aarch64/Architecture.cc +index 23ebf86a..5ad11c70 100644 +--- a/src/lib/arch/aarch64/Architecture.cc ++++ b/src/lib/arch/aarch64/Architecture.cc +@@ -281,11 +281,13 @@ ProcessStateChange Architecture::getInitialState() const { + + uint8_t Architecture::getMaxInstructionSize() const { return 4; } + ++uint8_t Architecture::getMinInstructionSize() const { return 4; } ++ + uint64_t Architecture::getVectorLength() const { return VL_; } + + uint64_t Architecture::getStreamingVectorLength() const { return SVL_; } + +-void Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile, ++int16_t Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile, + const uint64_t iterations) const { + // Update the Processor Cycle Counter to total cycles completed. 
+ regFile->set(PCCreg_, iterations); +@@ -293,6 +295,8 @@ void Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile, + if (iterations % (uint64_t)vctModulo_ == 0) { + regFile->set(VCTreg_, regFile->get(VCTreg_).get() + 1); + } ++ // interrupts NYI ++ return -1; + } + + std::vector +diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc +index 909f5263..602bdc7f 100644 +--- a/src/lib/arch/aarch64/Instruction.cc ++++ b/src/lib/arch/aarch64/Instruction.cc +@@ -43,7 +43,7 @@ Instruction::Instruction(const Instruction& insn) + branchAddress_ = insn.branchAddress_; + branchTaken_ = insn.branchTaken_; + branchType_ = insn.branchType_; +- knownTarget_ = insn.knownTarget_; ++ knownOffset_ = insn.knownOffset_; + sequenceId_ = insn.sequenceId_; + flushed_ = insn.flushed_; + latency_ = insn.latency_; +@@ -182,7 +182,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { + + BranchType Instruction::getBranchType() const { return branchType_; } + +-uint64_t Instruction::getKnownTarget() const { return knownTarget_; } ++uint64_t Instruction::getKnownOffset() const { return knownOffset_; } + + uint16_t Instruction::getGroup() const { + // Use identifiers to decide instruction group +diff --git a/src/lib/arch/aarch64/Instruction_decode.cc b/src/lib/arch/aarch64/Instruction_decode.cc +index e3f0529e..ca869629 100644 +--- a/src/lib/arch/aarch64/Instruction_decode.cc ++++ b/src/lib/arch/aarch64/Instruction_decode.cc +@@ -364,7 +364,7 @@ void Instruction::decode() { + switch (metadata.opcode) { + case Opcode::AArch64_B: // b label + branchType_ = BranchType::Unconditional; +- knownTarget_ = metadata.operands[0].imm; ++ knownOffset_ = metadata.operands[0].imm; + break; + case Opcode::AArch64_BR: { // br xn + branchType_ = BranchType::Unconditional; +@@ -372,7 +372,7 @@ void Instruction::decode() { + } + case Opcode::AArch64_BL: // bl #imm + branchType_ = BranchType::SubroutineCall; +- knownTarget_ = metadata.operands[0].imm; ++ knownOffset_ = metadata.operands[0].imm; + break; + case Opcode::AArch64_BLR: { // blr xn + branchType_ = BranchType::SubroutineCall; +@@ -383,7 +383,7 @@ void Instruction::decode() { + branchType_ = BranchType::LoopClosing; + else + branchType_ = BranchType::Conditional; +- knownTarget_ = metadata.operands[0].imm; ++ knownOffset_ = metadata.operands[0].imm; + break; + } + case Opcode::AArch64_CBNZW: // cbnz wn, #imm +@@ -397,7 +397,7 @@ void Instruction::decode() { + branchType_ = BranchType::LoopClosing; + else + branchType_ = BranchType::Conditional; +- knownTarget_ = metadata.operands[1].imm; ++ knownOffset_ = metadata.operands[1].imm; + break; + } + case Opcode::AArch64_TBNZW: // tbnz wn, #imm, label +@@ -411,7 +411,7 @@ void Instruction::decode() { + branchType_ = BranchType::LoopClosing; + else + branchType_ = BranchType::Conditional; +- knownTarget_ = metadata.operands[2].imm; ++ knownOffset_ = metadata.operands[2].imm; + break; + } + case Opcode::AArch64_RET: { // ret {xr} +diff --git a/src/lib/arch/riscv/Architecture.cc b/src/lib/arch/riscv/Architecture.cc +index d1a18777..84afcc09 100644 +--- a/src/lib/arch/riscv/Architecture.cc ++++ b/src/lib/arch/riscv/Architecture.cc +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + + #include "InstructionMetadata.hh" + +@@ -14,8 +15,10 @@ namespace riscv { + std::unordered_map Architecture::decodeCache; + std::forward_list Architecture::metadataCache; + +-Architecture::Architecture(kernel::Linux& kernel, YAML::Node config) +- : linux_(kernel) { 
++Architecture::Architecture(kernel::Linux& kernel, YAML::Node config, std::shared_ptr& dataMemory) ++: ++ linux_(kernel) ++{ + is32Bit_ = ARCH_64BIT; + if (config["Core"]["ISA"].as() == "rv32") { + is32Bit_ = ARCH_32BIT; +@@ -46,14 +49,39 @@ Architecture::Architecture(kernel::Linux& kernel, YAML::Node config) + + // Generate zero-indexed system register map + systemRegisterMap_[SYSREG_MSTATUS] = systemRegisterMap_.size(); ++ systemRegisterMap_[SYSREG_MIE] = systemRegisterMap_.size(); ++ systemRegisterMap_[SYSREG_MTVEC] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MSTATUSH] = systemRegisterMap_.size(); ++ systemRegisterMap_[SYSREG_MSCRATCH] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MEPC] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MCAUSE] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MHARTID] = systemRegisterMap_.size(); ++ systemRegisterMap_[SYSREG_MXCPTSC] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_CYCLE] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_TIME] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_INSTRRET] = systemRegisterMap_.size(); + ++ // Memory Mapped System Register Blocks ++ ++ // if elf file includes the label tohost then assume that this binary supports HTIF protocol (used by spike) and include an HTI block ++ uint64_t htifAddress; ++ if (linux_.lookupSymbolValue("tohost",htifAddress)) ++ { ++ std::cout << "[SimEng] HTIF detected at: " << std::hex << htifAddress << std::endl; ++ htif = std::make_shared(*this); ++ memoryMappedSystemRegisterBlocks[htifAddress] = htif.get(); ++ } ++ ++ // Install CLINT into memort map, this is optional ++ clint = std::make_shared(*this); ++ memoryMappedSystemRegisterBlocks[Clint::CLINT_BASE] = clint.get(); ++ ++ if (!memoryMappedSystemRegisterBlocks.empty()) ++ { ++ systemRegisterMemoryInterface = std::make_shared(dataMemory, memoryMappedSystemRegisterBlocks); ++ dataMemory = systemRegisterMemoryInterface; ++ } ++ + // Instantiate an executionInfo entry for each group in the InstructionGroup + // namespace. + for (int i = 0; i < NUM_GROUPS; i++) { +@@ -145,7 +173,7 @@ Architecture::Architecture(kernel::Linux& kernel, YAML::Node config) + } + } + } +- if (config["Core"]["Trace"].as()) { ++ if (config["Core"]["Trace"].IsDefined() && config["Core"]["Trace"].as()) { + traceFile_ = new std::ofstream(); + traceFile_->open("./trace.log"); + traceOn_ = true; +@@ -164,6 +192,7 @@ Architecture::~Architecture() { + uint8_t Architecture::predecode(const void* ptr, uint8_t bytesAvailable, + uint64_t instructionAddress, + MacroOp& output) const { ++ + // Check that instruction address is 4-byte aligned as required by RISC-V + // 2-byte when Compressed ISA is supported + if (instructionAddress & constants_.alignMask) { +@@ -221,9 +250,11 @@ uint8_t Architecture::predecode(const void* ptr, uint8_t bytesAvailable, + output.resize(1); + auto& uop = output[0]; + +- // Retrieve the cached instruction and write to output +- uop = std::make_shared(iter->second); ++ // Retrieve the cached instruction ++ auto newinsn = std::make_shared(iter->second); + ++ // write to output ++ uop = newinsn; + uop->setInstructionAddress(instructionAddress); + + return iter->second.getMetadata().lenBytes; +@@ -265,8 +296,19 @@ int32_t Architecture::getSystemRegisterTag(uint16_t reg) const { + // Check below is done for speculative instructions that may be passed into + // the function but will not be executed. 
If such invalid speculative + // instructions get through they can cause an out-of-range error. +- if (!systemRegisterMap_.count(reg)) return 0; +- return systemRegisterMap_.at(reg); ++ if (systemRegisterMap_.count(reg)) ++ return systemRegisterMap_.at(reg); ++ else ++ return -1; ++} ++ ++/** Returns a System Register index from a system register tag. ++ reverse lookup slow but only used in printing so will be fine */ ++uint16_t Architecture::getSystemRegisterIdFromTag(int32_t tag) const { ++ for (auto it = systemRegisterMap_.begin();it != systemRegisterMap_.end();it++) ++ if (it->second == tag) ++ return it->first; ++ assert(0 && "Tag not found in systemRegisterMap"); + } + + ProcessStateChange Architecture::getInitialState() const { +@@ -289,6 +331,8 @@ ProcessStateChange Architecture::getInitialState() const { + + uint8_t Architecture::getMaxInstructionSize() const { return 4; } + ++uint8_t Architecture::getMinInstructionSize() const { return 2; } ++ + std::vector + Architecture::getConfigPhysicalRegisterStructure(YAML::Node config) const { + return {{constants_.regWidth, config["Register-Set"]["GeneralPurpose-Count"].as()}, +@@ -306,9 +350,21 @@ uint16_t Architecture::getNumSystemRegisters() const { + return static_cast(systemRegisterMap_.size()); + } + +-// Left blank as no implementation necessary +-void Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile, ++int16_t Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile, + const uint64_t iterations) const { ++ int16_t interruptId = -1; ++ ++ if (htif) ++ { ++ interruptId = htif->updateSystemTimerRegisters(regFile, iterations); ++ if (interruptId>=0) ++ return interruptId; ++ } ++ ++ if (clint) ++ interruptId = clint->updateSystemTimerRegisters(regFile, iterations); ++ ++ return interruptId; + } + + void Architecture::updateInstrTrace(const std::shared_ptr& instruction, +@@ -346,7 +402,7 @@ void Architecture::updateInstrTrace(const std::shared_ptr& + } else if(reg.type == RegisterType::FLOAT) { + s << "f" << std::dec << std::setfill('0') << std::setw(2) << reg.tag << "=0x"; + } else if(reg.type == RegisterType::SYSTEM) { +- s << "csr_0x" << std::hex << std::setfill('0') << std::setw(3) << metadata.csr << "=0x"; ++ s << "csr_0x" << std::hex << std::setfill('0') << std::setw(3) << getSystemRegisterIdFromTag(reg.tag) << "=0x"; + } + s << std::hex << std::setfill('0') << std::setw(8) << regFile->get(reg).get(); + if(i < (num_dest-1)) { +@@ -364,7 +420,7 @@ void Architecture::updateInstrTrace(const std::shared_ptr& + } else if(reg.type == RegisterType::FLOAT) { + s << "f" << std::dec << std::setfill('0') << std::setw(2) << reg.tag << "=0x"; + } else if(reg.type == RegisterType::SYSTEM) { +- s << "csr_0x" << std::hex << std::setfill('0') << std::setw(3) << metadata.csr << "=0x"; ++ s << "csr_0x" << std::hex << std::setfill('0') << std::setw(3) << getSystemRegisterIdFromTag(reg.tag) << "=0x"; + } + s << std::hex << std::setfill('0') << std::setw(8) << regFile->get(reg).get(); + if(i < (num_src-1)) { +diff --git a/src/lib/arch/riscv/ExceptionHandler.cc b/src/lib/arch/riscv/ExceptionHandler.cc +index c8844804..9ba22008 100644 +--- a/src/lib/arch/riscv/ExceptionHandler.cc ++++ b/src/lib/arch/riscv/ExceptionHandler.cc +@@ -1,5 +1,5 @@ ++#include "simeng/arch/riscv/Architecture.hh" + #include "simeng/arch/riscv/ExceptionHandler.hh" +- + #include + #include + +@@ -646,6 +646,18 @@ bool ExceptionHandler::init() { + } + + return concludeSyscall(stateChange); ++ ++ } else if (exception == 
InstructionException::SecureMonitorCall) { ++ printException(instruction_); ++ takeException(CAUSE_BREAKPOINT); ++ return true; ++ } else if (exception == InstructionException::Interrupt) { ++ printException(instruction_); ++ if (instruction_.getInterruptId() == static_cast(InterruptId::HALT)) ++ return fatal(); ++ uint64_t mcause_val = static_cast(instruction_.getInterruptId()) | (1<<(8*instruction_.getArchRegWidth()-1)); ++ takeException(mcause_val); ++ return true; + } + + printException(instruction_); +@@ -745,6 +757,45 @@ void ExceptionHandler::readLinkAt(span path) { + concludeSyscall(stateChange); + } + ++void ExceptionHandler::takeException(uint64_t causecode) ++{ ++ const auto& registerFileSet = core.getArchitecturalRegisterFileSet(); ++ auto& architecture = instruction_.getArchitecture(); ++ uint16_t mtvec_tag = static_cast(architecture.getSystemRegisterTag(SYSREG_MTVEC)); ++ uint16_t mstatus_tag = static_cast(architecture.getSystemRegisterTag(SYSREG_MSTATUS)); ++ uint16_t mepc_tag = static_cast(architecture.getSystemRegisterTag(SYSREG_MEPC)); ++ uint16_t mcause_tag = static_cast(architecture.getSystemRegisterTag(SYSREG_MCAUSE)); ++ uint64_t mcause_val = static_cast(causecode); ++ ++ auto mstatus_bits = registerFileSet.get( { RegisterType::SYSTEM, mstatus_tag } ).get(); ++ ++ // mpie=mie, mie=0 ++ mstatus_bits &= ~MSTATUS_MPIE_MASK; ++ if (mstatus_bits & MSTATUS_MIE_MASK) ++ mstatus_bits |= MSTATUS_MPIE_MASK; ++ mstatus_bits &= ~MSTATUS_MIE_MASK; ++ ++ RegisterValue mstatus (mstatus_bits, architecture.getConstants().regWidth); ++ RegisterValue mepc (instruction_.getInstructionAddress(), architecture.getConstants().regWidth); ++ RegisterValue mcause (mcause_val, architecture.getConstants().regWidth); ++ ++ uint64_t mtvec = registerFileSet.get( { RegisterType::SYSTEM, mtvec_tag } ).get(); ++ ++ ProcessStateChange changes = { ++ ChangeType::REPLACEMENT, ++ { ++ { RegisterType::SYSTEM, mstatus_tag }, ++ { RegisterType::SYSTEM, mepc_tag }, ++ { RegisterType::SYSTEM, mcause_tag } ++ }, ++ {mstatus, mepc, mcause} ++ }; ++ ++ result_ = {false, mtvec, changes}; ++ //result_ = {false, instruction_.getInstructionAddress(), changes}; ++} ++ ++ + bool ExceptionHandler::readBufferThen(uint64_t ptr, uint64_t length, + std::function then, + bool firstCall) { +@@ -827,6 +878,9 @@ void ExceptionHandler::printException(const Instruction& insn) const { + case InstructionException::NoAvailablePort: + std::cout << "unsupported execution port"; + break; ++ case InstructionException::Interrupt: ++ std::cout << "interrupt (id: " << insn.getInterruptId() << ")"; ++ break; + case InstructionException::UnmappedSysReg: + std::cout << "unmapped system register"; + break; +diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc +index 6cfc173b..e292b889 100644 +--- a/src/lib/arch/riscv/Instruction.cc ++++ b/src/lib/arch/riscv/Instruction.cc +@@ -131,7 +131,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { + + BranchType Instruction::getBranchType() const { return branchType_; } + +-uint64_t Instruction::getKnownTarget() const { return knownTarget_; } ++uint64_t Instruction::getKnownOffset() const { return knownOffset_; } + + uint16_t Instruction::getGroup() const { + uint16_t base = InstructionGroups::INT; +@@ -171,6 +171,10 @@ void Instruction::setArchRegWidth(uint8_t len) { archRegWidth_ = len; } + + uint8_t Instruction::getArchRegWidth() const { return archRegWidth_; } + ++const Architecture& Instruction::getArchitecture() const { ++ return architecture_; ++} ++ 
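For reference, the machine-mode trap entry performed by takeException() in ExceptionHandler.cc above reduces to three CSR updates plus a redirect to the trap vector. The sketch below is a standalone illustration, not SimEng code: TrapState and enterMachineTrap() are hypothetical names, and the mstatus bit positions (MIE = bit 3, MPIE = bit 7) follow the RISC-V privileged specification. For interrupts, the caller is assumed to have already folded the interrupt flag into the cause value, as the handler above does with `1 << (8*archRegWidth - 1)`.

```cpp
#include <cstdint>

// Assumed mstatus bit positions (RISC-V privileged spec).
constexpr uint64_t MSTATUS_MIE_MASK  = 1u << 3;
constexpr uint64_t MSTATUS_MPIE_MASK = 1u << 7;

// Hypothetical container for the CSRs touched on trap entry.
struct TrapState {
  uint64_t mstatus, mepc, mcause, mtvec;
};

// Returns the address execution resumes from (the trap vector, direct mode).
uint64_t enterMachineTrap(TrapState& csr, uint64_t faultingPc, uint64_t cause) {
  // mpie <= mie, then mie <= 0 (interrupts stay disabled inside the handler)
  csr.mstatus &= ~MSTATUS_MPIE_MASK;
  if (csr.mstatus & MSTATUS_MIE_MASK) csr.mstatus |= MSTATUS_MPIE_MASK;
  csr.mstatus &= ~MSTATUS_MIE_MASK;

  csr.mepc   = faultingPc;  // MRET will return here and restore mie from mpie
  csr.mcause = cause;       // interrupt bit, if any, is already folded in
  return csr.mtvec;
}
```

The MRET execute case added in Instruction_execute.cc performs the inverse step: it copies MPIE back into MIE and redirects to the mepc value it reads as a source operand.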
+ } // namespace riscv + } // namespace arch + } // namespace simeng +diff --git a/src/lib/arch/riscv/InstructionMetadata.cc b/src/lib/arch/riscv/InstructionMetadata.cc +index f2b5a9b7..d293bc7f 100644 +--- a/src/lib/arch/riscv/InstructionMetadata.cc ++++ b/src/lib/arch/riscv/InstructionMetadata.cc +@@ -264,7 +264,10 @@ void InstructionMetadata::alterPseudoInstructions(const cs_insn& insn) { + csr = ((uint32_t)encoding[3] << 4) | ((uint32_t)encoding[2] >> 4); + //If there are less than 2 operands provided add necessary x0 operand + if(operandCount == 1) { +- if(strcmp(mnemonic, "csrr") == 0) { //csrrs rd,csr,x0 ++ if((strcmp(mnemonic, "rdinstret") == 0) || ++ (strcmp(mnemonic, "rdcycle") == 0) || ++ (strcmp(mnemonic, "rdtime") == 0) || ++ (strcmp(mnemonic, "csrr") == 0)) { //csrrs rd,csr,x0 + operands[1].type = RISCV_OP_REG; + operands[1].reg = 1; + } else { //csrrxx x0,csr,rs/imm +diff --git a/src/lib/arch/riscv/Instruction_decode.cc b/src/lib/arch/riscv/Instruction_decode.cc +index 8bdd5041..9efa7f5c 100644 +--- a/src/lib/arch/riscv/Instruction_decode.cc ++++ b/src/lib/arch/riscv/Instruction_decode.cc +@@ -143,6 +143,24 @@ void Instruction::decode() { + case Opcode::RISCV_SD: + isStore_ = true; + break; ++ //identify MULs/DIVs ++ case Opcode::RISCV_MUL: ++ case Opcode::RISCV_MULH: ++ case Opcode::RISCV_MULHU: ++ case Opcode::RISCV_MULHSU: ++ case Opcode::RISCV_MULW: ++ isMultiply_ = true; ++ isMul_ = true; //this one is for simeng/Instruction.hh ++ break; ++ case Opcode::RISCV_DIV: ++ case Opcode::RISCV_DIVU: ++ case Opcode::RISCV_DIVUW: ++ case Opcode::RISCV_DIVW: ++ isDivide_ = true; ++ isDiv_ = true; //this one is for simeng/Instruction.hh ++ break; ++ case Opcode::RISCV_ECALL: ++ isSysCall_ = true; + } + + if (Opcode::RISCV_AMOADD_D <= metadata.opcode && +@@ -257,6 +275,16 @@ void Instruction::decode() { + isCompare_ = true; + } + ++ if (Opcode::RISCV_MRET == metadata.opcode) { ++ uint16_t mepc_tag = static_cast(architecture_.getSystemRegisterTag(SYSREG_MEPC)); ++ uint16_t mstatus_tag = static_cast(architecture_.getSystemRegisterTag(SYSREG_MSTATUS)); ++ sourceRegisters[sourceRegisterCount++] = { RegisterType::SYSTEM, mepc_tag }; ++ sourceRegisters[sourceRegisterCount++] = { RegisterType::SYSTEM, mstatus_tag }; ++ destinationRegisters[destinationRegisterCount++] = { RegisterType::SYSTEM, mstatus_tag }; ++ operandsPending += 2; ++ isBranch_ = true; ++ } ++ + // Set branch type + switch (metadata.opcode) { + case Opcode::RISCV_BEQ: +@@ -266,12 +294,24 @@ void Instruction::decode() { + case Opcode::RISCV_BGE: + case Opcode::RISCV_BGEU: + branchType_ = BranchType::Conditional; +- knownTarget_ = instructionAddress_ + metadata.operands[2].imm; ++ knownOffset_ = metadata.operands[2].imm; + break; + case Opcode::RISCV_JAL: ++ branchType_ = BranchType::SubroutineCall; ++ knownOffset_ = metadata.operands[1].imm; ++ break; + case Opcode::RISCV_JALR: +- branchType_ = BranchType::Unconditional; +- knownTarget_ = instructionAddress_ + metadata.operands[1].imm; ++ { ++ //jalr x0, 0(x1) == ret ++ if (metadata.operands[0].reg == RISCV_REG_X0 && metadata.operands[1].reg == RISCV_REG_X1 && metadata.operands[2].imm == 0) { ++ branchType_ = BranchType::Return; ++ } else { ++ branchType_ = BranchType::SubroutineCall; ++ } ++ break; ++ } ++ case Opcode::RISCV_MRET: ++ branchType_ = BranchType::Unknown; //TODO: think which type it fits / create new type + break; + } + } +@@ -292,10 +332,14 @@ bool Instruction::decode16() { + "Invalid operand for JR,JALR:- CR instructions"); + 
sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[0].reg); + operandsPending++; ++ branchType_ = BranchType::SubroutineCall; + if (metadata.opcode == Opcode::RISCV_C_JALR) { + destinationRegisters[destinationRegisterCount++] = Instruction::RA_REGISTER; ++ } else { //case C_JR ++ if (metadata.operands[0].reg == RISCV_REG_X1 ) { ++ branchType_ = BranchType::Return; ++ } + } +- branchType_ = BranchType::Unconditional; + break; + case Opcode::RISCV_C_MV: + instFormat_ = CIF_CR; +@@ -309,7 +353,7 @@ bool Instruction::decode16() { + sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[1].reg); + operandsPending++; + break; +- case Opcode::RISCV_C_EBREAK://TODO ++ case Opcode::RISCV_C_EBREAK: + instFormat_ = CIF_CR; + break; + case Opcode::RISCV_C_ADD: +@@ -410,7 +454,7 @@ bool Instruction::decode16() { + operandsPending++; + c_imm = metadata.operands[1].imm; + branchType_ = BranchType::Conditional; +- knownTarget_ = instructionAddress_ + metadata.operands[1].imm; ++ knownOffset_ = metadata.operands[1].imm; + break; + case Opcode::RISCV_C_FLD: + case Opcode::RISCV_C_FLW: +@@ -503,9 +547,11 @@ bool Instruction::decode16() { + c_imm = metadata.operands[0].imm; + if (metadata.opcode == Opcode::RISCV_C_JAL) { + destinationRegisters[destinationRegisterCount++] = Instruction::RA_REGISTER; ++ branchType_ = BranchType::SubroutineCall; ++ } else { // case C_J ++ branchType_ = BranchType::Unconditional; + } +- branchType_ = BranchType::Unconditional; +- knownTarget_ = instructionAddress_ + metadata.operands[0].imm; ++ knownOffset_ = metadata.operands[0].imm; + break; + case Opcode::RISCV_C_UNIMP: + break; +@@ -523,7 +569,7 @@ bool Instruction::decodeCsr() { + } + + isCsr_ = true; +- uint32_t sysRegTag = architecture_.getSystemRegisterTag(metadata.csr); ++ int32_t sysRegTag = architecture_.getSystemRegisterTag(metadata.csr); + if (sysRegTag == -1) { + exceptionEncountered_ = true; + exception_ = InstructionException::UnmappedSysReg; +@@ -539,16 +585,16 @@ bool Instruction::decodeCsr() { + destinationRegisters[destinationRegisterCount++] = { + RegisterType::SYSTEM, static_cast(sysRegTag)}; + +- // First operand from metadata is rd, second operand from metadata is rs1 +- if (csRegToRegister(metadata.operands[1].reg) != Instruction::ZERO_REGISTER) { ++ // First operand (0) from metadata is rd, second operand (1) from metadata is rs1 ++ if (csRegToRegister(metadata.operands[0].reg) != Instruction::ZERO_REGISTER) { + destinationRegisters[destinationRegisterCount++] = +- csRegToRegister(metadata.operands[1].reg); ++ csRegToRegister(metadata.operands[0].reg); + } + +- if(metadata.operands[0].type == RISCV_OP_IMM) { +- c_imm = metadata.operands[0].imm; +- } else if (metadata.operands[0].type == RISCV_OP_REG) { +- sourceRegisters[sourceRegisterCount] = csRegToRegister(metadata.operands[0].reg); ++ if(metadata.operands[1].type == RISCV_OP_IMM) { ++ c_imm = metadata.operands[1].imm; ++ } else if (metadata.operands[1].type == RISCV_OP_REG) { ++ sourceRegisters[sourceRegisterCount] = csRegToRegister(metadata.operands[1].reg); + if (sourceRegisters[sourceRegisterCount] == + Instruction::ZERO_REGISTER) { + // Catch zero register references and pre-complete those operands +diff --git a/src/lib/arch/riscv/Instruction_execute.cc b/src/lib/arch/riscv/Instruction_execute.cc +index b7a4a822..a37d3750 100644 +--- a/src/lib/arch/riscv/Instruction_execute.cc ++++ b/src/lib/arch/riscv/Instruction_execute.cc +@@ -4,6 +4,7 @@ + + #include "InstructionMetadata.hh" + #include 
"simeng/arch/riscv/Instruction.hh" ++#include "simeng/arch/riscv/SystemRegister.hh" + + namespace simeng { + namespace arch { +@@ -358,7 +359,9 @@ void Instruction::execute() { + } + case Opcode::RISCV_SLTIU: { // SLTIU rd,rs1,imm + const uint64_t rs1 = operands[0].get(); +- const uint64_t imm = static_cast(metadata.operands[2].imm); ++ uint64_t imm = metadata.operands[2].imm; ++ if (archRegWidth_==4) ++ imm = static_cast(imm); + if (rs1 < imm) { + results[0] = RegisterValue(static_cast(1), archRegWidth_); + } else { +@@ -460,13 +463,28 @@ void Instruction::execute() { + results[0] = RegisterValue(instructionAddress_ + 4, archRegWidth_); + break; + } +- // TODO EBREAK ++ case Opcode::RISCV_EBREAK: { // EBREAK + // used to return control to a debugging environment pg27 20191213 ++ exceptionEncountered_ = true; ++ exception_ = InstructionException::SecureMonitorCall; ++ break; ++ } + case Opcode::RISCV_ECALL: { // ECALL + exceptionEncountered_ = true; + exception_ = InstructionException::SupervisorCall; + break; + } ++ case Opcode::RISCV_MRET: { // MRET ++ branchAddress_ = (operands[0].get()) & ~1; // Set LSB of result to 0 ++ branchTaken_ = true; ++ ++ auto mstatus = operands[1].get(); ++ if (mstatus & MSTATUS_MPIE_MASK) ++ mstatus |= MSTATUS_MIE_MASK; ++ ++ results[0] = RegisterValue(mstatus, archRegWidth_); ++ break; ++ } + case Opcode::RISCV_FENCE: { // FENCE + // TODO currently modelled as a NOP as all codes are currently single + // threaded "Informally, no other RISC-V hart or external device can +@@ -709,28 +727,49 @@ void Instruction::execute() { + results[0] = RegisterValue(static_cast(rs1 * rs2), archRegWidth_); + break; + } +- // case Opcode::RISCV_MULH: {//MULH rd,rs1,rs2 +- // return executionNYI(); +- // +- // const int64_t rs1 = operands[0].get(); +- // const int64_t rs2 = operands[1].get(); +- // results[0] = RegisterValue(mulhiss(rs1, rs2); +- // break; +- // } ++ case Opcode::RISCV_MULH: {//MULH rd,rs1,rs2 ++ int64_t result; ++ if (archRegWidth_==4) ++ { ++ const int64_t rs1 = operands[0].get(); ++ const int64_t rs2 = operands[1].get(); ++ result = (rs1*rs2)>>32; ++ } else { ++ const int64_t rs1 = operands[0].get(); ++ const int64_t rs2 = operands[1].get(); ++ //result = mulhiss(rs1, rs2); ++ return executionNYI(); ++ } ++ results[0] = RegisterValue(result, archRegWidth_); ++ break; ++ } + case Opcode::RISCV_MULHU: { // MULHU rd,rs1,rs2 + const uint64_t rs1 = operands[0].get(); + const uint64_t rs2 = operands[1].get(); +- results[0] = RegisterValue(mulhiuu(rs1, rs2), archRegWidth_); ++ uint64_t result; ++ if (archRegWidth_==4) ++ result = (rs1*rs2)>>32; ++ else ++ result = mulhiuu(rs1, rs2); ++ results[0] = RegisterValue(result, archRegWidth_); ++ break; ++ } ++ case Opcode::RISCV_MULHSU: {//MULHSU rd,rs1,rs2 ++ int64_t result; ++ if (archRegWidth_==4) ++ { ++ const int64_t rs1 = operands[0].get(); ++ const uint64_t rs2 = operands[1].get(); ++ result = (rs1*rs2)>>32; ++ } else { ++ const int64_t rs1 = operands[0].get(); ++ const uint64_t rs2 = operands[1].get(); ++ //result = mulhisu(rs1, rs2); ++ return executionNYI(); ++ } ++ results[0] = RegisterValue(result, archRegWidth_); + break; + } +- // case Opcode::RISCV_MULHSU: {//MULHSU rd,rs1,rs2 +- // return executionNYI(); +- // +- // const int64_t rs1 = operands[0].get(); +- // const uint64_t rs2 = operands[1].get(); +- // results[0] = RegisterValue(mulhisu(rs1, rs2); +- // break; +- // } + case Opcode::RISCV_MULW: { // MULW rd,rs1,rs2 + const uint32_t rs1 = operands[0].get(); + const uint32_t rs2 = operands[1].get(); +@@ 
-852,12 +891,14 @@ void Instruction::execute() { + uint32_t new_csr_value = old_csr_value & ~(operands[1].get()); + results[0] = RegisterValue(new_csr_value, 4); + results[1] = RegisterValue(old_csr_value, 4); ++ break; + } + case Opcode::RISCV_CSRRCI: { + uint32_t old_csr_value = operands[0].get(); + uint32_t new_csr_value = old_csr_value & ~(c_imm); + results[0] = RegisterValue(new_csr_value, 4); + results[1] = RegisterValue(old_csr_value, 4); ++ break; + } + case Opcode::RISCV_CSRRS: { + uint32_t old_csr_value = operands[0].get(); +@@ -938,8 +979,12 @@ void Instruction::execute() { + } + break; + } +- case Opcode::RISCV_C_EBREAK: ++ case Opcode::RISCV_C_EBREAK: { ++ // used to return control to a debugging environment pg27 20191213 ++ exceptionEncountered_ = true; ++ exception_ = InstructionException::SecureMonitorCall; + break; ++ } + case Opcode::RISCV_C_FLD: + break; + case Opcode::RISCV_C_FLDSP: +diff --git a/src/lib/arch/riscv/SystemRegister.cc b/src/lib/arch/riscv/SystemRegister.cc +new file mode 100644 +index 00000000..05de188d +--- /dev/null ++++ b/src/lib/arch/riscv/SystemRegister.cc +@@ -0,0 +1,124 @@ ++#include "simeng/arch/riscv/Architecture.hh" ++ ++namespace simeng { ++namespace arch { ++namespace riscv { ++ ++bool MemoryMappedSystemRegisterBlock::put(uint16_t offset, const RegisterValue& value) ++{ ++ auto it = memoryMappedSystemRegisters.upper_bound(offset); ++ if (it != memoryMappedSystemRegisters.begin() ) ++ { ++ it--; ++ if (offset-it->first < it->second->size()) { ++ it->second->put(value); ++ return true; ++ } ++ return false; ++ } ++ return false; ++} ++ ++bool MemoryMappedSystemRegisterBlock::get(uint16_t offset, RegisterValue& value) ++{ ++ auto it = memoryMappedSystemRegisters.upper_bound(offset); ++ if (it != memoryMappedSystemRegisters.begin() ) ++ { ++ it--; ++ if (offset-it->first < it->second->size()) { ++ value = it->second->get(); ++ return true; ++ } ++ return false; ++ } ++ return false; ++} ++ ++/** Put/Get Memory Mapped Registers */ ++bool SystemRegisterMemoryInterface::putMemoryMappedSystemRegister(uint64_t address, const RegisterValue& value) ++{ ++ auto it = memoryMappedSystemRegisterBlocks_.upper_bound(address); ++ if (it != memoryMappedSystemRegisterBlocks_.begin() ) ++ { ++ it--; ++ if (address-it->first < it->second->size()) { ++ it->second->put(static_cast(address-it->first),value); ++ return true; ++ } ++ return false; ++ } ++ return false; ++} ++ ++bool SystemRegisterMemoryInterface::getMemoryMappedSystemRegister(uint64_t address, RegisterValue& value) ++{ ++ auto it = memoryMappedSystemRegisterBlocks_.upper_bound(address); ++ if (it != memoryMappedSystemRegisterBlocks_.begin() ) ++ { ++ it--; ++ if (address-it->first < it->second->size()) { ++ it->second->get(static_cast(address-it->first),value); ++ return true; ++ } ++ return false; ++ } ++ return false; ++} ++ ++bool HostTargetInterface::put(uint16_t offset, const RegisterValue&value) ++{ ++ switch(offset) { ++ case PAYLOAD_OFFSET : ++ { ++ char ch = value.getAsVector()[0]; ++ if (ch==3 || ch==1) ++ isHalted_ = true; ++ else ++ putchar(ch); ++ return true; ++ } ++ default : ++ return MemoryMappedSystemRegisterBlock::put(offset, value); ++ } ++} ++ ++int16_t Clint::updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) ++{ ++ uint64_t ticks = iterations-last_tick; ++ uint64_t mtime_val = mtime_.get().get(); ++ bool ticked = false; ++ ++ last_tick = iterations; ++ ++ // if large time passed then multiple timer ticks might be needed ++ while (ticks>=mtime_count) ++ 
{ ++ ticks -= mtime_count; ++ mtime_count = mtime_freq; ++ mtime_val += 1; ++ ticked = true; ++ } ++ ++ // any remaining ticks taken of mtime countdown ++ if (ticks) ++ mtime_count -= ticks; ++ ++ mtime_.put(mtime_val); ++ ++ if (ticked) ++ { ++ // to improve execution speed only do interrupt checks when the timer ticks ++ // check if interrupts enabled ++ uint16_t mstatus_tag = static_cast(architecture_.getSystemRegisterTag(SYSREG_MSTATUS)); ++ auto mstatus_bits = regFile->get( { RegisterType::SYSTEM, mstatus_tag } ).get(); ++ if (mstatus_bits & MSTATUS_MIE_MASK) ++ if (mtime_val >= mtimecmp_.get().get()) ++ return static_cast(InterruptId::TIMER); ++ } ++ ++ return -1; ++} ++ ++} // namespace riscv ++} // namespace arch ++} // namespace simeng +diff --git a/src/lib/kernel/Linux.cc b/src/lib/kernel/Linux.cc +index 02de8950..bc060bba 100644 +--- a/src/lib/kernel/Linux.cc ++++ b/src/lib/kernel/Linux.cc +@@ -29,10 +29,12 @@ void Linux::createProcess(const LinuxProcess& process) { + .currentBrk = process.getHeapStart(), + .initialStackPointer = process.getStackPointer(), + .mmapRegion = process.getMmapStart(), +- .pageSize = process.getPageSize()}); ++ .pageSize = process.getPageSize(), ++ }); + processStates_.back().fileDescriptorTable.push_back(STDIN_FILENO); + processStates_.back().fileDescriptorTable.push_back(STDOUT_FILENO); + processStates_.back().fileDescriptorTable.push_back(STDERR_FILENO); ++ processStates_.back().process = &process; + + // Define vector of all currently supported special file paths & files. + supportedSpecialFiles_.insert( +@@ -649,5 +651,11 @@ int64_t Linux::writev(int64_t fd, const void* iovdata, int iovcnt) { + return ::writev(hfd, reinterpret_cast(iovdata), iovcnt); + } + ++/** Lookup symbol value from table in elf file. */ ++bool Linux::lookupSymbolValue(const std::string symbol, uint64_t& value) ++{ ++ processStates_[0].process->lookupSymbolValue(symbol,value); ++} ++ + } // namespace kernel + } // namespace simeng +diff --git a/src/lib/kernel/LinuxProcess.cc b/src/lib/kernel/LinuxProcess.cc +index 31e36d7f..3279652a 100644 +--- a/src/lib/kernel/LinuxProcess.cc ++++ b/src/lib/kernel/LinuxProcess.cc +@@ -24,7 +24,7 @@ LinuxProcess::LinuxProcess(const std::vector& commandLine, + // Parse ELF file + assert(commandLine.size() > 0); + char* unwrappedProcImgPtr; +- Elf elf(commandLine[0], &unwrappedProcImgPtr); ++ Elf elf(commandLine[0], &unwrappedProcImgPtr,symbols_); + if (!elf.isValid()) { + return; + } +@@ -178,5 +178,17 @@ void LinuxProcess::createStack(char** processImage) { + (*processImage) + stackPointer_); + } + ++bool LinuxProcess::lookupSymbolValue(const std::string symbol, uint64_t& value) const ++{ ++ auto lookup = symbols_.find(symbol); ++ if (lookup==symbols_.end()) ++ return false; ++ else ++ { ++ value = lookup->second; ++ return true; ++ } ++} ++ + } // namespace kernel + } // namespace simeng +diff --git a/src/lib/models/emulation/Core.cc b/src/lib/models/emulation/Core.cc +index 0eff31d5..d9268da2 100644 +--- a/src/lib/models/emulation/Core.cc ++++ b/src/lib/models/emulation/Core.cc +@@ -20,7 +20,8 @@ Core::Core(MemoryInterface& instructionMemory, MemoryInterface& dataMemory, + isa_(isa), + pc_(entryPoint), + registerFileSet_(isa.getRegisterFileStructures()), +- architecturalRegisterFileSet_(registerFileSet_) { ++ architecturalRegisterFileSet_(registerFileSet_), ++ interruptId_(-1) { + // Pre-load the first instruction + instructionMemory_.requestRead({pc_, FETCH_SIZE}); + +@@ -144,11 +145,16 @@ void Core::tick() { + } + + execute(uop); +- 
isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); ++ ++ interruptId_ = isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); + } + + void Core::execute(std::shared_ptr& uop) { +- uop->execute(); ++ ++ if (interruptId_>=0) ++ uop->raiseInterrupt(interruptId_); ++ else ++ uop->execute(); + + if (uop->exceptionEncountered()) { + instructionsExecuted_++; +diff --git a/src/lib/models/mcu/Core.cc b/src/lib/models/mcu/Core.cc +new file mode 100644 +index 00000000..a085d7a3 +--- /dev/null ++++ b/src/lib/models/mcu/Core.cc +@@ -0,0 +1,515 @@ ++#include "simeng/models/mcu/Core.hh" ++ ++#include ++#include ++#include ++#include ++ ++#include "simeng/arch/riscv/SystemRegister.hh" ++ ++namespace simeng { ++namespace models { ++namespace mcu { ++ ++// TODO: Replace with config options ++const unsigned int blockSize = 16; ++const unsigned int clockFrequency = 2.5 * 1e9; ++ ++Core::Core(MemoryInterface& instructionMemory, MemoryInterface& dataMemory, ++ uint64_t processMemorySize, uint64_t entryPoint, ++ const arch::Architecture& isa, BranchPredictor& branchPredictor, YAML::Node config) ++ : dataMemory_(dataMemory), ++ isa_(isa), ++ registerFileSet_(isa.getRegisterFileStructures()), ++ architecturalRegisterFileSet_(registerFileSet_), ++ fetchToDecodeBuffer_(1, {}), ++ decodeToExecuteBuffer_(1, nullptr, 1), ++ completionSlots_(2, {1, nullptr}), ++ regDepMap_(isa.getRegisterFileStructures(), registerFileSet_), ++ fetchUnit_(fetchToDecodeBuffer_, instructionMemory, processMemorySize, ++ entryPoint, blockSize, isa, branchPredictor), ++ decodeUnit_(fetchToDecodeBuffer_, decodeToExecuteBuffer_, ++ branchPredictor, ++ [this](auto instruction) { return canIssue(instruction); }), ++ writebackUnit_(completionSlots_, registerFileSet_, [](auto insnId) {}, ++ [this](auto instruction) {removeDep(instruction);}, ++ [this](auto instruction) { return removeInstrOrderQ(instruction); }), ++ loadStoreQueue_(4, dataMemory, { completionSlots_.data()+1, 1 }, [this](auto regs, auto values) { forwardOperands(regs, values); }, false, 4, 4, 2, 1, 1), ++ executeUnit_( ++ decodeToExecuteBuffer_, completionSlots_[0], ++ [this](auto regs, auto values) { forwardOperands(regs, values); }, ++ [this](auto instruction) { loadStoreQueue_.addLoad(instruction); }, ++ [this](auto instruction) { loadStoreQueue_.addStore(instruction); }, ++ [this](auto instruction) { raiseException(instruction); }, ++ [this](auto instruction) { addInstrOrderQ(instruction); }, ++ [this]() { return isInterruptPending(); }, ++ branchPredictor, false), ++ interruptId_(-1) { ++ // Query and apply initial state ++ auto state = isa.getInitialState(); ++ applyStateChange(state); ++ ++ maxStallCycleTimeout = -1; ++ maxSimCycleTimeout = -1; ++ maxInstrTimeout = -1; ++ if(config["Core"]["EnableHaltCheck"].IsDefined() && config["Core"]["EnableHaltCheck"].as()) { ++ enableHaltCheck = true; ++ if(config["Core"]["MaxStallCycleTimeout"].IsDefined()) { ++ maxStallCycleTimeout = config["Core"]["MaxStallCycleTimeout"].as(); ++ } ++ if(config["Core"]["MaxSimCycleTimeout"].IsDefined()) { ++ maxSimCycleTimeout = config["Core"]["MaxSimCycleTimeout"].as(); ++ } ++ if(config["Core"]["MaxInstrTimeout"].IsDefined()) { ++ maxInstrTimeout = config["Core"]["MaxInstrTimeout"].as(); ++ } ++ } ++}; ++ ++void Core::checkHalting() { ++ if(!enableHaltCheck) return; ++ ++ if (((ticks_ - lastCommitTick_) > maxStallCycleTimeout)) { ++ std::cout << std::dec << "[SimEng:Core] Max Pipeline stall cycle timeout reached at tick: " << (ticks_ - lastCommitTick_) << std::endl; ++ hasHalted_ = true; 
++ } ++ ++ if((ticks_ > maxSimCycleTimeout)) { ++ std::cout << std::dec << "[SimEng:Core] Max Simulation cycle timeout reached at tick: " << ticks_ << std::endl; ++ hasHalted_ = true; ++ } ++ ++ if((getInstructionsRetiredCount() > maxInstrTimeout)) { ++ std::cout << std::dec << "[SimEng:Core] Max Instruction count timeout reached at tick: " << ticks_ << std::endl; ++ hasHalted_ = true; ++ } ++} ++ ++void Core::tick() { ++ ticks_++; ++ ++ checkHalting(); ++ ++ if (hasHalted_) return; ++ ++ if (exceptionHandler_ != nullptr) { ++ processExceptionHandler(); ++ return; ++ } ++ ++ // Writeback must be ticked at start of cycle, to ensure decode reads the ++ // correct values ++ // writebackUnit_.tick(); ++ // for(std::shared_ptr inst: writebackUnit_.getInstsForTrace()) { ++ // uint16_t sysreg_instrret = isa_.getSystemRegisterTag(arch::riscv32::riscv_sysreg::SYSREG_INSTRRET); ++ // uint16_t sysreg_cycle = isa_.getSystemRegisterTag(arch::riscv32::riscv_sysreg::SYSREG_CYCLE); ++ // registerFileSet_.set(Register{0x2, sysreg_instrret}, RegisterValue(static_cast(writebackUnit_.getInstructionsWrittenCount()), 4)); ++ // registerFileSet_.set(Register{0x2, sysreg_cycle}, RegisterValue(static_cast(ticks_), 4)); ++ // isa_.updateInstrTrace(inst, ®isterFileSet_, ticks_); ++ // if(inst->isLoad()) { ++ // loadStoreQueue_.commitLoad(inst); ++ // } else if(inst->isStoreData()) { ++ // loadStoreQueue_.commitStore(inst); ++ // } ++ // lastCommitTick_ = ticks_; ++ // } ++ // writebackUnit_.traceFinished(); ++ ++ ++ loadStoreQueue_.processResponse(); ++ completionSlots_[1].tick(); ++ ++ // Tick units ++ fetchUnit_.tick(); ++ decodeUnit_.tick(); ++ executeUnit_.tick(); ++ ++ // Wipe any data read responses, as they will have been handled by this point ++ //dataMemory_.clearCompletedReads(); ++ ++ loadStoreQueue_.tick(); ++ // Writeback must be ticked at start of cycle, to ensure decode reads the ++ // correct values ++ writebackUnit_.tick(); ++ for(std::shared_ptr inst: writebackUnit_.getInstsForTrace()) { ++ uint16_t sysreg_instrret = isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_INSTRRET); ++ uint16_t sysreg_cycle = isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_CYCLE); ++ registerFileSet_.set(Register{0x2, sysreg_instrret}, RegisterValue(static_cast(writebackUnit_.getInstructionsWrittenCount()), 4)); ++ registerFileSet_.set(Register{0x2, sysreg_cycle}, RegisterValue(static_cast(ticks_), 4)); ++ isa_.updateInstrTrace(inst, ®isterFileSet_, ticks_); ++ if(inst->isLoad()) { ++ loadStoreQueue_.commitLoad(inst); ++ } else if(inst->isStoreData()) { ++ loadStoreQueue_.commitStore(inst); ++ } ++ lastCommitTick_ = ticks_; ++ } ++ // writebackUnit_.traceFinished(); ++ // Read pending registers for ready-to-execute uop; must happen after execute ++ // to allow operand forwarding to take place first ++ // readRegisters(); ++ ++ // Tick buffers ++ // Each unit must have wiped the entries at the head of the buffer after use, ++ // as these will now loop around and become the tail. 
++ fetchToDecodeBuffer_.tick(); ++ decodeToExecuteBuffer_.tick(); ++ completionSlots_[0].tick(); ++ // for (auto& buffer : completionSlots_) { ++ // buffer.tick(); ++ // } ++ ++ // if (exceptionGenerated_) { ++ // handleException(); ++ // //fetchUnit_.requestFromPC(); ++ // return; ++ // } ++ ++ // Check for flush ++ if (executeUnit_.shouldFlush()) { ++ // Flush was requested at execute stage ++ // Update PC and wipe younger buffers (Fetch/Decode, Decode/Execute) ++ auto targetAddress = executeUnit_.getFlushAddress(); ++ ++ fetchUnit_.flushLoopBuffer(); ++ fetchUnit_.updatePC(targetAddress); ++ fetchUnit_.flushPredictor(targetAddress); ++ // Ensure instructions in the buffer if any are set to be flushed before being removed, this helps with removing the respective dependencies if any ++ decodeUnit_.purgeFlushed(); ++ executeUnit_.purgeFlushed(); ++ fetchToDecodeBuffer_.fill({}); ++ decodeToExecuteBuffer_.fill(nullptr); ++ loadStoreQueue_.purgeFlushed(); ++ regDepMap_.purgeFlushed(); ++ ++ flushes_++; ++ } else if (decodeUnit_.shouldFlush()) { ++ assert(false && "Decode unit should not generate flush"); ++ // Flush was requested at decode stage ++ // Update PC and wipe Fetch/Decode buffer. ++ auto targetAddress = decodeUnit_.getFlushAddress(); ++ ++ fetchUnit_.flushLoopBuffer(); ++ fetchUnit_.updatePC(targetAddress); ++ fetchToDecodeBuffer_.fill({}); ++ ++ flushes_++; ++ } ++ ++ if (exceptionGenerated_) { ++ handleException(); ++ //fetchUnit_.requestFromPC(); ++ return; ++ } ++ ++ fetchUnit_.requestFromPC(); ++ interruptId_ = isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); ++} ++ ++bool Core::hasHalted() const { ++ if (hasHalted_) { ++ return true; ++ } ++ ++ // Core is considered to have halted when the fetch unit has halted, there ++ // are no uops at the head of any buffer, and no exception is currently being ++ // handled. ++ bool decodePending = fetchToDecodeBuffer_.getHeadSlots()[0].size() > 0; ++ bool executePending = decodeToExecuteBuffer_.getHeadSlots()[0] != nullptr; ++ bool writebackPending = completionSlots_[0].getHeadSlots()[0] != nullptr; ++ writebackPending |= completionSlots_[1].getHeadSlots()[0] != nullptr; ++ ++ return (fetchUnit_.hasHalted() && !decodePending && !writebackPending && ++ !executePending && exceptionHandler_ == nullptr); ++} ++ ++const ArchitecturalRegisterFileSet& Core::getArchitecturalRegisterFileSet() ++ const { ++ return architecturalRegisterFileSet_; ++} ++ ++uint64_t Core::getInstructionsRetiredCount() const { ++ return writebackUnit_.getInstructionsWrittenCount(); ++} ++ ++uint64_t Core::getSystemTimer() const { ++ // TODO: This will need to be changed if we start supporting DVFS. ++ return ticks_ / (clockFrequency / 1e9); ++} ++ ++std::map Core::getStats() const { ++ auto retired = writebackUnit_.getInstructionsWrittenCount(); ++ auto ipc = retired / static_cast(ticks_); ++ std::ostringstream ipcStr; ++ ipcStr << std::setprecision(2) << ipc; ++ ++ // Sum up the branch stats reported across the execution units. 
++ uint64_t totalBranchesExecuted = 0; ++ uint64_t totalBranchMispredicts = 0; ++ totalBranchesExecuted += executeUnit_.getBranchExecutedCount(); ++ totalBranchMispredicts += executeUnit_.getBranchMispredictedCount(); ++ auto branchMissRate = 100.0f * static_cast(totalBranchMispredicts) / ++ static_cast(totalBranchesExecuted); ++ std::ostringstream branchMissRateStr; ++ branchMissRateStr << std::setprecision(3) << branchMissRate << "%"; ++ ++ return {{"cycles", std::to_string(ticks_)}, ++ {"retired", std::to_string(retired)}, ++ {"ipc", ipcStr.str()}, ++ {"flushes", std::to_string(flushes_)}, ++ {"branch.executed", std::to_string(totalBranchesExecuted)}, ++ {"branch.mispredict", std::to_string(totalBranchMispredicts)}, ++ {"branch.missrate", branchMissRateStr.str()}, ++ {"lsu.ldminlatency", std::to_string(loadStoreQueue_.getMinLdLat())}, ++ {"lsu.ldmaxlatency", std::to_string(loadStoreQueue_.getMaxLdLat())}, ++ {"lsu.ldavglatency", std::to_string(loadStoreQueue_.getAvgLdLat())}}; ++} ++ ++void Core::raiseException(const std::shared_ptr& instruction) { ++ exceptionGenerated_ = true; ++ exceptionGeneratingInstruction_ = instruction; ++} ++ ++void Core::handleException() { ++ exceptionGenerated_ = false; ++ ++ exceptionHandler_ = ++ isa_.handleException(exceptionGeneratingInstruction_, *this, dataMemory_); ++ ++ processExceptionHandler(); ++// isa_.updateInstrTrace(exceptionGeneratingInstruction_, ®isterFileSet_, ticks_); ++// lastCommitTick_ = ticks_; ++// assert(removeInstrOrderQ(exceptionGeneratingInstruction_) && "Unexpected instruction at the top of inorder instr queue on exception"); ++ ++ //TODO: This is not a good point to flush the pipeline if the exception is not changing the PC. ++ ++ // Flush pipeline ++// decodeUnit_.purgeFlushed(); ++// executeUnit_.purgeFlushed(); ++// fetchToDecodeBuffer_.fill({}); ++// decodeToExecuteBuffer_.fill(nullptr); ++// loadStoreQueue_.purgeFlushed(); ++// completionSlots_[0].fill(nullptr); ++// completionSlots_[1].fill(nullptr); ++// regDepMap_.purgeFlushed(); ++} ++ ++void Core::processExceptionHandler() { ++ assert(exceptionHandler_ != nullptr && ++ "Attempted to process an exception handler that wasn't present"); ++ if (dataMemory_.hasPendingRequests()) { ++ // Must wait for all memory requests to complete before processing the ++ // exception ++ return; ++ } ++ ++ auto success = exceptionHandler_->tick(); ++ if (!success) { ++ // Exception handler requires further ticks to complete ++ return; ++ } ++ ++ const auto& result = exceptionHandler_->getResult(); ++ ++ if (result.fatal) { ++ hasHalted_ = true; ++ std::cout << "[SimEng:Core] Halting due to fatal exception" << std::endl; ++ } else { ++ //fetchUnit_.flushLoopBuffer(); ++ fetchUnit_.updatePC(result.instructionAddress); ++ applyStateChange(result.stateChange); ++ } ++ ++ exceptionHandler_ = nullptr; ++} ++ ++void Core::loadData(const std::shared_ptr& instruction) { ++ const auto& addresses = instruction->getGeneratedAddresses(); ++ for (const auto& target : addresses) { ++ dataMemory_.requestRead(target); ++ } ++ ++ // NOTE: This model only supports zero-cycle data memory models, and will not ++ // work unless data requests are handled synchronously. 
++ for (const auto& response : dataMemory_.getCompletedReads()) { ++ instruction->supplyData(response.target.address, response.data); ++ } ++ ++ assert(instruction->hasAllData() && ++ "Load instruction failed to obtain all data this cycle"); ++ ++ instruction->execute(); ++ ++ if (instruction->isStoreData()) { ++ storeData(instruction); ++ } ++} ++ ++void Core::storeData(const std::shared_ptr& instruction) { ++ if (instruction->isStoreAddress()) { ++ auto addresses = instruction->getGeneratedAddresses(); ++ for (auto const& target : addresses) { ++ previousAddresses_.push(target); ++ } ++ } ++ if (instruction->isStoreData()) { ++ const auto data = instruction->getData(); ++ for (size_t i = 0; i < data.size(); i++) { ++ dataMemory_.requestWrite(previousAddresses_.front(), data[i]); ++ previousAddresses_.pop(); ++ } ++ } ++} ++ ++void Core::forwardOperands(const span& registers, ++ const span& values) { ++ return; ++ // assert(registers.size() == values.size() && ++ // "Mismatched register and value vector sizes"); ++ ++ // const auto& uop = decodeToExecuteBuffer_.getTailSlots()[0]; ++ // if (uop == nullptr) { ++ // return; ++ // } ++ ++ // auto sourceRegisters = uop->getOperandRegisters(); ++ // for (size_t i = 0; i < registers.size(); i++) { ++ // // Check each forwarded register vs source operands and supply for each ++ // // match ++ // for (size_t operand = 0; operand < sourceRegisters.size(); operand++) { ++ // const auto& sourceReg = sourceRegisters[operand]; ++ // if (uop->canExecute()) { ++ // return; ++ // } ++ // if (sourceReg == registers[i] && !uop->isOperandReady(operand)) { ++ // // Supply the operand ++ // uop->supplyOperand(operand, values[i]); ++ // } ++ // } ++ // } ++} ++ ++bool Core::canIssue(const std::shared_ptr& uop) { ++ if (uop->isSysCall() && inorderIQ_.size() > 0) { ++ return false; ++ } ++ if((uop->isLoad() || uop->isStoreData()) && loadStoreQueue_.isBusy()) { ++ return false; ++ } ++ if (regDepMap_.canRead(uop) && regDepMap_.canWrite(uop)) { ++ regDepMap_.insert(uop); ++ return true; ++ } ++ return false; ++} ++ ++void Core::removeDep(const std::shared_ptr& uop) { ++ regDepMap_.remove(uop); ++} ++ ++void Core::readRegisters() { ++ if (decodeToExecuteBuffer_.isStalled()) { ++ return; ++ } ++ ++ const auto& uop = decodeToExecuteBuffer_.getTailSlots()[0]; ++ if (uop == nullptr) { ++ return; ++ } ++ ++ // Register read ++ // Identify missing registers and supply values ++ const auto& sourceRegisters = uop->getOperandRegisters(); ++ for (size_t i = 0; i < sourceRegisters.size(); i++) { ++ const auto& reg = sourceRegisters[i]; ++ if (!uop->isOperandReady(i)) { ++ uop->supplyOperand(i, registerFileSet_.get(reg)); ++ } ++ } ++} ++ ++void Core::applyStateChange(const arch::ProcessStateChange& change) { ++ // Update registers in accoradance with the ProcessStateChange type ++ switch (change.type) { ++ case arch::ChangeType::INCREMENT: { ++ for (size_t i = 0; i < change.modifiedRegisters.size(); i++) { ++ registerFileSet_.set( ++ change.modifiedRegisters[i], ++ registerFileSet_.get(change.modifiedRegisters[i]).get() + ++ change.modifiedRegisterValues[i].get()); ++ } ++ break; ++ } ++ case arch::ChangeType::DECREMENT: { ++ for (size_t i = 0; i < change.modifiedRegisters.size(); i++) { ++ registerFileSet_.set( ++ change.modifiedRegisters[i], ++ registerFileSet_.get(change.modifiedRegisters[i]).get() - ++ change.modifiedRegisterValues[i].get()); ++ } ++ break; ++ } ++ default: { // arch::ChangeType::REPLACEMENT ++ // If type is ChangeType::REPLACEMENT, set new values ++ for 
(size_t i = 0; i < change.modifiedRegisters.size(); i++) { ++ registerFileSet_.set(change.modifiedRegisters[i], ++ change.modifiedRegisterValues[i]); ++ } ++ break; ++ } ++ } ++ ++ // Update memory ++ // TODO: Analyse if ChangeType::INCREMENT or ChangeType::DECREMENT case is ++ // required for memory changes ++ for (size_t i = 0; i < change.memoryAddresses.size(); i++) { ++ dataMemory_.requestWrite(change.memoryAddresses[i], ++ change.memoryAddressValues[i]); ++ } ++} ++ ++void Core::handleLoad(const std::shared_ptr& instruction) { ++ loadData(instruction); ++ if (instruction->exceptionEncountered()) { ++ raiseException(instruction); ++ return; ++ } ++ ++ forwardOperands(instruction->getDestinationRegisters(), ++ instruction->getResults()); ++ // Manually add the instruction to the writeback input buffer ++ completionSlots_[1].getTailSlots()[0] = instruction; ++} ++ ++void Core::addInstrOrderQ(const std::shared_ptr& insn) { ++ //std::cout << std::dec << ticks_ << ": Adding instruction at address: 0x" << std::hex << insn->getInstructionAddress() << std::endl; ++ inorderIQ_.push_back(insn); ++} ++ ++bool Core::removeInstrOrderQ(const std::shared_ptr& insn) { ++ if (insn == inorderIQ_.front()) { ++ //std::cout << std::dec << ticks_ << ": Removing instruction at address: 0x" << std::hex << insn->getInstructionAddress() << std::endl; ++ // if(insn->exceptionEncountered()) { ++ // exceptionGenerated_ = true; ++ // exceptionGeneratingInstruction_ = insn; ++ // handleException(); ++ // } ++ inorderIQ_.pop_front(); ++ return true; ++ } else { ++ return false; ++ } ++} ++ ++int16_t Core::isInterruptPending() { ++ if (interruptId_>=0) { ++ std::cout << std::dec << "[SimEng:Core] Interrupt Pending id: " << interruptId_ << ", at tick: " << ticks_ << std::endl; ++ return interruptId_; ++ } else { ++ return -1; ++ } ++} ++ ++} // namespace mcu ++} // namespace models ++} // namespace simeng +diff --git a/src/lib/pipeline/FetchUnit.cc b/src/lib/pipeline/FetchUnit.cc +index ade3d307..28d2eaba 100644 +--- a/src/lib/pipeline/FetchUnit.cc ++++ b/src/lib/pipeline/FetchUnit.cc +@@ -129,7 +129,7 @@ void FetchUnit::tick() { + BranchPrediction prediction = {false, 0}; + if (macroOp[0]->isBranch()) { + prediction = branchPredictor_.predict(pc_, macroOp[0]->getBranchType(), +- macroOp[0]->getKnownTarget()); ++ macroOp[0]->getKnownOffset()); + macroOp[0]->setBranchPrediction(prediction); + } + +diff --git a/src/lib/pipeline_hi/DecodeUnit.cc b/src/lib/pipeline_hi/DecodeUnit.cc +new file mode 100644 +index 00000000..86a298a1 +--- /dev/null ++++ b/src/lib/pipeline_hi/DecodeUnit.cc +@@ -0,0 +1,117 @@ ++#include "simeng/pipeline_hi/DecodeUnit.hh" ++ ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++DecodeUnit::DecodeUnit(PipelineBuffer& input, ++ PipelineBuffer>& output, ++ BranchPredictor& predictor, ++ std::function&)> canIssue) ++ : input_(input), output_(output), predictor_(predictor), canIssue_(canIssue){}; ++ ++void DecodeUnit::tick() { ++ // Stall if output buffer is stalled ++ if (output_.isStalled()) { ++ input_.stall(true); ++ return; ++ } ++ ++ shouldFlush_ = false; ++ input_.stall(false); ++ ++ // Stall if internal uop is overpopulated, otherwise add uops from input to ++ // internal buffer ++ if (microOps_.size() >= output_.getWidth()) { ++ input_.stall(true); ++ } else { ++ // Populate uop buffer with newly fetched macro-ops ++ for (size_t slot = 0; slot < input_.getWidth(); slot++) { ++ auto& macroOp = input_.getHeadSlots()[slot]; ++ ++ if (macroOp.size() == 0) { ++ // Nothing to process for 
this macro-op ++ continue; ++ } ++ ++ for (uint8_t index = 0; index < macroOp.size(); index++) { ++ microOps_.push_back(std::move(macroOp[index])); ++ } ++ ++ input_.getHeadSlots()[slot].clear(); ++ } ++ } ++ ++ // Process uops in buffer ++ for (size_t slot = 0; slot < output_.getWidth(); slot++) { ++ // If there's no more uops to decode, exit loop early ++ if (!microOps_.size()) break; ++ ++ //Check for dependencies before forwarding to next stage ++ //Stop-gap implementation ++ if (!canIssue_(microOps_.front())) break; ++ ++ // Move uop to output buffer and remove from internal buffer ++ auto& uop = (output_.getTailSlots()[slot] = std::move(microOps_.front())); ++ microOps_.pop_front(); ++ ++ // Check preliminary branch prediction results now that the instruction is ++ // decoded. Identifies: ++ // - Non-branch instructions mistakenly predicted as branches ++ // - Incorrect targets for immediate branches ++ // auto [misprediction, correctAddress] = uop->checkEarlyBranchMisprediction(); ++ // if (misprediction) { ++ // earlyFlushes_++; ++ // shouldFlush_ = true; ++ // pc_ = correctAddress; ++ ++ // if (!uop->isBranch()) { ++ // // Non-branch incorrectly predicted as a branch; let the predictor know ++ // predictor_.update(uop->getInstructionAddress(), false, pc_, ++ // uop->getBranchType()); ++ // } ++ // // Remove macro-operations in microOps_ buffer after macro-operation ++ // // decoded in this cycle ++ // auto uopIt = microOps_.begin(); ++ // // Find first microOps_ entry not belonging to same address as flushing ++ // // instruction ++ // while (uopIt != microOps_.end()) { ++ // if ((*uopIt)->getInstructionAddress() != uop->getInstructionAddress()) { ++ // break; ++ // } else { ++ // uopIt++; ++ // } ++ // } ++ // // Remove all entries after first macro-operation in buffer ++ // while (uopIt != microOps_.end()) { ++ // uopIt = microOps_.erase(uopIt); ++ // } ++ ++ // // Skip processing remaining uops, as they need to be flushed ++ // break; ++ // } ++ } ++} ++ ++bool DecodeUnit::shouldFlush() const { return shouldFlush_; } ++uint64_t DecodeUnit::getFlushAddress() const { return pc_; } ++uint64_t DecodeUnit::getEarlyFlushes() const { return earlyFlushes_; }; ++ ++void DecodeUnit::purgeFlushed() { ++ if (output_.getTailSlots()[0] != nullptr) { ++ output_.getTailSlots()[0]->setFlushed(); ++ } ++ ++ if (input_.getHeadSlots()[0].size() != 0) { ++ input_.getHeadSlots()[0][0]->setFlushed(); ++ } ++ ++ if (microOps_.size()) ++ microOps_.front()->setFlushed(); ++ microOps_.clear(); ++ input_.stall(false); ++} ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/lib/pipeline_hi/DispatchIssueUnit.cc b/src/lib/pipeline_hi/DispatchIssueUnit.cc +new file mode 100644 +index 00000000..93ce9fa3 +--- /dev/null ++++ b/src/lib/pipeline_hi/DispatchIssueUnit.cc +@@ -0,0 +1,269 @@ ++#include "simeng/pipeline_hi/DispatchIssueUnit.hh" ++ ++#include ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++DispatchIssueUnit::DispatchIssueUnit( ++ PipelineBuffer>& fromRename, ++ std::vector>>& issuePorts, ++ const RegisterFileSet& registerFileSet, PortAllocator& portAllocator, ++ const std::vector& physicalRegisterStructure, YAML::Node config) ++ : input_(fromRename), ++ issuePorts_(issuePorts), ++ registerFileSet_(registerFileSet), ++ scoreboard_(physicalRegisterStructure.size()), ++ dependencyMatrix_(physicalRegisterStructure.size()), ++ portAllocator_(portAllocator) { ++ // Initialise scoreboard ++ for (size_t type = 0; type < physicalRegisterStructure.size(); type++) { ++ 
scoreboard_[type].assign(physicalRegisterStructure[type], true); ++ dependencyMatrix_[type].resize(physicalRegisterStructure[type]); ++ } ++ // Create set of reservation station structs with correct issue port ++ // mappings ++ for (size_t i = 0; i < config["Reservation-Stations"].size(); i++) { ++ // Iterate over each reservation station in config ++ auto reservation_station = config["Reservation-Stations"][i]; ++ // Create ReservationStation struct to be stored ++ ReservationStation rs = { ++ reservation_station["Size"].as(), ++ reservation_station["Dispatch-Rate"].as(), ++ 0, ++ {}}; ++ // Resize rs port attribute to match what's defined in config file ++ rs.ports.resize(reservation_station["Ports"].size()); ++ for (size_t j = 0; j < reservation_station["Ports"].size(); j++) { ++ // Iterate over issue ports in config ++ uint16_t issue_port = reservation_station["Ports"][j].as(); ++ rs.ports[j].issuePort = issue_port; ++ // Add port mapping entry, resizing vector if needed ++ if ((issue_port + 1) > portMapping_.size()) { ++ portMapping_.resize((issue_port + 1)); ++ } ++ portMapping_[issue_port] = {i, j}; ++ } ++ reservationStations_.push_back(rs); ++ } ++ for (uint16_t i = 0; i < reservationStations_.size(); i++) ++ flushed_.emplace(i, std::initializer_list>{}); ++} ++ ++void DispatchIssueUnit::tick() { ++ input_.stall(false); ++ ++ /** Stores the number of instructions dispatched for each ++ * reservation station. */ ++ std::vector dispatches( ++ static_cast(reservationStations_.size()), 0); ++ ++ for (size_t slot = 0; slot < input_.getWidth(); slot++) { ++ auto& uop = input_.getHeadSlots()[slot]; ++ if (uop == nullptr) { ++ continue; ++ } ++ ++ const std::vector& supportedPorts = uop->getSupportedPorts(); ++ if (uop->exceptionEncountered()) { ++ // Exception; mark as ready to commit, and remove from pipeline ++ uop->setCommitReady(); ++ input_.getHeadSlots()[slot] = nullptr; ++ continue; ++ } ++ // Allocate issue port to uop ++ uint16_t port = portAllocator_.allocate(supportedPorts); ++ uint16_t RS_Index = portMapping_[port].first; ++ uint16_t RS_Port = portMapping_[port].second; ++ assert(RS_Index < reservationStations_.size() && ++ "Allocated port inaccessible"); ++ ReservationStation& rs = reservationStations_[RS_Index]; ++ ++ // When appropriate, stall uop or input buffer if stall buffer full ++ if (rs.currentSize == rs.capacity || ++ dispatches[RS_Index] == rs.dispatchRate) { ++ // Deallocate port given ++ portAllocator_.deallocate(port); ++ input_.stall(true); ++ rsStalls_++; ++ return; ++ } ++ ++ // Assume the uop will be ready ++ bool ready = true; ++ ++ // Register read ++ // Identify remaining missing registers and supply values ++ auto& sourceRegisters = uop->getOperandRegisters(); ++ for (uint16_t i = 0; i < sourceRegisters.size(); i++) { ++ const auto& reg = sourceRegisters[i]; ++ ++ if (!uop->isOperandReady(i)) { ++ // The operand hasn't already been supplied ++ if (scoreboard_[reg.type][reg.tag]) { ++ // The scoreboard says it's ready; read and supply the register value ++ uop->supplyOperand(i, registerFileSet_.get(reg)); ++ } else { ++ // This register isn't ready yet. 
Register this uop to the dependency ++ // matrix for a more efficient lookup later ++ dependencyMatrix_[reg.type][reg.tag].push_back({uop, port, i}); ++ ready = false; ++ } ++ } ++ } ++ ++ // Set scoreboard for all destination registers as not ready ++ auto& destinationRegisters = uop->getDestinationRegisters(); ++ for (const auto& reg : destinationRegisters) { ++ scoreboard_[reg.type][reg.tag] = false; ++ } ++ ++ // Increment dispatches made and RS occupied entries size ++ dispatches[RS_Index]++; ++ rs.currentSize++; ++ ++ if (ready) { ++ rs.ports[RS_Port].ready.push_back(std::move(uop)); ++ } ++ ++ input_.getHeadSlots()[slot] = nullptr; ++ } ++} ++ ++void DispatchIssueUnit::issue() { ++ int issued = 0; ++ // Check the ready queues, and issue an instruction from each if the ++ // corresponding port isn't blocked ++ for (size_t i = 0; i < issuePorts_.size(); i++) { ++ ReservationStation& rs = reservationStations_[portMapping_[i].first]; ++ auto& queue = rs.ports[portMapping_[i].second].ready; ++ if (issuePorts_[i].isStalled()) { ++ if (queue.size() > 0) { ++ portBusyStalls_++; ++ } ++ continue; ++ } ++ ++ if (queue.size() > 0) { ++ auto& uop = queue.front(); ++ issuePorts_[i].getTailSlots()[0] = std::move(uop); ++ queue.pop_front(); ++ ++ // Inform the port allocator that an instruction issued ++ portAllocator_.issued(i); ++ issued++; ++ ++ assert(rs.currentSize > 0); ++ rs.currentSize--; ++ } ++ } ++ ++ if (issued == 0) { ++ for (const auto& rs : reservationStations_) { ++ if (rs.currentSize != 0) { ++ backendStalls_++; ++ return; ++ } ++ } ++ frontendStalls_++; ++ } ++} ++ ++void DispatchIssueUnit::forwardOperands(const span& registers, ++ const span& values) { ++ assert(registers.size() == values.size() && ++ "Mismatched register and value vector sizes"); ++ ++ for (size_t i = 0; i < registers.size(); i++) { ++ const auto& reg = registers[i]; ++ // Flag scoreboard as ready now result is available ++ scoreboard_[reg.type][reg.tag] = true; ++ ++ // Supply the value to all dependent uops ++ const auto& dependents = dependencyMatrix_[reg.type][reg.tag]; ++ for (auto& entry : dependents) { ++ entry.uop->supplyOperand(entry.operandIndex, values[i]); ++ if (entry.uop->canExecute()) { ++ // Add the now-ready instruction to the relevant ready queue ++ auto rsInfo = portMapping_[entry.port]; ++ reservationStations_[rsInfo.first].ports[rsInfo.second].ready.push_back( ++ std::move(entry.uop)); ++ } ++ } ++ ++ // Clear the dependency list ++ dependencyMatrix_[reg.type][reg.tag].clear(); ++ } ++} ++ ++void DispatchIssueUnit::setRegisterReady(Register reg) { ++ scoreboard_[reg.type][reg.tag] = true; ++} ++ ++void DispatchIssueUnit::purgeFlushed() { ++ for (size_t i = 0; i < reservationStations_.size(); i++) { ++ // Search the ready queues for flushed instructions and remove them ++ auto& rs = reservationStations_[i]; ++ for (auto& port : rs.ports) { ++ // Ready queue ++ auto readyIter = port.ready.begin(); ++ while (readyIter != port.ready.end()) { ++ auto& uop = *readyIter; ++ if (uop->isFlushed()) { ++ portAllocator_.deallocate(port.issuePort); ++ readyIter = port.ready.erase(readyIter); ++ assert(rs.currentSize > 0); ++ rs.currentSize--; ++ } else { ++ readyIter++; ++ } ++ } ++ } ++ } ++ ++ // Collect flushed instructions and remove them from the dependency matrix ++ for (auto& it : flushed_) it.second.clear(); ++ for (auto& registerType : dependencyMatrix_) { ++ for (auto& dependencyList : registerType) { ++ auto it = dependencyList.begin(); ++ while (it != dependencyList.end()) { ++ auto& entry = 
*it; ++ if (entry.uop->isFlushed()) { ++ auto rsIndex = portMapping_[entry.port].first; ++ if (!flushed_[rsIndex].count(entry.uop)) { ++ flushed_[rsIndex].insert(entry.uop); ++ portAllocator_.deallocate(entry.port); ++ } ++ it = dependencyList.erase(it); ++ } else { ++ it++; ++ } ++ } ++ } ++ } ++ ++ // Update reservation station size ++ for (uint8_t i = 0; i < reservationStations_.size(); i++) { ++ assert(reservationStations_[i].currentSize >= flushed_[i].size()); ++ reservationStations_[i].currentSize -= flushed_[i].size(); ++ } ++} ++ ++uint64_t DispatchIssueUnit::getRSStalls() const { return rsStalls_; } ++uint64_t DispatchIssueUnit::getFrontendStalls() const { ++ return frontendStalls_; ++} ++uint64_t DispatchIssueUnit::getBackendStalls() const { return backendStalls_; } ++uint64_t DispatchIssueUnit::getPortBusyStalls() const { ++ return portBusyStalls_; ++} ++ ++void DispatchIssueUnit::getRSSizes(std::vector& sizes) const { ++ for (auto& rs : reservationStations_) { ++ sizes.push_back(rs.capacity - rs.currentSize); ++ } ++} ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/lib/pipeline_hi/ExecuteUnit.cc b/src/lib/pipeline_hi/ExecuteUnit.cc +new file mode 100644 +index 00000000..e3b5089d +--- /dev/null ++++ b/src/lib/pipeline_hi/ExecuteUnit.cc +@@ -0,0 +1,255 @@ ++#include "simeng/pipeline_hi/ExecuteUnit.hh" ++ ++#include ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++ExecuteUnit::ExecuteUnit( ++ PipelineBuffer>& input, ++ PipelineBuffer>& output, ++ std::function, span)> forwardOperands, ++ std::function&)> handleLoad, ++ std::function&)> handleStore, ++ std::function&)> raiseException, ++ std::function&)> addInstrOrderQ, ++ std::function isInterruptPending, ++ BranchPredictor& predictor, bool pipelined, ++ const std::vector& blockingGroups) ++ : input_(input), ++ output_(output), ++ forwardOperands_(forwardOperands), ++ handleLoad_(handleLoad), ++ handleStore_(handleStore), ++ raiseException_(raiseException), ++ addInstrOrderQ_(addInstrOrderQ), ++ isInterruptPending_(isInterruptPending), ++ predictor_(predictor), ++ pipelined_(pipelined), ++ blockingGroups_(blockingGroups) {} ++ ++void ExecuteUnit::tick() { ++ tickCounter_++; ++ shouldFlush_ = false; ++ ++ if (stallUntil_ <= tickCounter_) { ++ input_.stall(false); ++ // Input isn't stalled; process instruction and add to pipeline ++ ++ auto& uop = input_.getHeadSlots()[0]; ++ if (uop != nullptr) { ++ if (!uop->isFlushed()) { ++ // Retrieve execution latency from the instruction ++ auto latency = uop->getLatency(); ++ cycles_++; ++ // Block uop execution if appropriate ++ if (std::find(blockingGroups_.begin(), blockingGroups_.end(), ++ uop->getGroup()) != blockingGroups_.end()) { ++ if (operationsStalled_.size() == 0) { ++ // Add uop to pipeline ++ pipeline_.push_back({nullptr, tickCounter_ + latency - 1}); ++ pipeline_.back().insn = std::move(uop); ++ operationsStalled_.push_back(pipeline_.back().insn); ++ } else { ++ // Stall execution start cycle ++ operationsStalled_.push_back(nullptr); ++ operationsStalled_.back() = std::move(uop); ++ } ++ } else if (latency == 1 && pipeline_.size() == 0) { ++ // Pipeline is empty and insn will execute this cycle; bypass ++ execute(uop); ++ } else { ++ // This instruction may take more than a single cycle; check for a ++ // stall. For unpipelined units, the unit will stall for the full ++ // instruction duration. ++ auto stallCycles = ++ pipelined_ ? 
uop->getStallCycles() : uop->getLatency(); ++ if (stallCycles > 1) { ++ stallUntil_ = tickCounter_ + stallCycles - 1; ++ input_.stall(true); ++ } ++ ++ // Add insn to pipeline ++ pipeline_.push_back({nullptr, tickCounter_ + latency - 1}); ++ pipeline_.back().insn = std::move(uop); ++ } ++ } ++ input_.getHeadSlots()[0] = nullptr; ++ } ++ } ++ ++ if (pipeline_.size() == 0) { ++ return; ++ } ++ ++ auto& head = pipeline_.front(); ++ if (head.readyAt <= tickCounter_) { ++ // Check if the completion of an operation would unblock ++ // another stalled operation. ++ if (std::find(blockingGroups_.begin(), blockingGroups_.end(), ++ head.insn->getGroup()) != blockingGroups_.end()) { ++ operationsStalled_.pop_front(); ++ if (operationsStalled_.size() > 0) { ++ // Add uop to pipeline ++ auto& uop = operationsStalled_.front(); ++ pipeline_.push_back({nullptr, tickCounter_ + uop->getLatency() - 1}); ++ pipeline_.back().insn = std::move(uop); ++ operationsStalled_.front() = pipeline_.back().insn; ++ } ++ } ++ execute(head.insn); ++ pipeline_.pop_front(); ++ } ++} ++ ++void ExecuteUnit::execute(std::shared_ptr& uop) { ++ assert(uop->canExecute() && ++ "Attempted to execute an instruction before it was ready"); ++ ++ int16_t pendingInterruptId = isInterruptPending_(); ++ if(pendingInterruptId>=0) { ++ //std::cout << std::hex << "Execution encountered pending interrupt, PC 0x" << uop->getInstructionAddress() << std::endl; ++ uop->raiseInterrupt(pendingInterruptId); ++ uop->setFlushed(); ++ raiseException_(uop); ++ shouldFlush_ = true; ++ return; ++ } ++ ++ addInstrOrderQ_(uop); ++ if (uop->exceptionEncountered()) { ++ // Exception encountered prior to execution ++ // TODO: Identify whether this can be removed; executing an ++ // exception-encountered uop would have to be guaranteed to be safe ++ raiseException_(uop); ++ return; ++ } ++ ++ if (uop->isLoad()) { ++ uop->generateAddresses(); ++ if (uop->exceptionEncountered()) { ++ // Exception; don't pass handle load function ++ raiseException_(uop); ++ return; ++ } ++ handleLoad_(uop); ++ return; ++ } else if (uop->isStoreAddress() || uop->isStoreData()) { ++ if (uop->isStoreAddress()) { ++ uop->generateAddresses(); ++ } ++ if (uop->isStoreData()) { ++ uop->execute(); ++ } ++ handleStore_(uop); ++ } else { ++ uop->execute(); ++ } ++ ++ if (uop->exceptionEncountered()) { ++ // Exception; don't forward results, don't pass uop forward ++ raiseException_(uop); ++ shouldFlush_ = true; ++ //TODO: Let the instruction go into writeback stage ++ // return; ++ } ++ ++ if (uop->isBranch()) { ++ pc_ = uop->getBranchAddress(); ++ ++ // Update branch predictor with branch results ++ predictor_.update(uop->getInstructionAddress(), uop->wasBranchTaken(), pc_, ++ uop->getBranchType()); ++ ++ // Update the branch instruction counter ++ branchesExecuted_++; ++ ++ if (uop->wasBranchMispredicted()) { ++ //std::cout << std::dec << tickCounter_ << std::hex << ": Misprediction iaddr: 0x" << uop->getInstructionAddress() << ", " << uop->getBranchPrediction().taken << std::endl; ++ // Misprediction; flush the pipeline ++ shouldFlush_ = true; ++ flushAfter_ = uop->getInstructionId(); ++ // Update the branch misprediction counter ++ branchMispredicts_++; ++ } ++ } ++ ++ // Operand forwarding; allows a dependent uop to execute next cycle ++ //if (!uop->isMul() && !uop->isDiv()) { ++ // forwardOperands_(uop->getDestinationRegisters(), uop->getResults()); ++ //} ++ ++ output_.getTailSlots()[0] = std::move(uop); ++} ++ ++bool ExecuteUnit::shouldFlush() const { return shouldFlush_; } 
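The execute unit above tracks in-flight work as (instruction, readyAt) pairs and retires the head entry once its ready tick has been reached. A minimal standalone sketch of that bookkeeping, using hypothetical names and ignoring the blocking-group and stall handling, looks as follows:

```cpp
#include <cstdint>
#include <deque>

struct InFlightOp {
  int id;            // stand-in for the instruction pointer
  uint64_t readyAt;  // tick at which the result becomes available
};

class LatencyPipe {
  std::deque<InFlightOp> pipe_;
  uint64_t tick_ = 0;

 public:
  // Accept an operation whose result is visible `latency` cycles from now.
  void issue(int id, uint64_t latency) { pipe_.push_back({id, tick_ + latency}); }

  // Advance one cycle; returns the id of the completed head, or -1.
  int tick() {
    ++tick_;
    if (!pipe_.empty() && pipe_.front().readyAt <= tick_) {
      int done = pipe_.front().id;
      pipe_.pop_front();
      return done;
    }
    return -1;
  }
};
```

Because completion order is insertion order, operations that would otherwise be overtaken (unpipelined units, multi-cycle stall groups) have to hold back the input instead, which is what the stallUntil_ check at the top of ExecuteUnit::tick() provides.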
++uint64_t ExecuteUnit::getFlushAddress() const { return pc_; } ++uint64_t ExecuteUnit::getFlushSeqId() const { return flushAfter_; } ++ ++void ExecuteUnit::purgeFlushed() { ++ auto& uop = input_.getHeadSlots()[0]; ++ if (uop != nullptr) { ++ if (!uop->isFlushed()) { ++ uop->setFlushed(); ++ } ++ } ++ ++ if (pipeline_.size() == 0) { ++ return; ++ } ++ ++ // If the newest instruction has been flushed, clear any stalls. ++ if (pipeline_.back().insn->isFlushed()) { ++ stallUntil_ = tickCounter_; ++ } ++ ++ // Iterate over the pipeline and remove flushed instructions ++ auto it = pipeline_.begin(); ++ while (it != pipeline_.end()) { ++ auto& entry = *it; ++ if (entry.insn->isFlushed()) { ++ it = pipeline_.erase(it); ++ } else { ++ it++; ++ } ++ } ++ ++ // If first blocking in-flight instruction is flushed, ensure another ++ // non-flushed stalled instruction takes it place in the pipeline if ++ // available. ++ bool replace = false; ++ if (operationsStalled_.size() > 0 && ++ operationsStalled_.front()->isFlushed()) { ++ replace = true; ++ } ++ auto itStall = operationsStalled_.begin(); ++ while (itStall != operationsStalled_.end()) { ++ auto& entry = *itStall; ++ if (entry->isFlushed()) { ++ itStall = operationsStalled_.erase(itStall); ++ } else { ++ itStall++; ++ } ++ } ++ ++ if (replace && operationsStalled_.size() > 0) { ++ // Add uop to pipeline ++ auto& uop = operationsStalled_.front(); ++ pipeline_.push_back({nullptr, tickCounter_ + uop->getLatency() - 1}); ++ pipeline_.back().insn = std::move(uop); ++ operationsStalled_.front() = pipeline_.back().insn; ++ } ++} ++ ++uint64_t ExecuteUnit::getBranchExecutedCount() const { ++ return branchesExecuted_; ++} ++uint64_t ExecuteUnit::getBranchMispredictedCount() const { ++ return branchMispredicts_; ++} ++ ++uint64_t ExecuteUnit::getCycles() const { return cycles_; } ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/lib/pipeline_hi/FetchUnit.cc b/src/lib/pipeline_hi/FetchUnit.cc +new file mode 100644 +index 00000000..4de190ef +--- /dev/null ++++ b/src/lib/pipeline_hi/FetchUnit.cc +@@ -0,0 +1,265 @@ ++#include "simeng/pipeline_hi/FetchUnit.hh" ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++FetchUnit::FetchUnit(PipelineBuffer& output, ++ MemoryInterface& instructionMemory, ++ uint64_t programByteLength, uint64_t entryPoint, ++ uint8_t blockSize, const arch::Architecture& isa, ++ BranchPredictor& branchPredictor) ++ : output_(output), ++ pc_(entryPoint), ++ instructionMemory_(instructionMemory), ++ programByteLength_(programByteLength), ++ isa_(isa), ++ branchPredictor_(branchPredictor), ++ blockSize_(blockSize), ++ blockMask_(~(blockSize_ - 1)) { ++ assert(blockSize_ >= isa_.getMaxInstructionSize() && ++ "fetch block size must be larger than the largest instruction"); ++ fetchBuffer_ = new uint8_t[2 * blockSize_]; ++ requestFromPC(); ++} ++ ++FetchUnit::~FetchUnit() { delete[] fetchBuffer_; } ++ ++void FetchUnit::tick() { ++ if (output_.isStalled()) { ++ return; ++ } ++ ++ if (hasHalted_ || waitSCEval_) { ++ return; ++ } ++ ++ // If loop buffer has been filled, fill buffer to decode ++ // if (loopBufferState_ == LoopBufferState::SUPPLYING) { ++ // auto outputSlots = output_.getTailSlots(); ++ // for (size_t slot = 0; slot < output_.getWidth(); slot++) { ++ // auto& macroOp = outputSlots[slot]; ++ // auto bytesRead = isa_.predecode(&(loopBuffer_.front().encoding), ++ // loopBuffer_.front().instructionSize, ++ // loopBuffer_.front().address, macroOp); ++ ++ // assert(bytesRead != 0 && "predecode failure for loop 
buffer entry"); ++ ++ // // Set prediction to recorded value during loop buffer filling ++ // if (macroOp[0]->isBranch()) { ++ // macroOp[0]->setBranchPrediction(loopBuffer_.front().prediction); ++ // } ++ ++ // // Cycle queue by moving front entry to back ++ // loopBuffer_.push_back(loopBuffer_.front()); ++ // loopBuffer_.pop_front(); ++ // } ++ // return; ++ // } ++ ++ // Pointer to the instruction data to decode from ++ const uint8_t* buffer; ++ uint8_t bufferOffset; ++ ++ // Check if more instruction data is required ++ if (bufferedBytes_ < isa_.getMaxInstructionSize()) { ++ // Calculate the address of the next fetch block ++ uint64_t blockAddress; ++ if (bufferedBytes_ > 0) { ++ // There is already some data in the buffer, so check for the next block ++ bufferOffset = 0; ++ blockAddress = pc_ + bufferedBytes_; ++ assert((blockAddress & ~blockMask_) == 0 && "misaligned fetch buffer"); ++ } else { ++ // Fetch buffer is empty, so start from the PC ++ blockAddress = pc_ & blockMask_; ++ bufferOffset = pc_ - blockAddress; ++ } ++ ++ // Find fetched memory that matches the desired block ++ const auto& fetched = instructionMemory_.getCompletedReads(); ++ ++ size_t fetchIndex; ++ for (fetchIndex = 0; fetchIndex < fetched.size(); fetchIndex++) { ++ if (fetched[fetchIndex].target.address == blockAddress) { ++ break; ++ } ++ } ++ if (fetchIndex == fetched.size()) { ++ // Need to wait for fetched instructions ++ return; ++ } ++ ++ // TODO: Handle memory faults ++ assert(fetched[fetchIndex].data && "Memory read failed"); ++ const uint8_t* fetchData = fetched[fetchIndex].data.getAsVector(); ++ ++ // Copy fetched data to fetch buffer after existing data ++ std::memcpy(fetchBuffer_ + bufferedBytes_, fetchData + bufferOffset, ++ blockSize_ - bufferOffset); ++ ++ bufferedBytes_ += blockSize_ - bufferOffset; ++ buffer = fetchBuffer_; ++ // Decoding should start from the beginning of the fetchBuffer_. ++ bufferOffset = 0; ++ } else { ++ // There is already enough data in the fetch buffer, so use that ++ buffer = fetchBuffer_; ++ bufferOffset = 0; ++ } ++ ++ // Check we have enough data to begin decoding ++ if (bufferedBytes_ == isa_.getMinInstructionSize()) { ++ //Check if those bytes points to a instruction with minimum size or more data is required. If more data is required return ++ // TODO: this is not generic solution, just trying to make it work ++ uint16_t rawBits; ++ memcpy(&rawBits, buffer + bufferOffset, 2); ++ if((rawBits & 0x3) == 0x3) { ++ //std::cout << std::hex << "Only 2 bytes left in fetch buffer and not compresses instr type, current PC: 0x" << pc_ << std::endl; ++ return; ++ } ++ } ++ ++ auto outputSlots = output_.getTailSlots(); ++ for (size_t slot = 0; slot < output_.getWidth(); slot++) { ++ auto& macroOp = outputSlots[slot]; ++ ++ auto bytesRead = ++ isa_.predecode(buffer + bufferOffset, bufferedBytes_, pc_, macroOp); ++ ++ // If predecode fails, bail and wait for more data ++ if (bytesRead == 0) { ++ assert(bufferedBytes_ < isa_.getMinInstructionSize() && ++ "unexpected predecode failure"); ++ break; ++ } ++ ++ // Create branch prediction after identifing instruction type ++ // (e.g. RET, BL, etc). 
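++    // The predictor is given the branch type, the statically known target
++    // offset and the instruction length, so a purely static scheme (e.g.
++    // backward-taken/forward-not-taken) can form a prediction without any
++    // dynamic history.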
++ BranchPrediction prediction = {false, 0}; ++ if (macroOp[0]->isBranch()) { ++ prediction = branchPredictor_.predict(pc_, macroOp[0]->getBranchType(), ++ macroOp[0]->getKnownOffset(), ++ (uint8_t)bytesRead); ++ macroOp[0]->setBranchPrediction(prediction); ++ } ++ ++ // if (loopBufferState_ == LoopBufferState::FILLING) { ++ // // Record instruction fetch information in loop body ++ // uint32_t encoding; ++ // memcpy(&encoding, buffer + bufferOffset, sizeof(uint32_t)); ++ // loopBuffer_.push_back( ++ // {encoding, bytesRead, pc_, macroOp[0]->getBranchPrediction()}); ++ ++ // if (pc_ == loopBoundaryAddress_) { ++ // // loopBoundaryAddress_ has been fetched whilst filling the loop buffer. ++ // // Stop filling as loop body has been recorded and begin to supply ++ // // decode unit with instructions from the loop buffer ++ // loopBufferState_ = LoopBufferState::SUPPLYING; ++ // bufferedBytes_ = 0; ++ // break; ++ // } ++ // } else if (loopBufferState_ == LoopBufferState::WAITING && ++ // pc_ == loopBoundaryAddress_) { ++ // // Once set loopBoundaryAddress_ is fetched, start to fill loop buffer ++ // loopBufferState_ = LoopBufferState::FILLING; ++ // } ++ ++ assert(bytesRead <= bufferedBytes_ && ++ "Predecode consumed more bytes than were available"); ++ // Increment the offset, decrement available bytes ++ bufferOffset += bytesRead; ++ bufferedBytes_ -= bytesRead; ++ ++ if (!prediction.taken) { ++ // Predicted as not taken; increment PC to next instruction ++ pc_ += bytesRead; ++ } else { ++ // Predicted as taken; set PC to predicted target address ++ pc_ = prediction.target; ++ } ++// std::cout << std::hex << "PC: 0x" << pc_ << ", PBL: 0x" << programByteLength_ << std::endl; ++ if (pc_ == 0 && (macroOp[0]->getBranchType() == BranchType::SubroutineCall)) { ++ waitSCEval_ = true; ++ break; ++ } ++ ++ if (pc_ >= programByteLength_) { ++ hasHalted_ = true; ++ break; ++ } ++ ++ if (prediction.taken) { ++ if (slot + 1 < output_.getWidth()) { ++ branchStalls_++; ++ } ++ // Can't continue fetch immediately after a branch ++ bufferedBytes_ = 0; ++ break; ++ } ++ ++ // Too few bytes remaining in buffer to continue ++ if (bufferedBytes_ == 0) { ++ break; ++ } ++ } ++ ++ if (bufferedBytes_ > 0) { ++ // Move start of fetched data to beginning of fetch buffer ++ std::memmove(fetchBuffer_, buffer + bufferOffset, bufferedBytes_); ++ } ++ ++ instructionMemory_.clearCompletedReads(); ++} ++ ++void FetchUnit::registerLoopBoundary(uint64_t branchAddress) { ++ // Set branch which forms the loop as the loopBoundaryAddress_ and place loop ++ // buffer in state to begin filling once the loopBoundaryAddress_ has been ++ // fetched ++ loopBufferState_ = LoopBufferState::WAITING; ++ loopBoundaryAddress_ = branchAddress; ++} ++ ++bool FetchUnit::hasHalted() const { return hasHalted_; } ++ ++void FetchUnit::updatePC(uint64_t address) { ++ pc_ = address; ++ bufferedBytes_ = 0; ++ hasHalted_ = (pc_ >= programByteLength_); ++ waitSCEval_ = false; ++} ++ ++void FetchUnit::requestFromPC() { ++ // Do nothing if buffer already contains enough data ++ if (bufferedBytes_ >= isa_.getMaxInstructionSize()) return; ++ ++ // Do nothing if unit has halted to avoid invalid speculative memory reads ++ // beyond the programByteLength_ ++ if (hasHalted_ || waitSCEval_) return; ++ ++ uint64_t blockAddress; ++ if (bufferedBytes_ > 0) { ++ // There's already some data in the buffer, so fetch the next block ++ blockAddress = pc_ + bufferedBytes_; ++ assert((blockAddress & ~blockMask_) == 0 && "misaligned fetch buffer"); ++ } else { ++ // 
Fetch buffer is empty, so fetch from the PC ++ blockAddress = pc_ & blockMask_; ++ } ++ ++ instructionMemory_.requestRead({blockAddress, blockSize_}); ++} ++ ++uint64_t FetchUnit::getBranchStalls() const { return branchStalls_; } ++ ++void FetchUnit::flushLoopBuffer() { ++ // loopBuffer_.clear(); ++ // loopBufferState_ = LoopBufferState::IDLE; ++ // loopBoundaryAddress_ = 0; ++} ++ ++void FetchUnit::flushPredictor(uint64_t address) { ++ branchPredictor_.flush(address); ++} ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/lib/pipeline_hi/LoadStoreQueue.cc b/src/lib/pipeline_hi/LoadStoreQueue.cc +new file mode 100644 +index 00000000..c0b752e8 +--- /dev/null ++++ b/src/lib/pipeline_hi/LoadStoreQueue.cc +@@ -0,0 +1,315 @@ ++#include "simeng/pipeline_hi/LoadStoreQueue.hh" ++ ++#include ++#include ++#include ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++/** Check whether requests `a` and `b` overlap. */ ++bool requestsOverlap(MemoryAccessTarget a, MemoryAccessTarget b) { ++ // Check whether one region ends before the other begins, implying no overlap, ++ // and negate ++ return !(a.address + a.size <= b.address || b.address + b.size <= a.address); ++} ++ ++LoadStoreQueue::LoadStoreQueue( ++ unsigned int maxCombinedSpace, MemoryInterface& memory, ++ span>> completionSlots, ++ std::function, span)> forwardOperands, ++ bool exclusive, uint16_t loadBandwidth, uint16_t storeBandwidth, ++ uint16_t permittedRequests, uint16_t permittedLoads, ++ uint16_t permittedStores) ++ : completionSlots_(completionSlots), ++ forwardOperands_(forwardOperands), ++ maxCombinedSpace_(maxCombinedSpace), ++ combined_(true), ++ memory_(memory), ++ exclusive_(exclusive), ++ loadBandwidth_(loadBandwidth), ++ storeBandwidth_(storeBandwidth), ++ totalLimit_(permittedRequests), ++ // Set per-cycle limits for each request type ++ reqLimits_{permittedLoads, permittedStores} {}; ++ ++LoadStoreQueue::LoadStoreQueue( ++ unsigned int maxLoadQueueSpace, unsigned int maxStoreQueueSpace, ++ MemoryInterface& memory, ++ span>> completionSlots, ++ std::function, span)> forwardOperands, ++ bool exclusive, uint16_t loadBandwidth, uint16_t storeBandwidth, ++ uint16_t permittedRequests, uint16_t permittedLoads, ++ uint16_t permittedStores) ++ : completionSlots_(completionSlots), ++ forwardOperands_(forwardOperands), ++ maxLoadQueueSpace_(maxLoadQueueSpace), ++ maxStoreQueueSpace_(maxStoreQueueSpace), ++ combined_(false), ++ memory_(memory), ++ exclusive_(exclusive), ++ loadBandwidth_(loadBandwidth), ++ storeBandwidth_(storeBandwidth), ++ totalLimit_(permittedRequests), ++ // Set per-cycle limits for each request type ++ reqLimits_{permittedLoads, permittedStores} {}; ++ ++unsigned int LoadStoreQueue::getLoadQueueSpace() const { ++ if (combined_) { ++ return getCombinedSpace(); ++ } else { ++ return getLoadQueueSplitSpace(); ++ } ++} ++unsigned int LoadStoreQueue::getStoreQueueSpace() const { ++ if (combined_) { ++ return getCombinedSpace(); ++ } else { ++ return getStoreQueueSplitSpace(); ++ } ++} ++unsigned int LoadStoreQueue::getTotalSpace() const { ++ if (combined_) { ++ return getCombinedSpace(); ++ } else { ++ return getLoadQueueSplitSpace() + getStoreQueueSplitSpace(); ++ } ++} ++ ++unsigned int LoadStoreQueue::getLoadQueueSplitSpace() const { ++ return maxLoadQueueSpace_ - loadQueue_.size(); ++} ++unsigned int LoadStoreQueue::getStoreQueueSplitSpace() const { ++ return maxStoreQueueSpace_ - storeQueue_.size(); ++} ++unsigned int LoadStoreQueue::getCombinedSpace() const { ++ return 
maxCombinedSpace_ - loadQueue_.size() - storeQueue_.size(); ++} ++ ++bool isMisAligned(uint64_t addr, uint8_t sz) { ++ if(((addr & 0x1) && sz==2) || ((addr & 0x3) && sz==4)) { ++ return true; ++ } ++ return false; ++} ++ ++void LoadStoreQueue::addLoad(const std::shared_ptr& insn) { ++ ++ const auto& addresses = insn->getGeneratedAddresses(); ++ ++ assert(addresses.size()==1 && "Expecting only 1 address in load request"); ++ // Do something to split into multiple requests if alignment is required for case like crossing 4 byte boundary. ++ ++ loadQueue_.push_back(insn); ++ uint64_t add_tick = 1; ++ bool isMisAlign = false; ++ if (isMisAligned(addresses[0].address, addresses[0].size)) { ++ add_tick+=1; ++ isMisAlign=true; ++ } ++ requestQueue_.push_back({{}, {}, insn, LOAD, (tickCounter_+add_tick) + insn->getLSQLatency(), isMisAlign}); ++ // Submit request write to memory interface early as the architectural state ++ // considers the store to be retired and thus its operation complete ++ ++ for (size_t i = 0; i < addresses.size(); i++) { ++ //memory_.requestWrite(addresses[i], data[i]); ++ // Still add addresses to requestQueue_ to ensure contention of resources is ++ // correctly simulated ++ requestQueue_.back().reqAddresses.push(addresses[i]); ++ } ++ ++ //loadQueue_.push_back(insn); ++ //startLoad(insn); ++} ++ ++void LoadStoreQueue::addStore(const std::shared_ptr& insn) { ++ ++ const auto& addresses = insn->getGeneratedAddresses(); ++ span data = insn->getData(); ++ ++ assert(addresses.size()==1 && "Expecting only 1 address in store request"); ++ // Do something to split into multiple requests if alignment is required for case like crossing 4 byte boundary. ++ ++ storeQueue_.push_back({insn, data}); ++ ++ uint64_t add_tick = 1; ++ bool isMisAlign = false; ++ if (isMisAligned(addresses[0].address, addresses[0].size)) { ++ add_tick+=1; ++ isMisAlign = true; ++ } ++ ++ requestQueue_.push_back({{}, {}, insn, STORE, (tickCounter_+add_tick) + insn->getLSQLatency(), isMisAlign}); ++ // Submit request write to memory interface early as the architectural state ++ // considers the store to be retired and thus its operation complete ++ ++ for (size_t i = 0; i < addresses.size(); i++) { ++ //memory_.requestWrite(addresses[i], data[i]); ++ // Still add addresses to requestQueue_ to ensure contention of resources is ++ // correctly simulated ++ requestQueue_.back().reqAddresses.push(addresses[i]); ++ requestQueue_.back().data.push(data[i]); ++ } ++ //storeQueue_.push_back({insn, {}}); ++ //supplyStoreData(insn); ++ //commitStore(insn); ++} ++ ++void LoadStoreQueue::startLoad(const std::shared_ptr& insn) { ++ return; ++} ++ ++void LoadStoreQueue::supplyStoreData(const std::shared_ptr& insn) { ++ return; ++} ++ ++bool LoadStoreQueue::commitStore(const std::shared_ptr& uop) { ++ ++ if (storeQueue_.front().first == uop) { ++ storeQueue_.pop_front(); ++ } else { ++ assert(false && "The commited store is not the one in the front of the storeQueue_"); ++ } ++ return true; ++} ++ ++void LoadStoreQueue::commitLoad(const std::shared_ptr& uop) { ++ ++ if (loadQueue_.front() == uop) { ++ loadQueue_.pop_front(); ++ } else { ++ assert(false && "The commited store is not the one in the front of the loadQueue_"); ++ } ++ return; ++} ++ ++void LoadStoreQueue::purgeFlushed() { ++ ++ return; ++ ++} ++ ++bool LoadStoreQueue::isBusy() const { ++ // TODO: This is just to allow only 1 outstanding request to be used for SST integeration. 
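++  // Report busy while a misaligned access is about to be issued, or while two
++  // or more memory operations are already in flight.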
++ //if (activeMisAlignedOpr() || loadQueue_.size()>=1 || storeQueue_.size()>=1) { ++ if (activeMisAlignedOpr() || (loadQueue_.size()+storeQueue_.size())>=2) { ++ return true; ++ } ++ return false; ++} ++ ++void LoadStoreQueue::tick() { ++ tickCounter_++; ++ ++ //Request at the front of the queue should be sent to memory first ++ //Ensure its scheduled after necessary tick ++ if (requestQueue_.size() > 0) { ++ requestEntry1& oldestreq = requestQueue_.front(); ++ if (tickCounter_ >= oldestreq.reqtick) { ++ if(oldestreq.type == LOAD) { ++ memory_.requestRead(oldestreq.reqAddresses.front(), (uint64_t) busReqId); ++ oldestreq.reqAddresses.pop(); ++ if (oldestreq.reqAddresses.size() == 0) { // All requests sent ++ requestQueue_.pop_front(); ++ } ++ requestedLoads_.emplace(busReqId, oldestreq.insn); ++ numLoads++; ++ latencyLoads_.emplace(busReqId, tickCounter_); ++ busReqId++; ++ } else if(oldestreq.type == STORE) { ++ memory_.requestWrite(oldestreq.reqAddresses.front(), oldestreq.data.front()); ++ oldestreq.reqAddresses.pop(); ++ oldestreq.data.pop(); ++ if (oldestreq.reqAddresses.size() == 0) { // All requests sent ++ requestQueue_.pop_front(); ++ //Verify same instruction. and remove from the storeQueue_ as well ++ //storeQueue_.pop_front();//No need ++ } ++ } else { ++ assert(false && "Unknown request type to be scheduled to memory"); ++ } ++ } ++ } ++ ++ //processResponse(); ++} ++ ++void LoadStoreQueue::processResponse() { ++ // Process completed read requests ++ for (const auto& response : memory_.getCompletedReads()) { ++ const auto& address = response.target.address; ++ const auto& data = response.data; ++ ++ // TODO: Detect and handle non-fatal faults (e.g. page fault) ++ ++ // Find instruction that requested the memory read ++ const auto& itr = requestedLoads_.find(response.requestId); ++ if (itr == requestedLoads_.end()) { ++ continue; ++ } else { ++ requestedLoads_.erase(response.requestId); ++ uint32_t ldLatency = ((tickCounter_ + 1) - latencyLoads_.at(response.requestId)); ++ if (ldLatency > maxLdLatency) { ++ maxLdLatency = ldLatency; ++ } ++ if (ldLatency < minLdLatency) { ++ minLdLatency = ldLatency; ++ } ++ totalLdLatency += ldLatency; ++ //std::cout << std::dec << "Total Ld latency: " << totalLdLatency << ", numLoads: " << numLoads << std::endl; ++ latencyLoads_.erase(response.requestId); ++ } ++ // Supply data to the instruction and execute if it is ready ++ const auto& load = itr->second; ++ load->supplyData(address, data); ++ if (load->hasAllData()) { ++ // This load has completed ++ load->execute(); ++ /*if (load->isStoreData()) { ++ supplyStoreData(load); ++ }*/ ++ completedLoads_.push(load); ++ } ++ } ++ memory_.clearCompletedReads(); ++ ++ // Pop from the front of the completed loads queue and send to writeback ++ size_t count = 0; ++ while (completedLoads_.size() > 0 && count < completionSlots_.size()) { ++ const auto& insn = completedLoads_.front(); ++ ++ // Don't process load instruction if it has been flushed ++ if (insn->isFlushed()) { ++ completedLoads_.pop(); ++ continue; ++ } ++ ++ // Forward the results ++ // forwardOperands_(insn->getDestinationRegisters(), insn->getResults()); ++ ++ completionSlots_[count].getTailSlots()[0] = std::move(insn); ++ ++ completedLoads_.pop(); ++ ++ count++; ++ } ++} ++ ++std::shared_ptr LoadStoreQueue::getViolatingLoad() const { ++ return violatingLoad_; ++} ++ ++//Clean up is required! 
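++// A misaligned access is scheduled one tick later than an aligned one (see
++// addLoad/addStore), so the LSQ is held busy for the cycle immediately before
++// such a request is sent to memory.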
++bool LoadStoreQueue::activeMisAlignedOpr() const { ++ //if the front of the request queue has a misaligned request that is not yet being sent to the bus then its better to halt LSU taking new requests. ++ // if(storeQueue_.size() > 0 && activeMisAlignedStore) { ++ // return true; ++ // } ++ return (requestQueue_.size() > 0 && requestQueue_.front().isMisAligned && ((requestQueue_.front().reqtick-tickCounter_)==1)); ++} ++ ++bool LoadStoreQueue::isCombined() const { return combined_; } ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/lib/pipeline_hi/RegDepMap.cc b/src/lib/pipeline_hi/RegDepMap.cc +new file mode 100644 +index 00000000..4ab004bf +--- /dev/null ++++ b/src/lib/pipeline_hi/RegDepMap.cc +@@ -0,0 +1,143 @@ ++#include "simeng/pipeline_hi/RegDepMap.hh" ++ ++#include ++ ++//#define RDMDEBUG ++#ifdef RDMDEBUG ++#define DEBUG(x) std::cout << "Core: " << std::hex << x << std::endl; ++#else ++#define DEBUG(x) do { } while (false); ++#endif ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++const Register l_ZERO_REGISTER = {0, 0}; ++ ++RegDepMap::RegDepMap(const std::vector registerFileStructures, ++ const RegisterFileSet& registerFileSet) : ++ registerFileStructures_(registerFileStructures), ++ registerFileSet_(registerFileSet) { ++ regMap_.resize(registerFileStructures_.size());//Just for Integer Register File for now ++ for (size_t type=0; typegetDestinationRegisters(); ++ for(const auto& reg: destinationRegisters) { ++ if(reg != l_ZERO_REGISTER) { //Not X0 ++ outstandingDep_++; ++ DEBUG("Adding Depencency: addr, 0x" << instr->getInstructionAddress() << std::dec << ", dest: " << reg << ", outstanding: " << outstandingDep_); ++ regMap_[reg.type][reg.tag].push_back(instr); ++ } ++ } ++} ++ ++void RegDepMap::remove(InstrPtr instr) ++{ ++ auto& destinationRegisters = instr->getDestinationRegisters(); ++ for(const auto& reg: destinationRegisters) { ++ auto it = regMap_[reg.type][reg.tag].begin(); ++ while (it != regMap_[reg.type][reg.tag].end()) { ++ if(*it == instr) { ++ outstandingDep_--; ++ DEBUG("Removing Depencency: addr, 0x" << instr->getInstructionAddress() << std::dec << ", dest: " << reg << ", outstanding: " << outstandingDep_); ++ it = regMap_[reg.type][reg.tag].erase(it); ++ break; ++ } else { ++ it++; ++ } ++ } ++ } ++} ++ ++bool RegDepMap::canRead(InstrPtr instr) ++{ ++ bool dependency = false; ++ auto& sourceRegisters = instr->getOperandRegisters(); ++ for (uint16_t i = 0; i < sourceRegisters.size(); i++) { ++ const auto& srcReg = sourceRegisters[i]; ++ ++ if (!instr->isOperandReady(i)) { ++ // The operand hasn't already been supplied ++ if (regMap_[srcReg.type][srcReg.tag].size() == 0) {//pick up value from register file ++ instr->supplyOperand(i, registerFileSet_.get(srcReg)); ++ } else if (regMap_[srcReg.type][srcReg.tag].back()->hasExecuted() && ++ !(regMap_[srcReg.type][srcReg.tag].back()->isMul() || regMap_[srcReg.type][srcReg.tag].back()->isDiv() || ++ (regMap_[srcReg.type][srcReg.tag].back()->isLoad() && !instr->isStoreData()))) {//pick up value from last executed instruction ++ const auto& destRegisters = regMap_[srcReg.type][srcReg.tag].back()->getDestinationRegisters(); ++ const auto& destValues = regMap_[srcReg.type][srcReg.tag].back()->getResults(); ++ for (size_t j = 0; j < destRegisters.size(); j++) { ++ const auto& destReg = destRegisters[j]; ++ if (destReg == srcReg) { ++ instr->supplyOperand(i, destValues[j]); ++ break; ++ } ++ } ++ } else { ++ dependency = true; ++ } ++ } ++ } ++ ++ return !dependency; ++} ++ ++bool 
RegDepMap::canWrite(InstrPtr instr) ++{ ++ bool dependency = false; ++ auto& destRegisters = instr->getDestinationRegisters(); ++ for(uint16_t i = 0; i < destRegisters.size(); i++) { ++ const auto& destReg = destRegisters[i]; ++ if (regMap_[destReg.type][destReg.tag].size() > 0 && ++ !regMap_[destReg.type][destReg.tag].back()->hasExecuted()) { ++ dependency = true; ++ break; ++ } ++ } ++ return !dependency || (instr->isLoad()); ++} ++ ++//Clean up the options logic to ensure all of them work well together ++bool RegDepMap::canForward(InstrPtr instr) ++{ ++ return true; ++} ++ ++void RegDepMap::purgeFlushed() { ++ for (auto& registerType : regMap_) { ++ for (auto& dependencyList : registerType) { ++ auto it = dependencyList.begin(); ++ while (it != dependencyList.end()) { ++ DEBUG("Purge entry present at addr: 0x" << (*it)->getInstructionAddress()); ++ if ((*it)->isFlushed()) { ++ outstandingDep_--; ++ it = dependencyList.erase(it); ++ } else { ++ it++; ++ } ++ } ++ } ++ } ++} ++ ++void RegDepMap::dump() ++{ ++} ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/lib/pipeline_hi/RegisterAliasTable.cc b/src/lib/pipeline_hi/RegisterAliasTable.cc +new file mode 100644 +index 00000000..0c813a6f +--- /dev/null ++++ b/src/lib/pipeline_hi/RegisterAliasTable.cc +@@ -0,0 +1,110 @@ ++#include "simeng/pipeline_hi/RegisterAliasTable.hh" ++ ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++RegisterAliasTable::RegisterAliasTable( ++ std::vector architecturalStructure, ++ std::vector physicalRegisterCounts) ++ : mappingTable_(architecturalStructure.size()), ++ historyTable_(architecturalStructure.size()), ++ destinationTable_(architecturalStructure.size()), ++ freeQueues_(architecturalStructure.size()) { ++ assert(architecturalStructure.size() == physicalRegisterCounts.size() && ++ "The number of physical register types does not match the number of " ++ "architectural register types"); ++ ++ for (size_t type = 0; type < architecturalStructure.size(); type++) { ++ auto archCount = architecturalStructure[type].quantity; ++ auto physCount = physicalRegisterCounts[type]; ++ assert(archCount <= physCount && ++ "Cannot have fewer physical registers than architectural registers"); ++ ++ // Set up the initial mapping table state for this register type ++ mappingTable_[type].resize(archCount); ++ ++ for (size_t tag = 0; tag < archCount; tag++) { ++ // Pre-assign a physical register to each architectural register ++ mappingTable_[type][tag] = tag; ++ } ++ ++ // Add remaining physical registers to free queue ++ for (size_t tag = archCount; tag < physCount; tag++) { ++ freeQueues_[type].push(tag); ++ } ++ ++ // Set up history/destination tables ++ historyTable_[type].resize(physCount); ++ destinationTable_[type].resize(physCount); ++ } ++}; ++ ++Register RegisterAliasTable::getMapping(Register architectural) const { ++ // Asserts to ensure mapping isn't attempted for an out-of-bound index (i.e. ++ // mapping of WZR / XZR) ++ assert(architectural.type < mappingTable_.size() && ++ "Invalid register type. Cannot find RAT mapping."); ++ assert(architectural.type >= 0 && ++ "Invalid register type. 
Cannot find RAT mapping."); ++ ++ auto tag = mappingTable_[architectural.type][architectural.tag]; ++ return {architectural.type, tag}; ++} ++ ++bool RegisterAliasTable::canAllocate(uint8_t type, ++ unsigned int quantity) const { ++ return (freeQueues_[type].size() >= quantity); ++} ++ ++bool RegisterAliasTable::canRename(uint8_t type) const { ++ // Renaming possible iff there are more physical than architectural registers ++ return destinationTable_[type].size() > mappingTable_[type].size(); ++} ++ ++unsigned int RegisterAliasTable::freeRegistersAvailable(uint8_t type) const { ++ return freeQueues_[type].size(); ++} ++ ++Register RegisterAliasTable::allocate(Register architectural) { ++ std::queue& freeQueue = freeQueues_[architectural.type]; ++ assert(freeQueue.size() > 0 && ++ "Attempted to allocate free register when none were available"); ++ ++ auto tag = freeQueue.front(); ++ freeQueue.pop(); ++ ++ // Keep the old physical register in the history table ++ historyTable_[architectural.type][tag] = ++ mappingTable_[architectural.type][architectural.tag]; ++ ++ // Update the mapping table with the new tag, and mark the architectural ++ // register it replaces in the destination table ++ mappingTable_[architectural.type][architectural.tag] = tag; ++ destinationTable_[architectural.type][tag] = architectural.tag; ++ ++ return {architectural.type, tag}; ++} ++ ++void RegisterAliasTable::commit(Register physical) { ++ // Find the register previously mapped to the same architectural register and ++ // free it ++ auto oldTag = historyTable_[physical.type][physical.tag]; ++ freeQueues_[physical.type].push(oldTag); ++} ++void RegisterAliasTable::rewind(Register physical) { ++ // Find which architectural tag this referred to ++ auto destinationTag = destinationTable_[physical.type][physical.tag]; ++ // Rewind the mapping table to the old physical tag ++ mappingTable_[physical.type][destinationTag] = ++ historyTable_[physical.type][physical.tag]; ++ // Add the rewound physical tag back to the free queue ++ freeQueues_[physical.type].push(physical.tag); ++} ++void RegisterAliasTable::free(Register physical) { ++ freeQueues_[physical.type].push(physical.tag); ++} ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/lib/pipeline_hi/ReorderBuffer.cc b/src/lib/pipeline_hi/ReorderBuffer.cc +new file mode 100644 +index 00000000..c653ffd5 +--- /dev/null ++++ b/src/lib/pipeline_hi/ReorderBuffer.cc +@@ -0,0 +1,206 @@ ++#include "simeng/pipeline_hi/ReorderBuffer.hh" ++ ++#include ++#include ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++ReorderBuffer::ReorderBuffer( ++ unsigned int maxSize, RegisterAliasTable& rat, LoadStoreQueue& lsq, ++ std::function&)> raiseException, ++ std::function sendLoopBoundary, ++ BranchPredictor& predictor, uint16_t loopBufSize, ++ uint16_t loopDetectionThreshold) ++ : rat_(rat), ++ lsq_(lsq), ++ maxSize_(maxSize), ++ raiseException_(raiseException), ++ sendLoopBoundary_(sendLoopBoundary), ++ predictor_(predictor), ++ loopBufSize_(loopBufSize), ++ loopDetectionThreshold_(loopDetectionThreshold) {} ++ ++void ReorderBuffer::reserve(const std::shared_ptr& insn) { ++ assert(buffer_.size() < maxSize_ && ++ "Attempted to reserve entry in reorder buffer when already full"); ++ insn->setSequenceId(seqId_); ++ seqId_++; ++ insn->setInstructionId(insnId_); ++ if (insn->isLastMicroOp()) insnId_++; ++ ++ buffer_.push_back(insn); ++} ++ ++void ReorderBuffer::commitMicroOps(uint64_t insnId) { ++ if (buffer_.size()) { ++ size_t index = 0; ++ int firstOp = 
-1; ++ bool validForCommit = false; ++ ++ // Find first instance of uop belonging to macro-op instruction ++ for (; index < buffer_.size(); index++) { ++ if (buffer_[index]->getInstructionId() == insnId) { ++ firstOp = index; ++ break; ++ } ++ } ++ ++ if (firstOp > -1) { ++ // If found, see if all uops are committable ++ for (; index < buffer_.size(); index++) { ++ if (buffer_[index]->getInstructionId() != insnId) break; ++ if (!buffer_[index]->isWaitingCommit()) { ++ return; ++ } else if (buffer_[index]->isLastMicroOp()) { ++ // all microOps must be in ROB for the commit to be valid ++ validForCommit = true; ++ } ++ } ++ if (!validForCommit) return; ++ ++ // No early return thus all uops are committable ++ for (; firstOp < buffer_.size(); firstOp++) { ++ if (buffer_[firstOp]->getInstructionId() != insnId) break; ++ buffer_[firstOp]->setCommitReady(); ++ } ++ } ++ } ++ return; ++} ++ ++unsigned int ReorderBuffer::commit(unsigned int maxCommitSize) { ++ shouldFlush_ = false; ++ size_t maxCommits = ++ std::min(static_cast(maxCommitSize), buffer_.size()); ++ ++ unsigned int n; ++ for (n = 0; n < maxCommits; n++) { ++ auto& uop = buffer_[0]; ++ if (!uop->canCommit()) { ++ break; ++ } ++ ++ if (uop->isLastMicroOp()) instructionsCommitted_++; ++ ++ if (uop->exceptionEncountered()) { ++ raiseException_(uop); ++ buffer_.pop_front(); ++ return n + 1; ++ } ++ ++ const auto& destinations = uop->getDestinationRegisters(); ++ for (int i = 0; i < destinations.size(); i++) { ++ rat_.commit(destinations[i]); ++ } ++ ++ // If it's a memory op, commit the entry at the head of the respective queue ++ if (uop->isLoad()) { ++ lsq_.commitLoad(uop); ++ } ++ if (uop->isStoreAddress()) { ++ bool violationFound = lsq_.commitStore(uop); ++ if (violationFound) { ++ loadViolations_++; ++ // Memory order violation found; aborting commits and flushing ++ auto load = lsq_.getViolatingLoad(); ++ shouldFlush_ = true; ++ flushAfter_ = load->getInstructionId() - 1; ++ pc_ = load->getInstructionAddress(); ++ ++ buffer_.pop_front(); ++ return n + 1; ++ } ++ } ++ ++ // Increment or swap out branch counter for loop detection ++ if (uop->isBranch() && !loopDetected_) { ++ bool increment = true; ++ if (branchCounter_.first.address != uop->getInstructionAddress()) { ++ // Mismatch on instruction address, reset ++ increment = false; ++ } else if (branchCounter_.first.outcome != uop->getBranchPrediction()) { ++ // Mismatch on branch outcome, reset ++ increment = false; ++ } else if ((instructionsCommitted_ - branchCounter_.first.commitNumber) > ++ loopBufSize_) { ++ // Loop too big to fit in loop buffer, reset ++ increment = false; ++ } ++ ++ if (increment) { ++ // Reset commitNumber value ++ branchCounter_.first.commitNumber = instructionsCommitted_; ++ // Increment counter ++ branchCounter_.second++; ++ ++ if (branchCounter_.second > loopDetectionThreshold_) { ++ // If the same branch with the same outcome is sequentially retired ++ // more times than the loopDetectionThreshold_ value, identify as a ++ // loop boundary ++ loopDetected_ = true; ++ sendLoopBoundary_(uop->getInstructionAddress()); ++ } ++ } else { ++ // Swap out latest branch ++ branchCounter_ = {{uop->getInstructionAddress(), ++ uop->getBranchPrediction(), instructionsCommitted_}, ++ 0}; ++ } ++ } ++ buffer_.pop_front(); ++ } ++ ++ return n; ++} ++ ++void ReorderBuffer::flush(uint64_t afterSeqId) { ++ // Iterate backwards from the tail of the queue to find and remove ops newer ++ // than `afterSeqId` ++ while (!buffer_.empty()) { ++ auto& uop = buffer_.back(); ++ if 
(uop->getInstructionId() <= afterSeqId) { ++ break; ++ } ++ ++ // To rewind destination registers in correct history order, rewinding of ++ // register renaming is done backwards ++ auto destinations = uop->getDestinationRegisters(); ++ for (int i = destinations.size() - 1; i >= 0; i--) { ++ const auto& reg = destinations[i]; ++ rat_.rewind(reg); ++ } ++ uop->setFlushed(); ++ // If the instruction is a branch, supply address to branch flushing logic ++ if (uop->isBranch()) { ++ predictor_.flush(uop->getInstructionAddress()); ++ } ++ buffer_.pop_back(); ++ } ++ ++ // Reset branch counter and loop detection ++ branchCounter_ = {{0, {false, 0}, 0}, 0}; ++ loopDetected_ = false; ++} ++ ++unsigned int ReorderBuffer::size() const { return buffer_.size(); } ++ ++unsigned int ReorderBuffer::getFreeSpace() const { ++ return maxSize_ - buffer_.size(); ++} ++ ++bool ReorderBuffer::shouldFlush() const { return shouldFlush_; } ++uint64_t ReorderBuffer::getFlushAddress() const { return pc_; } ++uint64_t ReorderBuffer::getFlushSeqId() const { return flushAfter_; } ++ ++uint64_t ReorderBuffer::getInstructionsCommittedCount() const { ++ return instructionsCommitted_; ++} ++ ++uint64_t ReorderBuffer::getViolatingLoadsCount() const { ++ return loadViolations_; ++} ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/lib/pipeline_hi/StaticPredictor.cc b/src/lib/pipeline_hi/StaticPredictor.cc +new file mode 100644 +index 00000000..66199899 +--- /dev/null ++++ b/src/lib/pipeline_hi/StaticPredictor.cc +@@ -0,0 +1,120 @@ ++#include "simeng/pipeline_hi/StaticPredictor.hh" ++ ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++//TODO: temp for get rid of yaml, delete it later ++StaticPredictor::StaticPredictor(uint8_t sType) ++ : staticType_(sType) {} ++ ++StaticPredictor::StaticPredictor(YAML::Node config) ++ : staticType_(config["Branch-Predictor"]["Static-Type"].as()), ++ rasSize_(config["Branch-Predictor"]["RAS-entries"].as()){} ++ ++StaticPredictor::~StaticPredictor() { ++ ras_.clear(); ++ rasHistory_.clear(); ++} ++ ++BranchPrediction StaticPredictor::predict(uint64_t address, BranchType type, ++ uint64_t knownOffset, ++ uint8_t byteLength) { ++ int64_t offset = knownOffset; ++ uint64_t predict_target = (knownOffset) ? 
knownOffset + address : 0; ++ BranchPrediction prediction = {false, 0}; ++ ++ assert(byteLength > 1 && "byteLength <= 1"); ++ ++ if (type == BranchType::Unconditional) { ++ prediction = { true, predict_target}; ++ } else if (type == BranchType::Return) { ++ if (ras_.size() > 0) { ++ predict_target = ras_.back(); ++ // Record top of RAS used for target prediction ++ rasHistory_[address] = ras_.back(); ++ ras_.pop_back(); ++ } ++ prediction = {true, predict_target}; ++ } else if (type == BranchType::SubroutineCall) { //JAL and JALR ++ if (ras_.size() >= rasSize_) { ++ ras_.pop_front(); ++ } ++ ras_.push_back(address + byteLength); ++ // Record that this address is a branch-and-link instruction ++ rasHistory_[address] = 0; ++ prediction = {true, predict_target}; ++ } else if (type == BranchType::Conditional) { ++ switch (staticType_) { ++ case 0: //always-taken ++ prediction = {true, predict_target}; ++ break; ++ ++ case 1: //always-not-taken; ++ prediction = {false, 0}; ++ break; ++ ++ case 2: //Backward Taken, Forward Not Taken ++ { ++ if (offset >= 0) { ++ //not taken ++ prediction = {false, address+byteLength}; ++ } else { ++ prediction = {true, predict_target}; ++ } ++ break; ++ } ++ ++ case 3: //Forward Taken, Backward Not Taken ++ { ++ if (offset <= 0) { ++ //not taken ++ prediction = {false, address+byteLength}; ++ } else { ++ prediction = {true, predict_target}; ++ } ++ break; ++ } ++ ++ default: ++ assert(staticType_ < 4 && "Non-supported type for static predictor"); ++ break; ++ } ++ } ++ ++ return prediction; ++} ++ ++void StaticPredictor::update(uint64_t address, bool taken, ++ uint64_t targetAddress, BranchType type) {} ++ ++void StaticPredictor::flush(uint64_t address) { ++ // If address interacted with RAS, rewind entry ++ auto it = rasHistory_.find(address); ++ if (it != rasHistory_.end()) { ++ uint64_t target = it->second; ++ if (target != 0) { ++ // If history entry belongs to a return instruction, push target back onto ++ // stack ++ if (ras_.size() >= rasSize_) { ++ ras_.pop_front(); ++ } ++ ras_.push_back(target); ++ } else { ++ // If history entry belongs to a branch-and-link instruction, pop target ++ // off of stack ++ if (ras_.size()) { ++ ras_.pop_back(); ++ } ++ } ++ rasHistory_.erase(it); ++ } ++} ++BranchPrediction StaticPredictor::predict(uint64_t address, BranchType type, ++ uint64_t knownTarget) { ++ printf("StaticPredictor::predict(), This is overloaded and deprecated! 
\n"); ++ return predict(address, type, knownTarget, 4); ++} ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/lib/pipeline_hi/WritebackUnit.cc b/src/lib/pipeline_hi/WritebackUnit.cc +new file mode 100644 +index 00000000..b0dfd971 +--- /dev/null ++++ b/src/lib/pipeline_hi/WritebackUnit.cc +@@ -0,0 +1,74 @@ ++#include "simeng/pipeline_hi/WritebackUnit.hh" ++ ++#include ++ ++namespace simeng { ++namespace pipeline_hi { ++ ++WritebackUnit::WritebackUnit( ++ std::vector>>& completionSlots, ++ RegisterFileSet& registerFileSet, ++ std::function flagMicroOpCommits, ++ std::function&)> removeDep, ++ std::function&)> removeInstrOrderQ) ++ : completionSlots_(completionSlots), ++ registerFileSet_(registerFileSet), ++ flagMicroOpCommits_(flagMicroOpCommits), ++ removeDep_(removeDep), ++ removeInstrOrderQ_(removeInstrOrderQ) {} ++ ++void WritebackUnit::tick() { ++ for (size_t slot = 0; slot < completionSlots_.size(); slot++) { ++ auto& uop = completionSlots_[slot].getHeadSlots()[0]; ++ ++ if (uop == nullptr) { ++ continue; ++ } ++ ++ auto& results = uop->getResults(); ++ auto& destinations = uop->getDestinationRegisters(); ++ for (size_t i = 0; i < results.size(); i++) { ++ // Write results to register file ++ registerFileSet_.set(destinations[i], results[i]); ++ } ++ if (uop->isMicroOp()) { ++ uop->setWaitingCommit(); ++ flagMicroOpCommits_(uop->getInstructionId()); ++ if (uop->isLastMicroOp()) { ++ instructionsWritten_++; ++ committedInstsForTrace_.push_back(uop); ++ } ++ } else { ++ uop->setCommitReady(); ++ removeDep_(uop); ++ instructionsWritten_++; ++ committedInstsForTrace_.push_back(uop); ++ } ++ ++ completionSlots_[slot].getHeadSlots()[0] = nullptr; ++ } ++} ++ ++uint64_t WritebackUnit::getInstructionsWrittenCount() const { ++ return instructionsWritten_; ++} ++ ++std::vector> WritebackUnit::getInstsForTrace() { ++ std::shared_ptr instr; ++ std::deque>::iterator it = committedInstsForTrace_.begin(); ++ while(it != committedInstsForTrace_.end()) { ++ instr = *it; ++ if (removeInstrOrderQ_(instr)) { ++ committedInstsForTrace_.erase(it); ++ return {instr}; ++ } ++ it++; ++ } ++ return {}; //committedInstsForTrace_; ++} ++void WritebackUnit::traceFinished() { ++ //committedInstsForTrace_.clear(); ++} ++ ++} // namespace pipeline_hi ++} // namespace simeng +diff --git a/src/tools/simeng/main.cc b/src/tools/simeng/main.cc +index fa9b58ba..f5cfa535 100644 +--- a/src/tools/simeng/main.cc ++++ b/src/tools/simeng/main.cc +@@ -10,7 +10,7 @@ + #include "simeng/version.hh" + + /** Tick the provided core model until it halts. 
*/ +-int simulate(simeng::Core& core, simeng::MemoryInterface& dataMemory, ++uint64_t simulate(simeng::Core& core, simeng::MemoryInterface& dataMemory, + simeng::MemoryInterface& instructionMemory) { + uint64_t iterations = 0; + +@@ -91,7 +91,7 @@ int main(int argc, char** argv) { + + // Run simulation + std::cout << "[SimEng] Starting...\n" << std::endl; +- int iterations = 0; ++ uint64_t iterations = 0; + auto startTime = std::chrono::high_resolution_clock::now(); + iterations = simulate(*core, *dataMemory, *instructionMemory); + +diff --git a/sst/SimEngCoreWrapper.cc b/sst/SimEngCoreWrapper.cc +index 45c1bdde..668439f5 100644 +--- a/sst/SimEngCoreWrapper.cc ++++ b/sst/SimEngCoreWrapper.cc +@@ -10,9 +10,85 @@ + + #include "Assemble.hh" + ++#include ++ + using namespace SST::SSTSimEng; + using namespace SST::Interfaces; + ++//For now just make sure that the code and data is loaded into memory ++// at the correct addresses instead of sending the entire process image ++void SimEngCoreWrapper::processMemoryImage() { ++ std::ifstream file(executablePath_, std::ios::binary); ++ if (!file.is_open()) { ++ return; ++ } ++ ++ char elfMagic[4] = {0x7f, 'E', 'L', 'F'}; ++ char fileMagic[4]; ++ file.read(fileMagic, 4); ++ if (std::memcmp(elfMagic, fileMagic, sizeof(elfMagic))) { ++ return; ++ } ++ ++ /** ++ * The fifth byte of the ELF Header identifies the architecture ++ * of the ELF binary i.e 32-bit or 64-bit. ++ */ ++ ++ // Check whether this is a 32-bit executable ++ char bitFormat; ++ file.read(&bitFormat, sizeof(bitFormat)); ++ if (bitFormat != ElfBitFormat::Format32) { ++ return; ++ } ++ struct Elf32Header { ++ uint32_t type; ++ uint32_t offset; ++ uint32_t virtualAddress; ++ uint32_t physicalAddress; ++ uint32_t fileSize; ++ uint32_t memorySize; ++ }; ++ uint32_t entryPoint32_; ++ std::vector headers32_; ++ ++ file.seekg(0x18); ++ file.read(reinterpret_cast(&entryPoint32_), sizeof(entryPoint32_)); ++ uint32_t headerOffset; ++ file.read(reinterpret_cast(&headerOffset), sizeof(headerOffset)); ++ file.seekg(0x2a); ++ uint16_t headerEntrySize; ++ file.read(reinterpret_cast(&headerEntrySize), sizeof(headerEntrySize)); ++ uint16_t headerEntries; ++ file.read(reinterpret_cast(&headerEntries), sizeof(headerEntries)); ++ headers32_.resize(headerEntries); ++ // Loop over all headers and extract them. 
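++  // Each entry is read as six consecutive 32-bit fields (type, offset, vaddr,
++  // paddr, filesz, memsz) located at headerOffset + (i * headerEntrySize).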
++ for (size_t i = 0; i < headerEntries; i++) { ++ file.seekg(headerOffset + (i * headerEntrySize)); ++ auto& header = headers32_[i]; ++ ++ const int fieldBytes = 4; ++ file.read(reinterpret_cast(&(header.type)), sizeof(header.type)); ++ file.read(reinterpret_cast(&(header.offset)), fieldBytes); ++ file.read(reinterpret_cast(&(header.virtualAddress)), fieldBytes); ++ file.read(reinterpret_cast(&(header.physicalAddress)), fieldBytes); ++ file.read(reinterpret_cast(&(header.fileSize)), fieldBytes); ++ file.read(reinterpret_cast(&(header.memorySize)), fieldBytes); ++ } ++ // Process headers; only observe LOAD sections for this basic implementation ++ for (const auto& header : headers32_) { ++ if (header.type == 1) { // LOAD ++ char* imagePointer; ++ imagePointer = (char*)calloc(header.memorySize, sizeof(char)); ++ file.seekg(header.offset); ++ file.read(imagePointer, header.fileSize); ++ dataMemory_->sendProcessImageToSST(imagePointer, header.memorySize, header.virtualAddress); ++ } ++ } ++ std::cout << "[SSTSimEng:SimEngCoreWrapper] Done exporting elf data into SST memory" << std::endl; ++ //assert(false && "Incomplete implementation"); ++} ++ + SimEngCoreWrapper::SimEngCoreWrapper(SST::ComponentId_t id, SST::Params& params) + : SST::Component(id) { + output_.init("[SSTSimEng:SimEngCoreWrapper] " + getName() + ":@p:@l ", 999, 0, +@@ -95,7 +171,7 @@ void SimEngCoreWrapper::finish() { + std::cout << "[SimEng] " << key << ": " << value << "\n"; + } + +- std::cout << "\n[SimEng] Finished " << iterations_ << " ticks in " << duration ++ std::cout << "\n[SimEng] Finished " << std::dec << iterations_ << " ticks in " << duration + << "ms (" << std::round(khz) << " kHz, " << std::setprecision(2) + << mips << " MIPS)" << std::endl; + } +@@ -284,13 +360,13 @@ void SimEngCoreWrapper::fabricateSimEngCore() { + : std::make_unique( + a64fxConfigPath_, executablePath_, executableArgs_); + } +- if (coreInstance_->getSimulationMode() != ++ /*if (coreInstance_->getSimulationMode() != + simeng::SimulationMode::OutOfOrder) { + output_.verbose(CALL_INFO, 1, 0, + "SimEng currently only supports Out-of-Order " + "archetypes with SST."); + std::exit(EXIT_FAILURE); +- } ++ }*/ + // Set the SST data memory SimEng should use + coreInstance_->setL1DataMemory(dataMemory_); + +@@ -303,7 +379,7 @@ void SimEngCoreWrapper::fabricateSimEngCore() { + + // This check ensures that SST has enough memory to store the entire + // processImage constructed by SimEng. +- if (maxAddrMemory_ < coreInstance_->getProcessImageSize()) { ++ /*if (maxAddrMemory_ < coreInstance_->getProcessImageSize()) { + output_.verbose( + CALL_INFO, 1, 0, + "Error: SST backend memory is less than processImage size. " +@@ -312,7 +388,7 @@ void SimEngCoreWrapper::fabricateSimEngCore() { + "\'addr_range_end\'. \n"); + primaryComponentOKToEndSim(); + std::exit(EXIT_FAILURE); +- } ++ }*/ + // If testing is enabled populate heap if heap values have been specified. 
+ #ifdef SIMENG_ENABLE_SST_TESTS + if (heapStr_ != "") { +@@ -320,8 +396,10 @@ void SimEngCoreWrapper::fabricateSimEngCore() { + } + #endif + // Send the process image data over to the SST memory +- dataMemory_->sendProcessImageToSST(coreInstance_->getProcessImage().get(), +- coreInstance_->getProcessImageSize()); ++ //dataMemory_->sendProcessImageToSST(coreInstance_->getProcessImage().get(), ++ // coreInstance_->getProcessImageSize()); ++ ++ processMemoryImage(); + + output_.verbose(CALL_INFO, 1, 0, "SimEng core setup successfully.\n"); + // Print out build metadata +@@ -356,4 +434,4 @@ std::vector SimEngCoreWrapper::splitHeapStr() { + } + out.push_back(static_cast(std::stoull(acc))); + return out; +-} +\ No newline at end of file ++} +diff --git a/sst/SimEngMemInterface.cc b/sst/SimEngMemInterface.cc +index 4e07801f..678d9853 100644 +--- a/sst/SimEngMemInterface.cc ++++ b/sst/SimEngMemInterface.cc +@@ -18,7 +18,7 @@ SimEngMemInterface::SimEngMemInterface(StandardMem* mem, uint64_t cl, + this->debug_ = debug; + }; + +-void SimEngMemInterface::sendProcessImageToSST(char* image, uint64_t size) { ++void SimEngMemInterface::sendProcessImageToSST(char* image, uint64_t size, uint64_t startAddr) { + std::vector data; + data.reserve(size); + +@@ -26,7 +26,8 @@ void SimEngMemInterface::sendProcessImageToSST(char* image, uint64_t size) { + data.push_back((uint8_t)image[i]); + } + +- StandardMem::Request* req = new StandardMem::Write(0, data.size(), data); ++ StandardMem::Request* req = new StandardMem::Write(startAddr, data.size(), data); ++ std::cout << std::hex << "[SSTSimEng:SimEngMemInterface] Sending image section to SST Memory at address 0x" << startAddr << ", size 0x" << data.size() << std::endl; + sstMem_->sendUntimedData(req); + return; + }; +@@ -176,7 +177,7 @@ void SimEngMemInterface::requestRead(const MemoryAccessTarget& target, + if (debug_) { + std::cout << "[SSTSimEng:SSTDebug] MemRead" + << "-read-request-" << requestId << "-cycle-" << tickCounter_ +- << "-split-" << requests.size() << std::endl; ++ << "-split-" << requests.size() << "-addr-0x" << std::hex << addrStart << std::endl; + } + for (StandardMem::Request* req : requests) { + sstMem_->send(req); +@@ -192,7 +193,11 @@ void SimEngMemInterface::requestWrite(const MemoryAccessTarget& target, + AggregateWriteRequest* aggrReq = new AggregateWriteRequest(target, data); + std::vector requests = + makeSSTRequests(aggrReq, addrStart, addrEnd, size); +- ++ if (debug_) { ++ std::cout << "[SSTSimEng:SSTDebug] MemWrite" ++ << "-write-request-xx" << "-cycle-" << tickCounter_ ++ << "-split-" << requests.size() << "-addr-0x" << std::hex << addrStart << std::endl; ++ } + for (StandardMem::Request* req : requests) { + sstMem_->send(req); + } +diff --git a/sst/config/mcu_int_example_config.py b/sst/config/mcu_int_example_config.py +new file mode 100644 +index 00000000..fdd3b968 +--- /dev/null ++++ b/sst/config/mcu_int_example_config.py +@@ -0,0 +1,74 @@ ++import sst ++import os ++ ++DEBUG_L1 = 1 ++DEBUG_MEM = 1 ++DEBUG_LEVEL = 1 ++ ++clw = "32" ++ ++# Assume this is run from SimEng root dir ++simeng_path = os.getcwd() ++binary_file = simeng_path + "/share/dhrystone_rv32imc/memory.elf" # Apply the appropriate binary ++config_file = simeng_path + "/configs/DEMO_RISCV32_mcu_sst.yaml" ++ ++# Define the simulation components ++cpu = sst.Component("core", "sstsimeng.simengcore") ++cpu.addParams({ ++ "simeng_config_path": config_file, ++ "executable_path": binary_file, ++ "executable_args": "", ++ "clock" : "1GHz", ++ "max_addr_memory": 
4*1024*1024*1024-1, ++ "cache_line_width": clw, ++ "source": "", ++ "assemble_with_source": False, ++ "heap": "", ++ "debug": False ++}) ++ ++iface = cpu.setSubComponent("memory", "memHierarchy.standardInterface") ++ ++l1cache = sst.Component("l1cache.mesi", "memHierarchy.Cache") ++l1cache.addParams({ ++ "access_latency_cycles" : "1", ++ "cache_frequency" : "1Ghz", ++ "replacement_policy" : "nmru", ++ "coherence_protocol" : "MESI", ++ "associativity" : "4", ++ "cache_line_size" : clw, ++ "debug" : DEBUG_L1, ++ "debug_level" : DEBUG_LEVEL, ++ "L1" : "1", ++ "cache_size" : "32KiB" ++}) ++ ++# Explicitly set the link subcomponents instead of having cache figure them out based on connected port names ++l1toC = l1cache.setSubComponent("cpulink", "memHierarchy.MemLink") ++l1toM = l1cache.setSubComponent("memlink", "memHierarchy.MemLink") ++ ++# Memory controller ++memctrl = sst.Component("memory", "memHierarchy.MemController") ++memctrl.addParams({ ++ "clock" : "1GHz", ++ "request_width" : clw, ++ "debug" : DEBUG_MEM, ++ "debug_level" : DEBUG_LEVEL, ++ "addr_range_end" : 4*1024*1024*1024-1, ++}) ++Mtol1 = memctrl.setSubComponent("cpulink", "memHierarchy.MemLink") ++ ++# Memory model ++memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") ++memory.addParams({ ++ "access_time" : "10ns", ++ "mem_size" : "4GiB", ++ "request_width": clw ++}) ++ ++# Define the simulation links ++link_cpu_cache_link = sst.Link("link_cpu_cache_link") ++link_cpu_cache_link.connect( (iface, "port", "0ps"), (l1toC, "port", "0ps") ) ++link_mem_bus_link = sst.Link("link_mem_bus_link") ++link_mem_bus_link.connect( (l1toM, "port", "0ps"), (Mtol1, "port", "0ps") ) ++ +diff --git a/sst/include/SimEngCoreWrapper.hh b/sst/include/SimEngCoreWrapper.hh +index cb53c0f5..fc841949 100644 +--- a/sst/include/SimEngCoreWrapper.hh ++++ b/sst/include/SimEngCoreWrapper.hh +@@ -141,6 +141,8 @@ class SimEngCoreWrapper : public SST::Component { + /** Method used to assemble SimEng core. */ + void fabricateSimEngCore(); + ++ void processMemoryImage(); ++ + /** Method to split the passed executable argument's string into a vector of + * individual arguments. */ + std::vector splitArgs(std::string argString); +@@ -210,7 +212,7 @@ class SimEngCoreWrapper : public SST::Component { + std::shared_ptr dataMemory_; + + /** Number of clock iterations. */ +- int iterations_; ++ uint64_t iterations_; + + /** Start time of simulation. */ + std::chrono::high_resolution_clock::time_point startTime_; +diff --git a/sst/include/SimEngMemInterface.hh b/sst/include/SimEngMemInterface.hh +index 79789a9f..463d0dc9 100644 +--- a/sst/include/SimEngMemInterface.hh ++++ b/sst/include/SimEngMemInterface.hh +@@ -33,7 +33,7 @@ class SimEngMemInterface : public MemoryInterface { + bool debug); + /** Send SimEng's processImage to SST memory backend during `init` lifecycle + * phase of SST. 
*/ +- void sendProcessImageToSST(char* image, uint64_t size); ++ void sendProcessImageToSST(char* image, uint64_t size, uint64_t startAddr=0); + + /** + * Construct an AggregatedReadRequest and use it to generate diff --git a/src/include/simeng/BranchPredictor.hh b/src/include/simeng/BranchPredictor.hh index 88be07dd3f..8d76f08753 100644 --- a/src/include/simeng/BranchPredictor.hh +++ b/src/include/simeng/BranchPredictor.hh @@ -46,6 +46,11 @@ class BranchPredictor { public: virtual ~BranchPredictor(){}; + /** Overload predict() with more information in parameters */ + virtual BranchPrediction predict(uint64_t address, BranchType type, + uint64_t knownTarget, uint8_t instByteLength) + = 0; + /** Generate a branch prediction for the specified instruction address with a * branch type and possible known target. */ virtual BranchPrediction predict(uint64_t address, BranchType type, diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh index c8e151e884..e4d5b23248 100644 --- a/src/include/simeng/CoreInstance.hh +++ b/src/include/simeng/CoreInstance.hh @@ -16,6 +16,7 @@ #include "simeng/kernel/Linux.hh" #include "simeng/models/emulation/Core.hh" #include "simeng/models/inorder/Core.hh" +#include "simeng/models/mcu/Core.hh" #include "simeng/models/outoforder/Core.hh" #include "simeng/pipeline/A64FXPortAllocator.hh" #include "simeng/pipeline/BalancedPortAllocator.hh" @@ -37,7 +38,7 @@ uint32_t hex_[] = { namespace simeng { /** The available modes of simulation. */ -enum class SimulationMode { Emulation, InOrderPipelined, OutOfOrder }; +enum class SimulationMode { Emulation, InOrderPipelined, MCU, OutOfOrder }; /** A class to create a SimEng core instance from a supplied config. */ class CoreInstance { diff --git a/src/include/simeng/Elf.hh b/src/include/simeng/Elf.hh index 14bcddcb28..485debea60 100644 --- a/src/include/simeng/Elf.hh +++ b/src/include/simeng/Elf.hh @@ -2,6 +2,7 @@ #include #include +#include #include "simeng/span.hh" @@ -30,23 +31,85 @@ struct Elf32Header { uint32_t memorySize; }; +typedef struct { + unsigned char e_ident[16]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint32_t e_entry; + uint32_t e_phoff; + uint32_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} Elf32_Ehdr; + +typedef struct { + uint32_t p_type; + uint32_t p_offset; + uint32_t p_vaddr; + uint32_t p_paddr; + uint32_t p_filesz; + uint32_t p_memsz; + uint32_t p_flags; + uint32_t p_align; +} Elf32_Phdr; + +typedef struct { + uint32_t sh_name; + uint32_t sh_type; + uint32_t sh_flags; + uint32_t sh_addr; + uint32_t sh_offset; + uint32_t sh_size; + uint32_t sh_link; + uint32_t sh_info; + uint32_t sh_addralign; + uint32_t sh_entsize; +} Elf32_Shdr; + +typedef struct { + uint32_t st_name; + uint32_t st_value; + uint32_t st_size; + unsigned char st_info; + unsigned char st_other; + uint16_t st_shndx; +} Elf32_Sym; + +enum ElfPhType { + PT_NULL, + PT_LOAD +}; + +enum ElfShType { + SHT_NULL, + SHT_PROGBITS, + SHT_SYMTAB, + SHT_STRTAB +}; + /** A processed Executable and Linkable Format (ELF) file. 
*/ class Elf { - public: - Elf(std::string path, char** imagePointer); - ~Elf(); - uint64_t getProcessImageSize() const; - bool isValid() const; - uint64_t getEntryPoint() const; - - private: - uint64_t entryPoint_; - std::vector headers_; - uint32_t entryPoint32_; - std::vector headers32_; - bool isValid_ = false; - uint64_t processImageSize_; - bool mode32bit_; + public: + Elf(std::string path, char** imagePointer, std::unordered_map& symbols); + ~Elf(); + uint64_t getProcessImageSize() const; + bool isValid() const; + uint64_t getEntryPoint() const; + + private: + uint64_t entryPoint_; + std::vector headers_; + uint32_t entryPoint32_; + std::vector headers32_; + bool isValid_ = false; + uint64_t processImageSize_; + bool mode32bit_; }; } // namespace simeng diff --git a/src/include/simeng/GenericPredictor.hh b/src/include/simeng/GenericPredictor.hh index 21df57a4a5..aff5ade8fe 100644 --- a/src/include/simeng/GenericPredictor.hh +++ b/src/include/simeng/GenericPredictor.hh @@ -26,6 +26,9 @@ class GenericPredictor : public BranchPredictor { GenericPredictor(YAML::Node config); ~GenericPredictor(); + BranchPrediction predict(uint64_t address, BranchType type, + uint64_t knownTarget, uint8_t byteLength) override; + /** Generate a branch prediction for the supplied instruction address, a * branch type, and a known target if not 0. Returns a branch direction and * branch target address. */ diff --git a/src/include/simeng/Instruction.hh b/src/include/simeng/Instruction.hh index 8b1cf2f9db..9ffc4a8d27 100644 --- a/src/include/simeng/Instruction.hh +++ b/src/include/simeng/Instruction.hh @@ -23,6 +23,9 @@ class Instruction { * instruction. */ bool exceptionEncountered() const; + /** Binds an interrupt to this instruction */ + virtual void raiseInterrupt(int16_t& interruptId) {} + /** Retrieve the source registers this instruction reads. */ virtual const span getOperandRegisters() const = 0; @@ -99,8 +102,8 @@ class Instruction { /** Retrieve branch type. */ virtual BranchType getBranchType() const = 0; - /** Retrieve a branch target from the instruction's metadata if known. */ - virtual uint64_t getKnownTarget() const = 0; + /** Retrieve an offset of branch target from the instruction's metadata if known. */ + virtual uint64_t getKnownOffset() const = 0; /** Is this a store address operation (a subcategory of store operations which * deal with the generation of store addresses to store data at)? */ @@ -178,6 +181,12 @@ class Instruction { /** Get arbitrary micro-operation index. */ int getMicroOpIndex() const; + bool isDiv() const; + + bool isMul() const; + + bool isSysCall() const; + protected: /** Whether an exception has been encountered. */ bool exceptionEncountered_ = false; @@ -208,8 +217,8 @@ class Instruction { /** What type of branch this instruction is. */ BranchType branchType_ = BranchType::Unknown; - /** If the branch target is known at the time of decode, store it. */ - uint64_t knownTarget_ = 0; + /** If the offset of branch target is known at the time of decode, store it. */ + uint64_t knownOffset_ = 0; // Flushing /** This instruction's sequence ID; a higher ID represents a chronologically @@ -252,6 +261,12 @@ class Instruction { /** An arbitrary index value for the micro-operation. Its use is based on the * implementation of specific micro-operations. 
*/ int microOpIndex_; + + bool isMul_ = false; + + bool isDiv_ = false; + + bool isSysCall_ = false; }; } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/Architecture.hh b/src/include/simeng/arch/Architecture.hh index edd404c827..29874c6d69 100644 --- a/src/include/simeng/arch/Architecture.hh +++ b/src/include/simeng/arch/Architecture.hh @@ -101,6 +101,9 @@ class Architecture { /** Returns the maximum size of a valid instruction in bytes. */ virtual uint8_t getMaxInstructionSize() const = 0; + /** Returns the minimum size of a valid instruction in bytes. */ + virtual uint8_t getMinInstructionSize() const = 0; + /** Returns the physical register structure as defined within the config * file */ @@ -113,7 +116,7 @@ class Architecture { YAML::Node config) const = 0; /** Updates System registers of any system-based timers. */ - virtual void updateSystemTimerRegisters(RegisterFileSet* regFile, + virtual int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) const = 0; /** Update trace file */ diff --git a/src/include/simeng/arch/aarch64/Architecture.hh b/src/include/simeng/arch/aarch64/Architecture.hh index ad14dc1c0e..3c1ce27f59 100644 --- a/src/include/simeng/arch/aarch64/Architecture.hh +++ b/src/include/simeng/arch/aarch64/Architecture.hh @@ -51,6 +51,9 @@ class Architecture : public arch::Architecture { /** Returns the maximum size of a valid instruction in bytes. */ uint8_t getMaxInstructionSize() const override; + /** Returns the minimum size of a valid instruction in bytes. */ + uint8_t getMinInstructionSize() const override; + /** Returns the current vector length set by the provided configuration. */ uint64_t getVectorLength() const; @@ -59,7 +62,7 @@ class Architecture : public arch::Architecture { uint64_t getStreamingVectorLength() const; /** Updates System registers of any system-based timers. */ - void updateSystemTimerRegisters(RegisterFileSet* regFile, + int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) const override; /** Returns the physical register structure as defined within the config file diff --git a/src/include/simeng/arch/aarch64/Instruction.hh b/src/include/simeng/arch/aarch64/Instruction.hh index 43d1bd4961..bffa3c627e 100644 --- a/src/include/simeng/arch/aarch64/Instruction.hh +++ b/src/include/simeng/arch/aarch64/Instruction.hh @@ -301,7 +301,7 @@ class Instruction : public simeng::Instruction { BranchType getBranchType() const override; /** Retrieve a branch target from the instruction's metadata if known. */ - uint64_t getKnownTarget() const override; + uint64_t getKnownOffset() const override; /** Is this a store address operation (a subcategory of store operations which * deal with the generation of store addresses to store data at)? 
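updateSystemTimerRegisters() changing from void to int16_t implies the core's tick path now polls it for a pending interrupt id. A minimal sketch of that pattern, assuming the member names used by the cores later in this diff and -1 meaning "no interrupt":

    // Once per tick: let the architecture advance its timers and latch any
    // interrupt id it reports so it can be bound to an instruction later.
    int16_t id = isa_.updateSystemTimerRegisters(&registerFileSet_, ticks_);
    if (id >= 0) {
      interruptId_ = id;
    }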
*/ diff --git a/src/include/simeng/arch/riscv/Architecture.hh b/src/include/simeng/arch/riscv/Architecture.hh index de6c76c71e..3bdb6287e9 100644 --- a/src/include/simeng/arch/riscv/Architecture.hh +++ b/src/include/simeng/arch/riscv/Architecture.hh @@ -6,27 +6,18 @@ #include #include "simeng/arch/Architecture.hh" -#include "simeng/arch/riscv/ExceptionHandler.hh" + #include "simeng/arch/riscv/Instruction.hh" #include "simeng/kernel/Linux.hh" using csh = size_t; +#include "simeng/arch/riscv/SystemRegister.hh" +#include "simeng/arch/riscv/ExceptionHandler.hh" + namespace simeng { namespace arch { namespace riscv { - -enum riscv_sysreg { - SYSREG_MSTATUS = 0x300, - SYSREG_MSTATUSH = 0x310, - SYSREG_MEPC = 0x341, - SYSREG_MCAUSE = 0x342, - SYSREG_MHARTID = 0xF14, - SYSREG_CYCLE = 0xC00, - SYSREG_TIME = 0xC01, - SYSREG_INSTRRET = 0xC02 -}; - struct constantsPool { const uint8_t alignMask = 0x3; const uint8_t alignMaskCompressed = 0x1; @@ -45,7 +36,7 @@ struct archConstants { /* A basic RISC-V implementation of the `Architecture` interface. */ class Architecture : public arch::Architecture { public: - Architecture(kernel::Linux& kernel, YAML::Node config); + Architecture(kernel::Linux& kernel, YAML::Node config, std::shared_ptr& dataMemory); ~Architecture(); /** Pre-decode instruction memory into a macro-op of `Instruction` * instances. Returns the number of bytes consumed to produce it (always 4), @@ -60,6 +51,9 @@ class Architecture : public arch::Architecture { /** Returns a zero-indexed register tag for a system register encoding. */ int32_t getSystemRegisterTag(uint16_t reg) const override; + /** Returns a System Register index from a system register tag. */ + uint16_t getSystemRegisterIdFromTag(int32_t tag) const; + /** Returns the number of system registers that have a mapping. */ uint16_t getNumSystemRegisters() const override; @@ -77,8 +71,11 @@ class Architecture : public arch::Architecture { /** Returns the maximum size of a valid instruction in bytes. */ uint8_t getMaxInstructionSize() const override; - /** Updates System registers of any system-based timers. */ - void updateSystemTimerRegisters(RegisterFileSet* regFile, + /** Returns the minimum size of a valid instruction in bytes. */ + uint8_t getMinInstructionSize() const override; + + /** Updates System registers of any system-based timers. Return +ve id if interrupt occurs */ + int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) const override; /** Returns the physical register structure as defined within the config file @@ -117,6 +114,18 @@ class Architecture : public arch::Architecture { /** A mapping from system register encoding to a zero-indexed tag. */ std::unordered_map systemRegisterMap_; + /** Ordered map of memory mapped system regsiters banks **/ + std::map memoryMappedSystemRegisterBlocks; + + /* Memory Interface through which memory mapped system registers are accessed */ + std::shared_ptr systemRegisterMemoryInterface; + + /* Optional Clint block which replicates that functionality in spike */ + std::shared_ptr clint; + + /* Optional Host Target Interface block which replicates that functionality in spike */ + std::shared_ptr htif; + /** A map to hold the relationship between aarch64 instruction groups and * user-defined execution information. 
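The new block map, wrapper memory interface, and clint/htif members suggest the Architecture constructor registers each memory-mapped block at its base address and then wraps the supplied data memory. A sketch of that wiring, assuming the map holds shared pointers and using the Clint and SystemRegisterMemoryInterface types declared in SystemRegister.hh further down this diff:

    // Construction-time wiring (details assumed, not taken from the patch).
    clint = std::make_shared<Clint>(*this);
    memoryMappedSystemRegisterBlocks[Clint::CLINT_BASE] = clint;
    // Loads/stores that fall inside a registered block are serviced by the
    // block; everything else is forwarded to the real data memory.
    systemRegisterMemoryInterface =
        std::make_shared<SystemRegisterMemoryInterface>(
            dataMemory, memoryMappedSystemRegisterBlocks);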
*/ std::unordered_map groupExecutionInfo_; diff --git a/src/include/simeng/arch/riscv/ExceptionHandler.hh b/src/include/simeng/arch/riscv/ExceptionHandler.hh index 02d29c93bb..36cfd5d187 100644 --- a/src/include/simeng/arch/riscv/ExceptionHandler.hh +++ b/src/include/simeng/arch/riscv/ExceptionHandler.hh @@ -57,6 +57,9 @@ class ExceptionHandler : public simeng::arch::ExceptionHandler { */ bool readBufferThen(uint64_t ptr, uint64_t length, std::function then, bool firstCall = true); + + /** generate system register changes associated with taking an exception **/ + void takeException(uint64_t causecode); /** A data buffer used for reading data from memory. */ std::vector dataBuffer; diff --git a/src/include/simeng/arch/riscv/Instruction.hh b/src/include/simeng/arch/riscv/Instruction.hh index 3f023d28b6..60966ce044 100644 --- a/src/include/simeng/arch/riscv/Instruction.hh +++ b/src/include/simeng/arch/riscv/Instruction.hh @@ -48,7 +48,8 @@ enum class InstructionException { HypervisorCall, SecureMonitorCall, UnmappedSysReg, - NoAvailablePort + NoAvailablePort, + Interrupt }; enum CInstructionFormat { @@ -87,6 +88,18 @@ class Instruction : public simeng::Instruction { * processing this instruction. */ virtual InstructionException getException() const; + /** Raise an interrupt. */ + void raiseInterrupt(int16_t& interruptId) + { + interruptId_ = interruptId; + exceptionEncountered_ = true; + exception_ = InstructionException::Interrupt; + interruptId = -1; + } + + /** Get Id of this interrupr */ + int16_t getInterruptId() const { return interruptId_; } + /** Retrieve the source registers this instruction reads. */ const span getOperandRegisters() const override; @@ -139,8 +152,8 @@ class Instruction : public simeng::Instruction { /** Retrieve branch type. */ BranchType getBranchType() const override; - /** Retrieve a branch target from the instruction's metadata if known. */ - uint64_t getKnownTarget() const override; + /** Retrieve an offset of branch target from the instruction's metadata if known. */ + uint64_t getKnownOffset() const override; /** Is this a store address operation (a subcategory of store operations which * deal with the generation of store addresses to store data at)? */ @@ -186,6 +199,8 @@ class Instruction : public simeng::Instruction { /** ONLY valid after decode. Return regByteWidth */ uint8_t getArchRegWidth() const; + const Architecture& getArchitecture() const; + private: /** The maximum number of source registers any supported RISC-V instruction * can have. 
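raiseInterrupt() marks the instruction as having encountered an Interrupt exception and consumes the pending id (resetting the reference to -1), so the core presumably binds a pending interrupt to the next instruction it handles. A hedged sketch of that call site; the macroOp and interruptId_ names are assumed from the cores elsewhere in this diff:

    // In the core's tick path, before processing the next macro-op:
    if (interruptId_ >= 0 && !macroOp.empty()) {
      // The RISC-V override records the id, flags an Interrupt exception and
      // clears interruptId_ back to -1; the normal exception route then runs.
      macroOp[0]->raiseInterrupt(interruptId_);
    }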
*/ @@ -292,7 +307,9 @@ class Instruction : public simeng::Instruction { std::vector memoryData; /** Return integer register value, to support both 32-bit and 64-bit mode */ - int64_t getSignedInt(RegisterValue& value) const; + int64_t getSignedInt(RegisterValue& value) const; + + int16_t interruptId_; }; } // namespace riscv diff --git a/src/include/simeng/arch/riscv/SystemRegister.hh b/src/include/simeng/arch/riscv/SystemRegister.hh new file mode 100644 index 0000000000..0556156ef6 --- /dev/null +++ b/src/include/simeng/arch/riscv/SystemRegister.hh @@ -0,0 +1,229 @@ +#pragma once + +#include +#include +#include +#include + +#include "simeng/arch/Architecture.hh" + +#include "simeng/arch/riscv/Instruction.hh" +#include "simeng/kernel/Linux.hh" + +namespace simeng { +namespace arch { +namespace riscv { + +// Should probably move to Capstone + +enum riscv_sysreg { + SYSREG_MSTATUS = 0x300, + SYSREG_MIE = 0x304, + SYSREG_MTVEC = 0x305, + SYSREG_MSTATUSH = 0x310, + SYSREG_MSCRATCH = 0x340, + SYSREG_MEPC = 0x341, + SYSREG_MCAUSE = 0x342, + SYSREG_MHARTID = 0xF14, + SYSREG_MXCPTSC = 0xFC2, + SYSREG_CYCLE = 0xC00, + SYSREG_TIME = 0xC01, + SYSREG_INSTRRET = 0xC02 +}; + +enum riscv_causecode_enum { + CAUSE_IADDRESS_MISALIGN = 0, + CAUSE_IACCESS_FAULT = 1, + CAUSE_ILLEGAL_INSTRUCTION = 2, + CAUSE_BREAKPOINT = 3, + CAUSE_LDADDRESS_MISALIGN = 4, + CAUSE_LDACCESS_FAULT = 5, + CAUSE_STADDRESS_MISALIGN = 6, + CAUSE_STACCESS_FAULT = 7, + CAUSE_ECALL_FROM_M = 11 +}; + +enum class InterruptId { + HALT = 1, + TIMER = 7 +}; + +enum riscv_sysreg_masks { + MSTATUS_MIE_MASK = 0x8, + MSTATUS_MPIE_MASK = 0x80 +}; + +typedef uint16_t riscv_causecode; + +class MemoryMappedSystemRegister { + public: + MemoryMappedSystemRegister(const RegisterValue& val) : state(val) {} + bool size() { return state.size(); } + virtual void put(const RegisterValue& val) { state = val; } + virtual const RegisterValue& get() { return state; } + private: + RegisterValue state; +}; + +class MemoryMappedSystemRegisterBlock { + public: + MemoryMappedSystemRegisterBlock(size_t sz) : size_(sz) {} + size_t size() { return size_; } + virtual bool put(uint16_t, const RegisterValue&); + virtual bool get(uint16_t, RegisterValue&); + virtual void tick() {} + protected: + /** Ordered map of memory mapped system regsiters **/ + std::map memoryMappedSystemRegisters; + size_t size_; +}; + +class SystemRegisterMemoryInterface : public MemoryInterface { + public: + SystemRegisterMemoryInterface( + std::shared_ptr& dataMemory, + std::map& memoryMappedSystemRegisterBlocks + ) : + dataMemory_(dataMemory), + memoryMappedSystemRegisterBlocks_(memoryMappedSystemRegisterBlocks) + {} + + /** Request a read from the supplied target location. */ + virtual void requestRead(const MemoryAccessTarget& target, + uint64_t requestId = 0) + { + RegisterValue data(0,target.size); + if (getMemoryMappedSystemRegister(target.address, data)) + completedReads_.push_back({target, data, requestId}); + else + dataMemory_.get()->requestRead(target,requestId); + } + + /** Request a write of `data` to the target location. */ + virtual void requestWrite(const MemoryAccessTarget& target, + const RegisterValue& data) + { + if (!putMemoryMappedSystemRegister(target.address, data)) + dataMemory_.get()->requestWrite(target,data); + } + + /** Retrieve all completed read requests. 
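The mstatus masks and the mepc/mcause/mtvec encodings above are exactly what ExceptionHandler::takeException() needs for an M-mode trap. A sketch of the architecturally defined entry sequence it presumably implements; getCSR/setCSR, faultingPC and causecode are stand-in names, not APIs from this patch:

    uint32_t mstatus = getCSR(SYSREG_MSTATUS);
    uint32_t oldMIE  = (mstatus & MSTATUS_MIE_MASK) ? 1u : 0u;
    mstatus &= ~(MSTATUS_MIE_MASK | MSTATUS_MPIE_MASK);
    mstatus |= (oldMIE << 7);           // MPIE <- previous MIE (bit 7)
    setCSR(SYSREG_MSTATUS, mstatus);    // MIE (bit 3) is now 0: traps masked
    setCSR(SYSREG_MEPC, faultingPC);    // where an eventual mret resumes
    setCSR(SYSREG_MCAUSE, causecode);   // e.g. CAUSE_ECALL_FROM_M
    uint32_t trapPC = getCSR(SYSREG_MTVEC) & ~0x3u;  // direct-mode vector base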
*/ + virtual const span getCompletedReads() const + { + if (completedReads_.empty()) + return dataMemory_.get()->getCompletedReads(); + else + return {const_cast(completedReads_.data()), completedReads_.size()}; + } + + /** Clear the completed reads. */ + virtual void clearCompletedReads() + { + if (completedReads_.empty()) + dataMemory_.get()->clearCompletedReads(); + else + completedReads_.clear(); + } + + /** Returns true if there are any oustanding memory requests in-flight. */ + virtual bool hasPendingRequests() const + { + return dataMemory_.get()->hasPendingRequests(); + } + + /** Tick the memory interface to allow it to process internal tasks. + * + * TODO: Move ticking out of the memory interface and into a central "memory + * system" covering a set of related interfaces. + */ + virtual void tick() + { + dataMemory_.get()->tick(); + } + + private : + /** Put/Get Memory Mapped Registers */ + bool putMemoryMappedSystemRegister(uint64_t address, const RegisterValue& value); + bool getMemoryMappedSystemRegister(uint64_t address, RegisterValue& value); + + std::shared_ptr dataMemory_; + + /** Address map of all system register blocks */ + std::map& memoryMappedSystemRegisterBlocks_; + + /** A vector containing all completed read requests. */ + std::vector completedReads_; +}; + +class Architecture; + +class HostTargetInterface : public MemoryMappedSystemRegisterBlock { + public: + enum { + PAYLOAD_OFFSET = 0, + DEVICEID_OFFSET = 4 + }; + + HostTargetInterface(Architecture& architecture) + : + MemoryMappedSystemRegisterBlock(8), + architecture_(architecture), + isHalted_(false) + { + memoryMappedSystemRegisters[PAYLOAD_OFFSET] = new MemoryMappedSystemRegister(static_cast(0)); + memoryMappedSystemRegisters[DEVICEID_OFFSET] = new MemoryMappedSystemRegister(static_cast(0)); + } + + bool put(uint16_t offset, const RegisterValue&value); + + int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) { + if (isHalted_) + return static_cast(InterruptId::HALT); + return -1; + } + + private : + Architecture& architecture_; + bool isHalted_; +}; + +class Clint : public MemoryMappedSystemRegisterBlock { + public: + enum { + CLINT_BASE = 0x02000000, + CLINT_SIZE = 0x0000c000, + MTIMECMP_OFFSET = 0x4000, + MTIME_OFFSET = 0xbff8 + }; + + Clint(Architecture& architecture) + : + MemoryMappedSystemRegisterBlock(CLINT_SIZE), + architecture_(architecture), + mtime_(static_cast(0)), + mtimecmp_(static_cast(0)), + mtime_freq(100), + mtime_count(0), + last_tick(0) + { + memoryMappedSystemRegisters[MTIME_OFFSET] = &mtime_; + memoryMappedSystemRegisters[MTIMECMP_OFFSET] = &mtimecmp_; + } + + int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations); + + private : + Architecture& architecture_; + + MemoryMappedSystemRegister mtime_; + MemoryMappedSystemRegister mtimecmp_; + + uint32_t mtime_freq; + uint32_t mtime_count; + uint64_t last_tick; +}; + + +} // namespace riscv +} // namespace arch +} // namespace simeng diff --git a/src/include/simeng/kernel/Linux.hh b/src/include/simeng/kernel/Linux.hh index 0908d59006..635bd427d5 100644 --- a/src/include/simeng/kernel/Linux.hh +++ b/src/include/simeng/kernel/Linux.hh @@ -93,6 +93,8 @@ struct LinuxProcessState { std::vector fileDescriptorTable; /** Set of deallocated virtual file descriptors available for reuse. */ std::set freeFileDescriptors; + /** Pointer to LinuxProcess from which ProcessState derived*/ + const LinuxProcess* process; }; /** Fixed-width definition of 'rusage' (from ). 
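The Clint members mirror spike's machine timer, so its updateSystemTimerRegisters() most likely scales core iterations into mtime and compares against mtimecmp. A hedged sketch of that behaviour; the scaling policy and register widths are assumptions:

    // Advance mtime once every mtime_freq elapsed iterations, then raise the
    // machine timer interrupt once mtime has reached mtimecmp.
    mtime_count += static_cast<uint32_t>(iterations - last_tick);
    last_tick = iterations;
    if (mtime_count >= mtime_freq) {
      mtime_count -= mtime_freq;
      uint64_t t = mtime_.get().get<uint64_t>() + 1;
      mtime_.put(RegisterValue(t, 8));
    }
    if (mtime_.get().get<uint64_t>() >= mtimecmp_.get().get<uint64_t>())
      return static_cast<int16_t>(InterruptId::TIMER);
    return -1;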
*/ @@ -236,6 +238,9 @@ class Linux { /** The maximum size of a filesystem path. */ static const size_t LINUX_PATH_MAX = 4096; + /** Lookup symbol value from table in elf file. */ + bool lookupSymbolValue(const std::string symbol, uint64_t& value); + private: /** Resturn correct Dirfd depending on given pathname abd dirfd given to * syscall. */ diff --git a/src/include/simeng/kernel/LinuxProcess.hh b/src/include/simeng/kernel/LinuxProcess.hh index 9796b52937..d6b2c4a967 100644 --- a/src/include/simeng/kernel/LinuxProcess.hh +++ b/src/include/simeng/kernel/LinuxProcess.hh @@ -77,6 +77,9 @@ class LinuxProcess { /** Check whether the process image was created successfully. */ bool isValid() const; + /** Lookup symbol value from table in elf file. */ + bool lookupSymbolValue(const std::string symbol, uint64_t& value) const; + private: /** The size of the stack, in bytes. */ const uint64_t STACK_SIZE; @@ -113,6 +116,8 @@ class LinuxProcess { /** Shared pointer to processImage. */ std::shared_ptr processImage_; + + std::unordered_map symbols_; }; } // namespace kernel diff --git a/src/include/simeng/models/emulation/Core.hh b/src/include/simeng/models/emulation/Core.hh index c4a4acc453..1db10d2381 100644 --- a/src/include/simeng/models/emulation/Core.hh +++ b/src/include/simeng/models/emulation/Core.hh @@ -108,6 +108,9 @@ class Core : public simeng::Core { /** The number of branches executed. */ uint64_t branchesExecuted_ = 0; + + /** Set to interruptId when interrupt occurs, otherwise -1 */ + int16_t interruptId_; }; } // namespace emulation diff --git a/src/include/simeng/models/mcu/Core.hh b/src/include/simeng/models/mcu/Core.hh new file mode 100644 index 0000000000..de6a53d3ca --- /dev/null +++ b/src/include/simeng/models/mcu/Core.hh @@ -0,0 +1,181 @@ +#pragma once + +#include + +#include "simeng/ArchitecturalRegisterFileSet.hh" +#include "simeng/Core.hh" +#include "simeng/FlatMemoryInterface.hh" +#include "simeng/pipeline_hi/DecodeUnit.hh" +#include "simeng/pipeline_hi/ExecuteUnit.hh" +#include "simeng/pipeline_hi/FetchUnit.hh" +#include "simeng/pipeline_hi/WritebackUnit.hh" +#include "simeng/pipeline_hi/StaticPredictor.hh" +#include "simeng/pipeline_hi/LoadStoreQueue.hh" +#include "simeng/pipeline_hi/RegDepMap.hh" + +#include "simeng/arch/riscv/Architecture.hh" + +namespace simeng { +namespace models { +namespace mcu { + +/** An entry in the reservation station. */ +struct dependencyEntry1 { + /** The instruction to execute. */ + std::shared_ptr uop; + + /** The operand waiting on a value. */ + uint16_t operandIndex; +}; + +/** A simple scalar in-order pipelined core model. */ +class Core : public simeng::Core { + public: + /** Construct a core model, providing an ISA and branch predictor to use, + * along with a pointer and size of instruction memory, and a pointer to + * process memory. */ + Core(MemoryInterface& instructionMemory, MemoryInterface& dataMemory, + uint64_t processMemorySize, uint64_t entryPoint, + const arch::Architecture& isa, BranchPredictor& branchPredictor, YAML::Node config); + + /** Tick the core. Ticks each of the pipeline stages sequentially, then ticks + * the buffers between them. Checks for and executes pipeline flushes at the + * end of each cycle. */ + void tick() override; + + /** Check whether the program has halted. */ + bool hasHalted() const override; + + /** Retrieve the architectural register file set. */ + const ArchitecturalRegisterFileSet& getArchitecturalRegisterFileSet() + const override; + + /** Retrieve the number of instructions retired. 
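Since the Elf constructor now fills a symbol map and LinuxProcess keeps it in symbols_, the new lookupSymbolValue() is presumably a straight map probe. A sketch, assuming the map is keyed by symbol name with uint64_t values:

    bool LinuxProcess::lookupSymbolValue(const std::string symbol,
                                         uint64_t& value) const {
      auto it = symbols_.find(symbol);
      if (it == symbols_.end()) return false;  // symbol not present in the ELF
      value = it->second;
      return true;
    }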
*/ + uint64_t getInstructionsRetiredCount() const override; + + /** Retrieve the simulated nanoseconds elapsed since the core started. */ + uint64_t getSystemTimer() const override; + + /** Generate a map of statistics to report. */ + std::map getStats() const override; + + private: + /** Raise an exception to the core, providing the generating instruction. */ + void raiseException(const std::shared_ptr& instruction); + + /** Handle an exception raised during the cycle. */ + void handleException(); + + /** Load and supply memory data requested by an instruction. */ + void loadData(const std::shared_ptr& instruction); + /** Store data supplied by an instruction to memory. */ + void storeData(const std::shared_ptr& instruction); + + /** Forward operands to the most recently decoded instruction. */ + void forwardOperands(const span& destinations, + const span& values); + + bool canIssue(const std::shared_ptr& instruction); + void removeDep(const std::shared_ptr& instruction); + + /** Read pending registers for the most recently decoded instruction. */ + void readRegisters(); + + /** Process the active exception handler. */ + void processExceptionHandler(); + + /** Apply changes to the process state. */ + void applyStateChange(const arch::ProcessStateChange& change); + + /** Handle requesting/execution of a load instruction. */ + void handleLoad(const std::shared_ptr& instruction); + + void addInstrOrderQ(const std::shared_ptr& instruction); + bool removeInstrOrderQ(const std::shared_ptr& instruction); + + /** The process memory. */ + MemoryInterface& dataMemory_; + + /** A reference to the core's architecture. */ + const arch::Architecture& isa_; + + /** The core's register file set. */ + RegisterFileSet registerFileSet_; + + /** An architectural register file set, serving as a simple wrapper around the + * register file set. */ + ArchitecturalRegisterFileSet architecturalRegisterFileSet_; + + /** The process memory. */ + span processMemory; + + /** The buffer between fetch and decode. */ + pipeline_hi::PipelineBuffer fetchToDecodeBuffer_; + + /** The buffer between decode and execute. */ + pipeline_hi::PipelineBuffer> decodeToExecuteBuffer_; + + /** The buffer between execute and writeback. */ + std::vector>> + completionSlots_; + + /** The previously generated addresses. */ + std::queue previousAddresses_; + + /** The register dependency map. */ + pipeline_hi::RegDepMap regDepMap_; + + /** The fetch unit; fetches instructions from memory. */ + pipeline_hi::FetchUnit fetchUnit_; + + /** The decode unit; decodes instructions into uops and reads operands. */ + pipeline_hi::DecodeUnit decodeUnit_; + + /** The execute unit; executes uops and sends to writeback, also forwarding + * results. */ + pipeline_hi::ExecuteUnit executeUnit_; + + /** The writeback unit; writes uop results to the register files. */ + pipeline_hi::WritebackUnit writebackUnit_; + + pipeline_hi::LoadStoreQueue loadStoreQueue_; + + /** The number of times the pipeline has been flushed. */ + uint64_t flushes_ = 0; + + /** The number of times this core has been ticked. */ + uint64_t ticks_ = 0; + + uint64_t lastCommitTick_ = 0; + + /** Whether an exception was generated during the cycle. */ + bool exceptionGenerated_ = false; + + /** A pointer to the instruction responsible for generating the exception. */ + std::shared_ptr exceptionGeneratingInstruction_; + + /** Whether the core has halted. */ + bool hasHalted_ = false; + + /** The active exception handler. 
*/ + std::shared_ptr exceptionHandler_; + + std::deque> inorderIQ_; + + void checkHalting(); + bool enableHaltCheck = false; + uint64_t maxStallCycleTimeout; + uint64_t maxSimCycleTimeout; + uint64_t maxInstrTimeout; + + /** Set to interruptId when interrupt occurs, otherwise -1 */ + int16_t interruptId_; + + /** Return interrupt id of the pending interrupt*/ + int16_t isInterruptPending(); + +}; + +} // namespace mcu +} // namespace models +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/DecodeUnit.hh b/src/include/simeng/pipeline_hi/DecodeUnit.hh new file mode 100644 index 0000000000..728dff88f6 --- /dev/null +++ b/src/include/simeng/pipeline_hi/DecodeUnit.hh @@ -0,0 +1,66 @@ +#pragma once + +#include +#include + +#include "simeng/arch/Architecture.hh" +#include "simeng/pipeline_hi/PipelineBuffer.hh" + +namespace simeng { +namespace pipeline_hi { + +/** A decode unit for a pipelined processor. Splits pre-decoded macro-ops into + * uops. */ +class DecodeUnit { + public: + /** Constructs a decode unit with references to input/output buffers and the + * current branch predictor. */ + DecodeUnit(PipelineBuffer& input, + PipelineBuffer>& output, + BranchPredictor& predictor, + std::function&)> canIssue); + + /** Ticks the decode unit. Breaks macro-ops into uops, and performs early + * branch misprediction checks. */ + void tick(); + + /** Check whether the core should be flushed this cycle. */ + bool shouldFlush() const; + + /** Retrieve the target instruction address associated with the most recently + * discovered misprediction. */ + uint64_t getFlushAddress() const; + + /** Retrieve the number of times that the decode unit requested a flush due to + * discovering a branch misprediction early. */ + uint64_t getEarlyFlushes() const; + + /** Clear the microOps_ queue. */ + void purgeFlushed(); + + private: + /** A buffer of macro-ops to split into uops. */ + PipelineBuffer& input_; + /** An internal buffer for storing one or more uops. */ + std::deque> microOps_; + /** A buffer for writing decoded uops into. */ + PipelineBuffer>& output_; + + /** A reference to the current branch predictor. */ + BranchPredictor& predictor_; + + /** Whether the core should be flushed after this cycle. */ + bool shouldFlush_; + + /** The target instruction address the PC should be updated to upon flush. */ + uint64_t pc_; + + /** The number of times that the decode unit requested a flush due to + * discovering a branch misprediction early. 
*/ + uint64_t earlyFlushes_ = 0; + + std::function&)> canIssue_; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/DispatchIssueUnit.hh b/src/include/simeng/pipeline_hi/DispatchIssueUnit.hh new file mode 100644 index 0000000000..132358fd33 --- /dev/null +++ b/src/include/simeng/pipeline_hi/DispatchIssueUnit.hh @@ -0,0 +1,150 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "simeng/Instruction.hh" +#include "simeng/pipeline_hi/PipelineBuffer.hh" +#include "simeng/pipeline_hi/PortAllocator.hh" +#include "yaml-cpp/yaml.h" + +namespace simeng { +namespace pipeline_hi { + +/** A reservation station issue port */ +struct ReservationStationPort { + /** Issue port this port maps to */ + uint16_t issuePort; + /** Queue of instructions that are ready to be + * issued */ + std::deque> ready; +}; + +/** A reservation station */ +struct ReservationStation { + /** Size of reservation station */ + uint16_t capacity; + /** Number of instructions that can be dispatched to this unit per cycle. */ + uint16_t dispatchRate; + /** Current number of non-stalled instructions + * in reservation station */ + uint16_t currentSize; + /** Issue ports belonging to reservation station */ + std::vector ports; +}; + +/** An entry in the reservation station. */ +struct dependencyEntry { + /** The instruction to execute. */ + std::shared_ptr uop; + /** The port to issue to. */ + uint16_t port; + /** The operand waiting on a value. */ + uint16_t operandIndex; +}; + +/** A dispatch/issue unit for an out-of-order pipelined processor. Reads + * instruction operand and performs scoreboarding. Issues instructions to the + * execution unit once ready. */ +class DispatchIssueUnit { + public: + /** Construct a dispatch/issue unit with references to input/output buffers, + * the register file, the port allocator, and a description of the number of + * physical registers the scoreboard needs to reflect. */ + DispatchIssueUnit( + PipelineBuffer>& fromRename, + std::vector>>& issuePorts, + const RegisterFileSet& registerFileSet, PortAllocator& portAllocator, + const std::vector& physicalRegisterStructure, + YAML::Node config); + + /** Ticks the dispatch/issue unit. Reads available input operands for + * instructions and sets scoreboard flags for destination registers. */ + void tick(); + + /** Identify the oldest ready instruction in the reservation station and issue + * it. */ + void issue(); + + /** Forwards operands and performs register reads for the currently queued + * instruction. */ + void forwardOperands(const span& destinations, + const span& values); + + /** Set the scoreboard entry for the provided register as ready. */ + void setRegisterReady(Register reg); + + /** Clear the RS of all flushed instructions. */ + void purgeFlushed(); + + /** Retrieve the number of cycles this unit stalled due to insufficient RS + * space. */ + uint64_t getRSStalls() const; + + /** Retrieve the number of cycles no instructions were issued due to an empty + * RS. */ + uint64_t getFrontendStalls() const; + + /** Retrieve the number of cycles no instructions were issued due to + * dependencies or a lack of available ports. */ + uint64_t getBackendStalls() const; + + /** Retrieve the number of times an instruction was unable to issue due to a + * busy port. 
*/ + uint64_t getPortBusyStalls() const; + + /** Retrieve the current sizes and capacities of the reservation stations*/ + void getRSSizes(std::vector&) const; + + private: + /** A buffer of instructions to dispatch and read operands for. */ + PipelineBuffer>& input_; + + /** Ports to the execution units, for writing ready instructions to. */ + std::vector>>& issuePorts_; + + /** A reference to the physical register file set. */ + const RegisterFileSet& registerFileSet_; + + /** The register availability scoreboard. */ + std::vector> scoreboard_; + + /** Reservation stations */ + std::vector reservationStations_; + + /** A mapping from port to RS port */ + std::vector> portMapping_; + + /** A dependency matrix, containing all the instructions waiting on an + * operand. For a register `{type,tag}`, the vector of dependents may be found + * at `dependencyMatrix[type][tag]`. */ + std::vector>> dependencyMatrix_; + + /** A map to collect flushed instructions for each reservation station. */ + std::unordered_map>> + flushed_; + + /** A reference to the execution port allocator. */ + PortAllocator& portAllocator_; + + /** The number of cycles stalled due to a full reservation station. */ + uint64_t rsStalls_ = 0; + + /** The number of cycles no instructions were issued due to an empty RS. */ + uint64_t frontendStalls_ = 0; + + /** The number of cycles no instructions were issued due to dependencies or a + * lack of available ports. */ + uint64_t backendStalls_ = 0; + + /** The number of times an instruction was unable to issue due to a busy port. + */ + uint64_t portBusyStalls_ = 0; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/ExecuteUnit.hh b/src/include/simeng/pipeline_hi/ExecuteUnit.hh new file mode 100644 index 0000000000..da51db3480 --- /dev/null +++ b/src/include/simeng/pipeline_hi/ExecuteUnit.hh @@ -0,0 +1,147 @@ +#pragma once + +#include +#include + +#include "simeng/BranchPredictor.hh" +#include "simeng/Instruction.hh" +#include "simeng/pipeline_hi/PipelineBuffer.hh" + +namespace simeng { +namespace pipeline_hi { + +/** An execution unit pipeline entry, containing an instruction, and an + * indication of when it's reached the front of the execution pipeline. */ +struct ExecutionUnitPipelineEntry { + /** The instruction queued for execution. */ + std::shared_ptr insn; + /** The tick number this instruction will reach the front of the queue at. */ + uint64_t readyAt; +}; + +/** An execute unit for a pipelined processor. Executes instructions and + * forwards results. */ +class ExecuteUnit { + public: + /** Constructs an execute unit with references to an input and output buffer, + * the currently used branch predictor, and handlers for forwarding operands, + * loads/stores, and exceptions. */ + ExecuteUnit( + PipelineBuffer>& input, + PipelineBuffer>& output, + std::function, span)> forwardOperands, + std::function&)> handleLoad, + std::function&)> handleStore, + std::function&)> raiseException, + std::function&)> addInstrOrderQ, + std::function isInterruptPending, + BranchPredictor& predictor, bool pipelined = true, + const std::vector& blockingGroups = {}); + + /** Tick the execute unit. Places incoming instructions into the pipeline and + * executes an instruction that has reached the head of the pipeline, if + * present. */ + void tick(); + + /** Query whether a branch misprediction was discovered this cycle. 
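The scoreboard and dependency matrix above follow SimEng's usual dispatch pattern; the sketch below shows how a dispatched uop's source operands are typically resolved. The exact body of tick() is not in this header, and the port value is assumed to come from the port allocator:

    const auto& sourceRegisters = uop->getOperandRegisters();
    for (uint16_t i = 0; i < sourceRegisters.size(); i++) {
      const auto& reg = sourceRegisters[i];
      if (uop->isOperandReady(i)) continue;
      if (scoreboard_[reg.type][reg.tag]) {
        // Value already written back: read it straight away.
        uop->supplyOperand(i, registerFileSet_.get(reg));
      } else {
        // Producer still in flight: wait for setRegisterReady()/forwarding.
        dependencyMatrix_[reg.type][reg.tag].push_back({uop, port, i});
      }
    }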
*/ + bool shouldFlush() const; + + /** Retrieve the target instruction address associated with the most recently + * discovered misprediction. */ + uint64_t getFlushAddress() const; + + /** Retrieve the sequence ID associated with the most recently discovered + * misprediction. */ + uint64_t getFlushSeqId() const; + + /** Purge flushed instructions from the internal pipeline and clear any active + * stall, if applicable. */ + void purgeFlushed(); + + /** Retrieve the number of branch instructions that have been executed. */ + uint64_t getBranchExecutedCount() const; + + /** Retrieve the number of branch mispredictions. */ + uint64_t getBranchMispredictedCount() const; + + /** Retrieve the number of active execution cycles. */ + uint64_t getCycles() const; + + private: + /** Execute the supplied uop, write it into the output buffer, and forward + * results back to dispatch/issue. */ + void execute(std::shared_ptr& uop); + + /** A buffer of instructions to execute. */ + PipelineBuffer>& input_; + + /** A buffer for writing executed instructions into. */ + PipelineBuffer>& output_; + + /** A function handle called when forwarding operands. */ + std::function, span)> forwardOperands_; + + /** A function handle called after generating the addresses for a load. */ + std::function&)> handleLoad_; + /** A function handle called after acquiring the data for a store. */ + std::function&)> handleStore_; + + /** A function handle called upon exception generation. */ + std::function&)> raiseException_; + + /** A function to add the executed instruction into an ordering queue. */ + std::function&)> addInstrOrderQ_; + + /** Check if any interrupts are pending */ + std::function isInterruptPending_; + + /** A reference to the branch predictor, for updating with prediction results. + */ + BranchPredictor& predictor_; + + /** Whether this unit is pipelined, or if all instructions should stall until + * complete. */ + bool pipelined_; + + /** The execution unit's internal pipeline, holding instructions until their + * execution latency has expired and they are ready for their final results to + * be calculated and forwarded. */ + std::deque pipeline_; + + /** A group of operation types that are blocked whilst a similar operation + * is being executed. */ + std::vector blockingGroups_; + + /** A queue to hold blocked instructions of a similar group type to + * blockingGroup_. */ + std::deque> operationsStalled_; + + /** Whether the core should be flushed after this cycle. */ + bool shouldFlush_ = false; + + /** The target instruction address the PC should be reset to after this cycle. + */ + uint64_t pc_; + + /** The sequence ID of the youngest instruction that should remain after the + * current flush. */ + uint64_t flushAfter_; + + /** The number of times this unit has been ticked. */ + uint64_t tickCounter_ = 0; + + /** The cycle this unit will become unstalled. */ + uint64_t stallUntil_ = 0; + + /** The number of branch instructions that were executed. */ + uint64_t branchesExecuted_ = 0; + + /** The number of branch mispredictions that were observed. */ + uint64_t branchMispredicts_ = 0; + + /** The number of active execution cycles that were observed. 
*/ + uint64_t cycles_ = 0; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/FetchUnit.hh b/src/include/simeng/pipeline_hi/FetchUnit.hh new file mode 100644 index 0000000000..1c8f40c212 --- /dev/null +++ b/src/include/simeng/pipeline_hi/FetchUnit.hh @@ -0,0 +1,127 @@ +#pragma once + +#include + +#include "simeng/MemoryInterface.hh" +#include "simeng/arch/Architecture.hh" +#include "simeng/pipeline_hi/PipelineBuffer.hh" + +namespace simeng { +namespace pipeline_hi { + +/** The various states of the loop buffer. */ +enum class LoopBufferState { + IDLE = 0, // No operations + WAITING, // Waiting to find boundary instruction in fetch stream + FILLING, // Filling loop buffer with loop body + SUPPLYING // Feeding loop buffer content to output buffer +}; + +// Struct to hold information about a fetched instruction +struct loopBufferEntry { + // Encoding of the instruction + const uint64_t encoding; + + // Size of the instruction + const uint16_t instructionSize; + + // PC of the instruction + const uint64_t address; + + // Branch prediction made for instruction + const BranchPrediction prediction; +}; + +/** A fetch and pre-decode unit for a pipelined processor. Responsible for + * reading instruction memory and maintaining the program counter. */ +class FetchUnit { + public: + /** Construct a fetch unit with a reference to an output buffer, the ISA, and + * the current branch predictor, and information on the instruction memory. */ + FetchUnit(PipelineBuffer& output, MemoryInterface& instructionMemory, + uint64_t programByteLength, uint64_t entryPoint, uint8_t blockSize, + const arch::Architecture& isa, BranchPredictor& branchPredictor); + + ~FetchUnit(); + + /** Tick the fetch unit. Retrieves and pre-decodes the instruction at the + * current program counter. */ + void tick(); + + /** Function handle to retrieve branch that represents loop boundary. */ + void registerLoopBoundary(uint64_t branchAddress); + + /** Check whether the program has ended. Returns `true` if the current PC is + * outside of instruction memory. */ + bool hasHalted() const; + + /** Update the program counter to the specified address. */ + void updatePC(uint64_t address); + + /** Request instructions at the current program counter for a future cycle. */ + void requestFromPC(); + + /** Retrieve the number of cycles fetch terminated early due to a predicted + * branch. */ + uint64_t getBranchStalls() const; + + /** Clear the loop buffer. */ + void flushLoopBuffer(); + + /** */ + void flushPredictor(uint64_t address); + + private: + /** An output buffer connecting this unit to the decode unit. */ + PipelineBuffer& output_; + + /** The current program counter. */ + uint64_t pc_ = 0; + + /** An interface to the instruction memory. */ + MemoryInterface& instructionMemory_; + + /** The length of the available instruction memory. */ + uint64_t programByteLength_; + + /** Reference to the currently used ISA. */ + const arch::Architecture& isa_; + + /** Reference to the current branch predictor. */ + BranchPredictor& branchPredictor_; + + /** A loop buffer to supply a detected loop instruction stream. */ + std::deque loopBuffer_; + + /** State of the loop buffer. */ + LoopBufferState loopBufferState_ = LoopBufferState::IDLE; + + /** The branch instruction that forms the loop. */ + uint64_t loopBoundaryAddress_ = 0; + + /** The current program halt state. 
Set to `true` when the PC leaves the + * instruction memory region, and set back to `false` if the PC is returned to + * the instruction region. */ + bool hasHalted_ = false; + + bool waitSCEval_ = false; + + /** The number of cycles fetch terminated early due to a predicted branch. */ + uint64_t branchStalls_ = 0; + + /** The size of a fetch block, in bytes. */ + uint8_t blockSize_; + + /** A mask of the bits of the program counter to use for obtaining the block + * address to fetch. */ + uint64_t blockMask_; + + /** The buffer used to hold fetched instruction data. */ + uint8_t* fetchBuffer_; + + /** The amount of data currently in the fetch buffer. */ + uint8_t bufferedBytes_ = 0; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/LoadStoreQueue.hh b/src/include/simeng/pipeline_hi/LoadStoreQueue.hh new file mode 100644 index 0000000000..211b1ef72d --- /dev/null +++ b/src/include/simeng/pipeline_hi/LoadStoreQueue.hh @@ -0,0 +1,235 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "simeng/Instruction.hh" +#include "simeng/MemoryInterface.hh" +#include "simeng/pipeline_hi/PipelineBuffer.hh" + +namespace simeng { +namespace pipeline_hi { + +/** The memory access types which are processed. */ +enum accessType { LOAD = 0, STORE }; + +/** A requestQueue_ entry. */ +struct requestEntry { + /** The memory address(es) to be accessed. */ + std::queue reqAddresses; + /** The instruction sending the request(s). */ + std::shared_ptr insn; +}; +/** A requestQueue_ entry. */ +struct requestEntry1 { + /** The memory address(es) to be accessed. */ + std::queue reqAddresses; + /** The memory address(es) to be accessed. */ + std::queue data; + /** The instruction sending the request(s). */ + std::shared_ptr insn; + accessType type; + uint64_t reqtick; + bool isMisAligned; +}; +/** A load store queue (known as "load/store buffers" or "memory order buffer"). + * Holds in-flight memory access requests to ensure load/store consistency. */ +class LoadStoreQueue { + public: + /** Constructs a combined load/store queue model, simulating a shared queue + * for both load and store instructions, supplying completion slots for loads + * and an operand forwarding handler. */ + LoadStoreQueue( + unsigned int maxCombinedSpace, MemoryInterface& memory, + span>> completionSlots, + std::function, span)> forwardOperands, + bool exclusive = false, uint16_t loadBandwidth = UINT16_MAX, + uint16_t storeBandwidth = UINT16_MAX, + uint16_t permittedRequests = UINT16_MAX, + uint16_t permittedLoads = UINT16_MAX, + uint16_t permittedStores = UINT16_MAX); + + /** Constructs a split load/store queue model, simulating discrete queues for + * load and store instructions, supplying completion slots for loads and an + * operand forwarding handler. */ + LoadStoreQueue( + unsigned int maxLoadQueueSpace, unsigned int maxStoreQueueSpace, + MemoryInterface& memory, + span>> completionSlots, + std::function, span)> forwardOperands, + bool exclusive = false, uint16_t loadBandwidth = UINT16_MAX, + uint16_t storeBandwidth = UINT16_MAX, + uint16_t permittedRequests = UINT16_MAX, + uint16_t permittedLoads = UINT16_MAX, + uint16_t permittedStores = UINT16_MAX); + + /** Retrieve the available space for load uops. For combined queue this is the + * total remaining space. */ + unsigned int getLoadQueueSpace() const; + + /** Retrieve the available space for store uops. For a combined queue this is + * the total remaining space. 
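blockMask_ is derived from the fetch block size so that requestFromPC() can ask instruction memory for whole, aligned blocks. A short sketch of that relationship; blockSize_ is assumed to be a power of two, as in the existing fetch unit:

    // In the constructor:
    blockMask_ = ~static_cast<uint64_t>(blockSize_ - 1);

    // In requestFromPC(): fetch the aligned block containing the current PC.
    uint64_t blockAddress = pc_ & blockMask_;
    instructionMemory_.requestRead({blockAddress, blockSize_});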
*/ + unsigned int getStoreQueueSpace() const; + + /** Retrieve the available space for any memory uops. For a split queue this + * is the sum of the space in both queues. */ + unsigned int getTotalSpace() const; + + /** Add a load uop to the queue. */ + void addLoad(const std::shared_ptr& insn); + + /** Add a store uop to the queue. */ + void addStore(const std::shared_ptr& insn); + + /** Add the load instruction's memory requests to the requestQueue_. */ + void startLoad(const std::shared_ptr& insn); + + /** Supply the data to be stored by a store operation. */ + void supplyStoreData(const std::shared_ptr& insn); + + /** Commit and write the oldest store instruction to memory, removing it from + * the store queue. Returns `true` if memory disambiguation has discovered a + * memory order violation during the commit. */ + bool commitStore(const std::shared_ptr& uop); + + /** Remove the oldest load instruction from the load queue. */ + void commitLoad(const std::shared_ptr& uop); + + /** Remove all flushed instructions from the queues. */ + void purgeFlushed(); + + /** Whether this is a combined load/store queue. */ + bool isCombined() const; + + /** Process received load data and send any completed loads for writeback. */ + void tick(); + + /** Retrieve the load instruction associated with the most recently discovered + * memory order violation. */ + std::shared_ptr getViolatingLoad() const; + + void processResponse(); + + bool activeMisAlignedOpr() const; + + bool isBusy() const; + + float getAvgLdLat() const { return (totalLdLatency)/numLoads; }; + + uint32_t getMaxLdLat() const { return maxLdLatency; }; + uint32_t getMinLdLat() const { return minLdLatency; }; + + private: + /** The load queue: holds in-flight load instructions. */ + std::deque> loadQueue_; + + /** The store queue: holds in-flight store instructions with its associated + * data. */ + std::deque, + span>> + storeQueue_; + + /** Slots to write completed load instructions into for writeback. */ + span>> completionSlots_; + + /** Map of loads that have requested their data, keyed by sequence ID. */ + std::unordered_map> requestedLoads_; + + /** Map of loads that have requested their data, keyed by sequence ID. */ + std::unordered_map latencyLoads_; + + /** A function handler to call to forward the results of a completed load. */ + std::function, span)> forwardOperands_; + + /** The maximum number of loads that can be in-flight. Undefined if this + * is a combined queue. */ + unsigned int maxLoadQueueSpace_; + + /** The maximum number of stores that can be in-flight. Undefined if this is a + * combined queue. */ + unsigned int maxStoreQueueSpace_; + + /** The maximum number of memory ops that can be in-flight. Undefined if this + * is a split queue. */ + unsigned int maxCombinedSpace_; + + /** Whether this queue is combined or split. */ + bool combined_; + + /** Retrieve the load queue space for a split queue. */ + unsigned int getLoadQueueSplitSpace() const; + + /** Retrieve the store queue space for a split queue. */ + unsigned int getStoreQueueSplitSpace() const; + + /** Retrieve the total memory uop space available for a combined queue. */ + unsigned int getCombinedSpace() const; + + /** A pointer to process memory. */ + MemoryInterface& memory_; + + /** The load instruction associated with the most recently discovered memory + * order violation. */ + std::shared_ptr violatingLoad_ = nullptr; + + /** The number of times this unit has been ticked. 
*/ + uint64_t tickCounter_ = 0; + + /** A map to hold load instructions that are stalled due to a detected + * memory reordering confliction. First key is a store's sequence id and the + * second key the conflicting address. The value takes the form of a vector of + * pairs containing a pointer to the conflicted load and the size of the data + * needed at that address by the load. */ + std::unordered_map< + uint64_t, + std::unordered_map< + uint64_t, + std::vector, uint16_t>>>> + conflictionMap_; + + /** A map between LSQ cycles and load requests ready on that cycle. */ + std::map> requestLoadQueue_; + + /** A map between LSQ cycles and store requests ready on that cycle. */ + std::map> requestStoreQueue_; + + /** A queue of completed loads ready for writeback. */ + std::queue> completedLoads_; + + /** Whether the LSQ can only process loads xor stores within a cycle. */ + bool exclusive_; + + /** The amount of data readable from the L1D cache per cycle. */ + uint16_t loadBandwidth_; + + /** The amount of data writable to the L1D cache per cycle. */ + uint16_t storeBandwidth_; + + /** The combined limit of loads and store requests permitted per cycle. */ + uint16_t totalLimit_; + + /** The number of loads and stores permitted per cycle. */ + std::array reqLimits_; + + /** A map between LSQ cycles and load or store requests ready on that cycle. */ + std::deque requestQueue_; + + /* Identifier for request to memory*/ + uint8_t busReqId = 0; + + //bool activeMisAlignedStore = false; + + //Stats + uint64_t numLoads = 0; + double totalLdLatency = 0; + uint32_t maxLdLatency = 0; + uint32_t minLdLatency = 0xFFFF; + float averageAccessLdLatency = 0.0; +}; + + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/PipelineBuffer.hh b/src/include/simeng/pipeline_hi/PipelineBuffer.hh new file mode 100644 index 0000000000..dd2ed70ce7 --- /dev/null +++ b/src/include/simeng/pipeline_hi/PipelineBuffer.hh @@ -0,0 +1,107 @@ +#pragma once + +#include +#include +#include + +namespace simeng { +namespace pipeline_hi { + +/** A tickable pipelined buffer. Values are shifted from the tail slot to the + * head slot each time `tick()` is called. */ +template +class PipelineBuffer { + public: + /** Construct a pipeline buffer of width `width`, and fill all slots with + * `initialValue`. */ + PipelineBuffer(int width, const T& initialValue) + : width(width), buffer(width * defaultLength_, initialValue), + length_(defaultLength_), headIndex_(defaultLength_-1), + tailIndex_(0) {} + + PipelineBuffer(int width, const T& initialValue, int length) + : width(width), buffer(width * length, initialValue), length_(length), + headIndex_(length_-1), tailIndex_(0) { + assert(length_ != 0 && "Pipeline buffer length cannot be 0"); + } + + /** Tick the buffer and move head/tail pointers, or do nothing if it's + * stalled. */ + void tick() { + if (isStalled_) return; + + //length ==1 shortcut? condition check cost + + if (headIndex_) { // when headIndex != 0 + headIndex_--; + } else { + headIndex_ = length_ - 1; + } + if (tailIndex_) { // when tailIndex != 0 + tailIndex_--; + } else { + tailIndex_ = length_ - 1; + } + } + + /** Get a tail slots pointer. */ + T* getTailSlots() { + T* ptr = buffer.data(); + return &ptr[tailIndex_ * width]; + } + + /** Get a const tail slots pointer. */ + const T* getTailSlots() const { + const T* ptr = buffer.data(); + return &ptr[tailIndex_ * width]; + } + + /** Get a head slots pointer. 
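This variable-length buffer makes each extra slot cost one cycle: a value written at the tail reaches the head after length - 1 tick() calls. A small usage sketch based on the constructor and tick() shown above:

    // Width 1, initial value 0, length 3: a two-cycle stage-to-stage delay.
    simeng::pipeline_hi::PipelineBuffer<int> buf(1, 0, 3);
    buf.getTailSlots()[0] = 42;
    buf.tick();                        // value is one slot away from the head
    buf.tick();                        // value reaches the head slot
    int out = buf.getHeadSlots()[0];   // out == 42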
*/ + T* getHeadSlots() { + T* ptr = buffer.data(); + return &ptr[headIndex_ * width]; + } + + /** Get a const head slots pointer. */ + const T* getHeadSlots() const { + const T* ptr = buffer.data(); + return &ptr[headIndex_ * width]; + } + + /** Check if the buffer is stalled. */ + bool isStalled() const { return isStalled_; } + + /** Set the buffer's stall flag to `stalled`. */ + void stall(bool stalled) { isStalled_ = stalled; } + + /** Fill the buffer with a specified value. */ + void fill(const T& value) { std::fill(buffer.begin(), buffer.end(), value); } + + /** Get the width of the buffer slots. */ + unsigned short getWidth() const { return width; } + + private: + /** The width of each row of slots. */ + unsigned short width; + + /** The buffer. */ + std::vector buffer; + + /** Whether the buffer is stalled or not. */ + bool isStalled_ = false; + + /** Buffer length */ + const unsigned int length_; + + /** */ + unsigned int headIndex_; + + /** */ + unsigned int tailIndex_; + + /** The number of stages in the pipeline. */ + static const unsigned int defaultLength_ = 2; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/PipelineBuffer1.hh b/src/include/simeng/pipeline_hi/PipelineBuffer1.hh new file mode 100644 index 0000000000..dfb465a33c --- /dev/null +++ b/src/include/simeng/pipeline_hi/PipelineBuffer1.hh @@ -0,0 +1,133 @@ +#pragma once + +#include +#include +#include + +namespace simeng { +namespace pipeline_hi { + +// TODO: Extend to allow specifying the number of cycles it will take for +// information to move from tail to head (currently fixed at 1 by +// implementation) + +/** A tickable pipelined buffer. Values are shifted from the tail slot to the + * head slot each time `tick()` is called. */ +template +class PipelineBuffer { + public: + /** Construct a pipeline buffer of width `width`, and fill all slots with + * `initialValue`. */ + PipelineBuffer(int width, const T& initialValue) + : width(width), buffer(width * defaultLength_, initialValue), + length_(defaultLength_) {} + + //TODO:currently length > 2 is not working, oscillate between 0 and 1 + PipelineBuffer(int width, const T& initialValue, int length) + : width(width), buffer(width * length, initialValue), length_(length), + useDefaultLength_(false) { + assert(length_ != 0 && "Pipeline buffer length cannot be 0"); + } + + /** Tick the buffer and move head/tail pointers, or do nothing if it's + * stalled. */ + void tick() { + if (useDefaultLength_) { + if (isStalled_) return; + + headIsStart = !headIsStart; + } else { + if (length_ == 1) { + return; + } else if (length_ > 2) { + //TODO + } + } + } + + /** Get a tail slots pointer. */ + T* getTailSlots() { + T* ptr = buffer.data(); + if (useDefaultLength_) { + return &ptr[headIsStart * width]; + } else { + if (length_ == 1) { + return &ptr[0]; + } + } + } + + /** Get a const tail slots pointer. */ + const T* getTailSlots() const { + const T* ptr = buffer.data(); + if (useDefaultLength_) { + return &ptr[headIsStart * width]; + } else { + if (length_ == 1) { + return &ptr[0]; + } + } + } + + + /** Get a head slots pointer. */ + T* getHeadSlots() { + T* ptr = buffer.data(); + if (useDefaultLength_) { + return &ptr[!headIsStart * width]; + } else { + if (length_ == 1) { + return &ptr[0]; + } + } + } + + /** Get a const head slots pointer. 
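In this PipelineBuffer1.hh variant a length of 1 collapses the buffer: head and tail alias the same slot and tick() does nothing, which is the zero-delay case useful for merging adjacent stages. A small usage sketch:

    // Width 1, initial value 0, length 1: producer and consumer see the same
    // slot in the same cycle (zero delay, effectively merged stages).
    PipelineBuffer<int> buf(1, 0, 1);
    buf.getTailSlots()[0] = 7;
    buf.tick();                        // no-op when length == 1
    int out = buf.getHeadSlots()[0];   // out == 7, same cycle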
*/ + const T* getHeadSlots() const { + const T* ptr = buffer.data(); + if (useDefaultLength_) { + return &ptr[!headIsStart * width]; + } else { + if (length_ == 1) { + return &ptr[0]; + } + } + } + + /** Check if the buffer is stalled. */ + bool isStalled() const { return isStalled_; } + + /** Set the buffer's stall flag to `stalled`. */ + void stall(bool stalled) { isStalled_ = stalled; } + + /** Fill the buffer with a specified value. */ + void fill(const T& value) { std::fill(buffer.begin(), buffer.end(), value); } + + /** Get the width of the buffer slots. */ + unsigned short getWidth() const { return width; } + + private: + /** The width of each row of slots. */ + unsigned short width; + + /** The buffer. */ + std::vector buffer; + + /** The offset of the head pointer; either 0 or 1. */ + bool headIsStart = 0; + + /** Whether the buffer is stalled or not. */ + bool isStalled_ = false; + + /** Buffer length */ + const unsigned int length_; + + /** True if using default length (== 2) */ + bool useDefaultLength_ = true; + + /** The number of stages in the pipeline. */ + static const unsigned int defaultLength_ = 2; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/PortAllocator.hh b/src/include/simeng/pipeline_hi/PortAllocator.hh new file mode 100644 index 0000000000..bc985c0aaa --- /dev/null +++ b/src/include/simeng/pipeline_hi/PortAllocator.hh @@ -0,0 +1,43 @@ +#pragma once + +#include +#include + +namespace simeng { +namespace pipeline_hi { + +namespace PortType { +/** Instructions have to match the exact group(s) in set. */ +const uint8_t COMPULSORY = 0; +/** Instructions can optional match group(s) in set. */ +const uint8_t OPTIONAL = 1; +} // namespace PortType + +/** An abstract execution port allocator interface. */ +class PortAllocator { + public: + virtual ~PortAllocator(){}; + + /** Allocate a port for the specified instruction group; returns the allocated + * port. */ + virtual uint16_t allocate(const std::vector& ports) = 0; + + /** Inform the allocator that an instruction was issued to the specified port. + */ + virtual void issued(uint16_t port) = 0; + + /** Inform the allocator that an instruction will not issue to its + * allocated port. */ + virtual void deallocate(uint16_t port) = 0; + + /** Set function from DispatchIssueUnit to retrieve reservation + * station sizes during execution. */ + virtual void setRSSizeGetter( + std::function&)> rsSizes) = 0; + + /** Tick the port allocator to allow it to process internal tasks. */ + virtual void tick() = 0; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/RegDepMap.hh b/src/include/simeng/pipeline_hi/RegDepMap.hh new file mode 100644 index 0000000000..7145fd1903 --- /dev/null +++ b/src/include/simeng/pipeline_hi/RegDepMap.hh @@ -0,0 +1,57 @@ +#pragma once + +#include +#include +#include +#include + +#include "simeng/Instruction.hh" + +namespace simeng { +namespace pipeline_hi { + +typedef std::shared_ptr InstrPtr; +class RegDepMap +{ + public: + RegDepMap(const std::vector registerFileStructures, + const RegisterFileSet& registerFileSet); + ~RegDepMap(); + + /** Clear the Entire Map */ + void clear(); + + /** Insert all of a instruction's destination registers into map*/ + void insert(InstrPtr instr); + + /** Remove all of a instruction's destination registers into map*/ + void remove(InstrPtr instr); + + /** Is the current instruction able to read from this + * destination register? 
+ */ + bool canRead(InstrPtr instr); + + /** Is the current instruction able to write to this + * destination register? + */ + bool canWrite(InstrPtr instr); + + /* Is there any instr that can forward the data for this instr. If yes, set + * the data*/ + bool canForward(InstrPtr instr); + + void purgeFlushed(); + + void dump(); + + private: + const std::vector registerFileStructures_; + const RegisterFileSet& registerFileSet_; + typedef std::vector > DepMap; + std::vector regMap_; + uint32_t outstandingDep_ = 0; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/RegisterAliasTable.hh b/src/include/simeng/pipeline_hi/RegisterAliasTable.hh new file mode 100644 index 0000000000..1b2327fc52 --- /dev/null +++ b/src/include/simeng/pipeline_hi/RegisterAliasTable.hh @@ -0,0 +1,69 @@ +#pragma once + +#include + +#include "simeng/RegisterFileSet.hh" + +namespace simeng { +namespace pipeline_hi { + +/** A Register Alias Table (RAT) implementation. Contains information on + * the current register renaming state. */ +class RegisterAliasTable { + public: + /** Construct a RAT, supplying a description of the architectural register + * structure, and the corresponding numbers of physical registers that should + * be available. */ + RegisterAliasTable(std::vector architecturalStructure, + std::vector physicalStructure); + + /** Retrieve the current physical register assigned to the provided + * architectural register. */ + Register getMapping(Register architectural) const; + + /** Determine whether it's possible to allocate `quantity` physical registers + * of type `type` this cycle. */ + bool canAllocate(uint8_t type, unsigned int quantity) const; + + /** Check whether registers of type `type` can be renamed by this RAT. */ + bool canRename(uint8_t type) const; + + /** Allocate a physical register for the provided architectural register. */ + Register allocate(Register architectural); + + /** Get the number of free registers available for allocation this cycle. */ + unsigned int freeRegistersAvailable(uint8_t type) const; + + /** Commit the provided physical register. This register now holds the + * committed state of the corresponding architectural register, and previous + * physical register is freed. */ + void commit(Register physical); + + /** Rewind the allocation of a physical register. The former physical register + * is reinstated to the mapping table, and the provided register is freed. */ + void rewind(Register physical); + + /** Free the provided physical register. */ + void free(Register physical); + + private: + /** The register mapping tables. Holds a map of architectural -> physical + * register mappings for each register type. */ + std::vector> mappingTable_; + + /** The register history tables. Each table holds an entry for each physical + * register, recording the physical register formerly assigned to its + * architectural register; one table is available per register type. */ + std::vector> historyTable_; + + /** The register destination tables. Holds a map of physical -> architectural + * register mappings for each register type. Used for rewind behaviour. */ + std::vector> destinationTable_; + + /** The free register queues. Holds a list of unallocated physical registers + * for each register type. 
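The RAT interface above follows the usual allocate/commit/rewind lifecycle; a short usage sketch, with register indices chosen arbitrarily for illustration:

    Register arch = {0, 5};                 // an architectural register, type 0
    if (rat.canAllocate(arch.type, 1)) {
      Register phys = rat.allocate(arch);   // new speculative mapping
      // ... the instruction writes phys, then either retires or is flushed ...
      rat.commit(phys);                     // retire: frees the old mapping
      // rat.rewind(phys);                  // flush: restores the old mapping
    }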
*/ + std::vector> freeQueues_; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/ReorderBuffer.hh b/src/include/simeng/pipeline_hi/ReorderBuffer.hh new file mode 100644 index 0000000000..1e5fd8408c --- /dev/null +++ b/src/include/simeng/pipeline_hi/ReorderBuffer.hh @@ -0,0 +1,136 @@ +#pragma once + +#include +#include + +#include "simeng/Instruction.hh" +#include "simeng/pipeline_hi/LoadStoreQueue.hh" +#include "simeng/pipeline_hi/RegisterAliasTable.hh" + +namespace simeng { +namespace pipeline_hi { + +/** A branch prediction outcome with an associated instruction address. */ +struct latestBranch { + /** Branch instruction address. */ + uint64_t address; + + /** Outcome of the branch. */ + BranchPrediction outcome; + + /** The related instructionsCommitted_ value that this instruction was + * committed on. */ + uint64_t commitNumber; +}; + +/** A Reorder Buffer (ROB) implementation. Contains an in-order queue of + * in-flight instructions. */ +class ReorderBuffer { + public: + /** Constructs a reorder buffer of maximum size `maxSize`, supplying a + * reference to the register alias table. */ + ReorderBuffer( + unsigned int maxSize, RegisterAliasTable& rat, LoadStoreQueue& lsq, + std::function&)> raiseException, + std::function sendLoopBoundary, + BranchPredictor& predictor, uint16_t loopBufSize, + uint16_t loopDetectionThreshold); + + /** Add the provided instruction to the ROB. */ + void reserve(const std::shared_ptr& insn); + + void commitMicroOps(uint64_t insnId); + + /** Commit and remove up to `maxCommitSize` instructions. */ + unsigned int commit(unsigned int maxCommitSize); + + /** Flush all instructions with a sequence ID greater than `afterSeqId`. */ + void flush(uint64_t afterSeqId); + + /** Retrieve the current size of the ROB. */ + unsigned int size() const; + + /** Retrieve the current amount of free space in the ROB. */ + unsigned int getFreeSpace() const; + + /** Query whether a memory order violation was discovered in the most recent + * cycle. */ + bool shouldFlush() const; + + /** Retrieve the instruction address associated with the most recently + * discovered memory order violation. */ + uint64_t getFlushAddress() const; + + /** Retrieve the sequence ID associated with the most recently discovered + * memory order violation. */ + uint64_t getFlushSeqId() const; + + /** Get the number of instructions the ROB has committed. */ + uint64_t getInstructionsCommittedCount() const; + + /** Get the number of speculated loads which violated load-store ordering. */ + uint64_t getViolatingLoadsCount() const; + + private: + /** A reference to the register alias table. */ + RegisterAliasTable& rat_; + + /** A reference to the load/store queue. */ + LoadStoreQueue& lsq_; + + /** The maximum size of the ROB. */ + unsigned int maxSize_; + + /** A function to call upon exception generation. */ + std::function)> raiseException_; + + /** A function to send an instruction at a detected loop boundary. */ + std::function sendLoopBoundary_; + + /** Whether or not a loop has been detected. */ + bool loopDetected_ = false; + + /** A reference to the current branch predictor. */ + BranchPredictor& predictor_; + + /** The buffer containing in-flight instructions. */ + std::deque> buffer_; + + /** Whether the core should be flushed after the most recent commit. */ + bool shouldFlush_ = false; + + /** The target instruction address the PC should be reset to after the most + * recent commit. 
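
The latestBranch record above, together with the loop-detection threshold passed to the constructor, lets the ROB notice when the same branch keeps retiring back-to-back and signal a loop boundary for the loop buffer. A rough sketch of that counting rule only; the exact conditions SimEng checks (targets, interruptions by other branches) are not reproduced here.

#include <cstdint>

struct LoopDetector {
  uint64_t lastAddress = 0;
  uint64_t streak = 0;
  uint16_t threshold;  // retirements of the same branch needed to call it a loop

  explicit LoopDetector(uint16_t t) : threshold(t) {}

  // Called as each taken branch retires; true once it looks like a loop.
  bool onRetiredBranch(uint64_t address) {
    if (address == lastAddress) {
      ++streak;
    } else {
      lastAddress = address;
      streak = 1;
    }
    return streak >= threshold;
  }
};
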
+ */ + uint64_t pc_; + + /** The sequence ID of the youngest instruction that should remain after the + * current flush. */ + uint64_t flushAfter_; + + /** Latest retired branch outcome with a counter. */ + std::pair branchCounter_ = {{0, {false, 0}, 0}, 0}; + + /** Loop buffer size. */ + uint16_t loopBufSize_; + + /** Amount of times a branch must be seen without interruption for it to be + * considered a loop. */ + uint16_t loopDetectionThreshold_; + + /** The next available sequence ID. */ + uint64_t seqId_ = 0; + + /** The next available instruction ID. Used to identify in-order groups of + * micro-operations. */ + uint64_t insnId_ = 0; + + /** The number of instructions committed. */ + uint64_t instructionsCommitted_ = 0; + + /** The number of speculatived loads which violated load-store ordering. */ + uint64_t loadViolations_ = 0; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/StaticPredictor.hh b/src/include/simeng/pipeline_hi/StaticPredictor.hh new file mode 100644 index 0000000000..d8923dc23c --- /dev/null +++ b/src/include/simeng/pipeline_hi/StaticPredictor.hh @@ -0,0 +1,53 @@ +#pragma once + +#include + +#include "simeng/BranchPredictor.hh" +#include "yaml-cpp/yaml.h" + +namespace simeng { +namespace pipeline_hi { + +/** A static branch predictor; configurable in YAML config + */ +class StaticPredictor : public BranchPredictor { + public: + StaticPredictor(uint8_t sType); //TODO: temp constructor, get rid of yaml, delete it later + StaticPredictor(YAML::Node config); + ~StaticPredictor(); + + BranchPrediction predict(uint64_t address, BranchType type, + uint64_t knownTarget, uint8_t byteLength) override; + + /** Generate a branch prediction for the specified instruction address; will + * behave based on the configuration */ + BranchPrediction predict(uint64_t address, BranchType type, + uint64_t knownTarget) override; + + /** Provide branch results to update the prediction model for the specified + * instruction address. As this model is static, this does nothing. */ + void update(uint64_t address, bool taken, uint64_t targetAddress, + BranchType type) override; + + /** Provide flush logic for branch prediction scheme. The behaviour will + * be based on the configuration */ + void flush(uint64_t address) override; + + private: + /** Decide which static predictor will be in use */ + uint8_t staticType_; + + /** A return address stack. */ + std::deque ras_; + + /** RAS history with instruction address as the keys. A non-zero value + * represents the target prediction for a return instruction and a 0 entry for + * a branch-and-link instruction. */ + std::map rasHistory_; + + /** The size of the RAS. */ + uint64_t rasSize_ = 1000; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/include/simeng/pipeline_hi/WritebackUnit.hh b/src/include/simeng/pipeline_hi/WritebackUnit.hh new file mode 100644 index 0000000000..0816d3b5dc --- /dev/null +++ b/src/include/simeng/pipeline_hi/WritebackUnit.hh @@ -0,0 +1,62 @@ +#pragma once + +#include + +#include "simeng/Instruction.hh" +#include "simeng/pipeline_hi/PipelineBuffer.hh" +#include + +namespace simeng { +namespace pipeline_hi { + +/** A writeback pipeline unit. Responsible for writing instruction results to + * the register files. */ +class WritebackUnit { + public: + /** Constructs a writeback unit with references to an input buffer and + * register file to write to. 
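
StaticPredictor above picks a fixed policy per configuration rather than learning from outcomes. A minimal sketch of two common static policies, always-taken and backward-taken/forward-not-taken; the mapping of staticType_ values to these policies is an assumption, not taken from the source.

#include <cstdint>

struct StaticOutcome { bool taken; uint64_t target; };

// offset is the signed branch displacement; byteLength the instruction size.
StaticOutcome staticPredict(uint64_t address, int64_t offset,
                            uint8_t byteLength, bool alwaysTaken) {
  // Backward branches (negative offset) usually close loops, so the
  // backward-taken heuristic predicts them taken; forward ones not taken.
  bool taken = alwaysTaken || offset < 0;
  uint64_t fallThrough = address + byteLength;
  return {taken, taken ? address + offset : fallThrough};
}
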
*/ + WritebackUnit(std::vector>>& + completionSlots, + RegisterFileSet& registerFileSet, + std::function flagMicroOpCommits, + std::function&)> removeDep, + std::function&)> removeInstrOrderQ); + + /** Tick the writeback unit to perform its operation for this cycle. */ + void tick(); + + /** Retrieve a count of the number of instructions retired. */ + uint64_t getInstructionsWrittenCount() const; + + /** Retrieve instruction(s) to be printed out to the trace */ + std::vector> getInstsForTrace(); + + /** Clear the container for tracing */ + void traceFinished(); //Might be safer to update trace within WritebackUnit + + private: + /** Buffers of completed instructions to process. */ + std::vector>>& completionSlots_; + + /** The register file set to write results into. */ + RegisterFileSet& registerFileSet_; + + /** A function handle called to determine if uops associated to an instruction + * ID can now be committed. */ + std::function flagMicroOpCommits_; + + /** A function to remove the commited instruction from dependency queue. */ + std::function&)> removeDep_; + + /** A function to remove the commited instruction from ordering queue. */ + std::function&)> removeInstrOrderQ_; + + /** The number of instructions processed and retired by this stage. */ + uint64_t instructionsWritten_ = 0; + + /** Instruction(s) to be printed out to the trace */ + std::deque> committedInstsForTrace_; +}; + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 1fbf286509..97de63eb46 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -9,6 +9,7 @@ set(SIMENG_SOURCES arch/aarch64/MicroDecoder.cc arch/riscv/Architecture.cc arch/riscv/ExceptionHandler.cc + arch/riscv/SystemRegister.cc arch/riscv/Instruction.cc arch/riscv/Instruction_address.cc arch/riscv/Instruction_decode.cc @@ -18,6 +19,7 @@ set(SIMENG_SOURCES kernel/LinuxProcess.cc models/emulation/Core.cc models/inorder/Core.cc + models/mcu/Core.cc models/outoforder/Core.cc pipeline/A64FXPortAllocator.cc pipeline/BalancedPortAllocator.cc @@ -32,6 +34,16 @@ set(SIMENG_SOURCES pipeline/RenameUnit.cc pipeline/ReorderBuffer.cc pipeline/WritebackUnit.cc + pipeline_hi/DecodeUnit.cc + pipeline_hi/DispatchIssueUnit.cc + pipeline_hi/ExecuteUnit.cc + pipeline_hi/FetchUnit.cc + pipeline_hi/LoadStoreQueue.cc + pipeline_hi/RegDepMap.cc + pipeline_hi/RegisterAliasTable.cc + pipeline_hi/ReorderBuffer.cc + pipeline_hi/StaticPredictor.cc + pipeline_hi/WritebackUnit.cc AlwaysNotTakenPredictor.cc ArchitecturalRegisterFileSet.cc CMakeLists.txt diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc index 8ba06c8e08..e8f91d3450 100644 --- a/src/lib/CoreInstance.cc +++ b/src/lib/CoreInstance.cc @@ -90,6 +90,10 @@ void CoreInstance::setSimulationMode() { "outoforder") { mode_ = SimulationMode::OutOfOrder; modeString_ = "Out-of-Order"; + } else if (config_["Core"]["Simulation-Mode"].as() == + "mcu") { + mode_ = SimulationMode::MCU; + modeString_ = "MCU"; } return; @@ -236,7 +240,7 @@ void CoreInstance::createCore() { if (config_["Core"]["ISA"].as() == "rv64" || config_["Core"]["ISA"].as() == "rv32") { arch_ = - std::make_unique(kernel_, config_); + std::make_unique(kernel_, config_,dataMemory_); } else if (config_["Core"]["ISA"].as() == "AArch64") { arch_ = std::make_unique(kernel_, config_); @@ -244,6 +248,9 @@ void CoreInstance::createCore() { // Construct branch predictor object predictor_ = std::make_unique(config_); + if (mode_ == SimulationMode::MCU) { + predictor_ = std::make_unique(2); 
//config_ + } // Extract port arrangement from config file auto config_ports = config_["Ports"]; @@ -268,6 +275,10 @@ void CoreInstance::createCore() { core_ = std::make_shared( *instructionMemory_, *dataMemory_, processMemorySize_, entryPoint, *arch_, *predictor_); + } else if (mode_ == SimulationMode::MCU) { + core_ = std::make_shared( + *instructionMemory_, *dataMemory_, processMemorySize_, entryPoint, + *arch_, *predictor_, config_); } else if (mode_ == SimulationMode::OutOfOrder) { core_ = std::make_shared( *instructionMemory_, *dataMemory_, processMemorySize_, entryPoint, diff --git a/src/lib/Elf.cc b/src/lib/Elf.cc index 6281598403..901f370eec 100644 --- a/src/lib/Elf.cc +++ b/src/lib/Elf.cc @@ -2,6 +2,7 @@ #include #include +#include namespace simeng { @@ -13,7 +14,8 @@ namespace simeng { * https://man7.org/linux/man-pages/man5/elf.5.html */ -Elf::Elf(std::string path, char** imagePointer) { +Elf::Elf(std::string path, char** imagePointer, std::unordered_map& symbols) +{ std::ifstream file(path, std::ios::binary); if (!file.is_open()) { @@ -174,120 +176,69 @@ Elf::Elf(std::string path, char** imagePointer) { } } } else { - /** - * Starting from the 24th byte of the ELF header a 32-bit value - * represents the virtual address to which the system first transfers - * control, thus starting the process. - * In `elf32_hdr` this value maps to the member `Elf32_Addr e_entry`. - */ + file.seekg(0); - // Seek to the entry point of the file. - // The information in between is discarded - file.seekg(0x18); - file.read(reinterpret_cast(&entryPoint32_), sizeof(entryPoint32_)); + Elf32_Ehdr eheader; + file.read(reinterpret_cast(&eheader), sizeof(eheader)); - /** - * Starting from the 32nd byte of the ELF Header a 64-bit value - * represents the offset of the ELF Program header or - * Program header table in the ELF file. - * In `elf32_hdr` this value maps to the member `Elf32_Addr e_phoff`. - */ - - // Seek to the byte representing the start of the header offset table. - uint32_t headerOffset; - file.read(reinterpret_cast(&headerOffset), sizeof(headerOffset)); - - /** - * Starting 42th byte of the ELF Header a 16-bit value indicates - * the size of each entry in the ELF Program header. In the `elf32_hdr` - * struct this value maps to the member `Elf32_Half e_phentsize`. All - * header entries have the same size. - * Starting from the 44th byte a 16-bit value represents the number - * of header entries in the ELF Program header. In the `elf32_hdr` - * struct this value maps to `Elf32_Half e_phnum`. - */ - - // Seek to the byte representing header entry size. - file.seekg(0x2a); - uint16_t headerEntrySize; - file.read(reinterpret_cast(&headerEntrySize), sizeof(headerEntrySize)); - uint16_t headerEntries; - file.read(reinterpret_cast(&headerEntries), sizeof(headerEntries)); - - // Resize the header to equal the number of header entries. - headers32_.resize(headerEntries); + entryPoint32_ = eheader.e_entry; + processImageSize_ = 0; - // Loop over all headers and extract them. - for (size_t i = 0; i < headerEntries; i++) { - // Since all headers entries have the same size. - // We can extract the nth header using the header offset - // and header entry size. - file.seekg(headerOffset + (i * headerEntrySize)); - auto& header = headers32_[i]; + // Loop over pheaders and extract them. 
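
The rewritten 32-bit path reads the whole Elf32_Ehdr in one go and uses its named fields instead of the hard-coded seeks and sequential reads of the old code. A standalone sketch of that pattern, with error handling reduced to a single sanity check; <elf.h> supplies the struct on Linux.

#include <elf.h>
#include <fstream>

// e_entry, e_phoff, e_phentsize and e_phnum replace the old magic offsets
// (0x18, 0x2a, ...) the previous implementation seeked to.
bool readElf32Header(std::ifstream& file, Elf32_Ehdr& eheader) {
  file.seekg(0);
  file.read(reinterpret_cast<char*>(&eheader), sizeof(eheader));
  return file.good() && eheader.e_phentsize == sizeof(Elf32_Phdr);
}
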
+ file.seekg(eheader.e_phoff); + std::vector pheaders(eheader.e_phnum); + for (auto &ph : pheaders) { + file.read(reinterpret_cast(&ph), sizeof(ph)); + if ((ph.p_type == PT_LOAD) && (ph.p_vaddr+ph.p_memsz > processImageSize_)) + processImageSize_ = ph.p_vaddr+ph.p_memsz; + } - /** - * Like the ELF Header, the ELF Program header is also defined - * using a struct: - * typedef struct { - * uint32_t p_type; - * Elf32_Off p_offset; - * Elf32_Addr p_vaddr; - * Elf32_Addr p_paddr; - * uint32_t p_filesz; - * uint32_t p_memsz; - * uint32_t p_flags; - * uint32_t p_align; - * } Elf32_Phdr; - * - * The ELF Program header table is an array of structures, - * each describing a segment or other information the system - * needs to prepare the program for execution. A segment - * contains one or more sections (ELF Program Section). - * - * The `p_vaddr` field holds the virtual address at which the first - * byte of the segment resides in memory and the `p_memsz` field - * holds the number of bytes in the memory image of the segment. - * It may be zero. The `p_offset` member holds the offset from the - * beginning of the file at which the first byte of the segment resides. - */ + *imagePointer = (char*)malloc(processImageSize_ * sizeof(char)); - // Each address-related field is 4 bytes in a 32-bit ELF file - const int fieldBytes = 4; - file.read(reinterpret_cast(&(header.type)), sizeof(header.type)); - file.read(reinterpret_cast(&(header.offset)), fieldBytes); - file.read(reinterpret_cast(&(header.virtualAddress)), fieldBytes); - file.read(reinterpret_cast(&(header.physicalAddress)), fieldBytes); - file.read(reinterpret_cast(&(header.fileSize)), fieldBytes); - file.read(reinterpret_cast(&(header.memorySize)), fieldBytes); + for (const auto& ph : pheaders) { + if (ph.p_type == PT_LOAD) { + file.seekg(ph.p_offset); + // Read `fileSize` bytes from `file` into the appropriate place in process memory + file.read(*imagePointer+ph.p_vaddr, ph.p_filesz); - // To construct the process we look for the largest virtual address and - // add it to the memory size of the header. This way we obtain a very - // large array which can hold data at large virtual address. - // However, this way we end up creating a sparse array, in which most - // of the entries are unused. Also SimEng internally treats these - // virtual address as physical addresses to index into this large array. - if (header.virtualAddress + header.memorySize > processImageSize_) { - processImageSize_ = header.virtualAddress + header.memorySize; + if (ph.p_memsz>ph.p_filesz) + // Need to padd the rest of the section memory with zeros + memset(*imagePointer+ph.p_vaddr+ph.p_filesz, 0, ph.p_memsz-ph.p_filesz); } } - *imagePointer = (char*)malloc(processImageSize_ * sizeof(char)); - /** - * The ELF Program header has a member called `p_type`, which represents - * the kind of data or memory segments described by the program header. - * The value PT_LOAD=1 represents a loadable segment. In other words, - * it contains initialized data that contributes to the program's - * memory image. 
- */ - - // Process headers; only observe LOAD sections for this basic implementation - for (const auto& header : headers32_) { - if (header.type == 1) { // LOAD - file.seekg(header.offset); - // Read `fileSize` bytes from `file` into the appropriate place in process - // memory - file.read(*imagePointer + header.virtualAddress, header.fileSize); + // read section headers + Elf32_Shdr* sh_strtab = NULL; + Elf32_Shdr* sh_symtab = NULL; + file.seekg(eheader.e_shoff); + std::vector sheaders(eheader.e_shnum); + unsigned int sh_idx = 0; + for (auto &sh : sheaders) { + file.read(reinterpret_cast(&sh), sizeof(sh)); + + // find section header for strings to use for symbol table. + if (sh.sh_type==SHT_SYMTAB) + sh_symtab = &sh; + else if (sh.sh_type==SHT_STRTAB && sh_idx!=eheader.e_shstrndx) + sh_strtab = &sh; + sh_idx++; + }; + + // Read strings table + file.seekg(sh_strtab->sh_offset); + std::vector strtab(sh_strtab->sh_size); + file.read(&strtab[0], sh_strtab->sh_size); + + // Read symbols tables + file.seekg(sh_symtab->sh_offset); + unsigned num_symbols = sh_symtab->sh_size/sh_symtab->sh_entsize; + Elf32_Sym sym; + while(num_symbols--) { + file.read(reinterpret_cast(&sym), sizeof(sym)); + if (strtab[sym.st_name]) { + std::string name(&strtab[sym.st_name]); + symbols[name] = sym.st_value; } } } diff --git a/src/lib/GenericPredictor.cc b/src/lib/GenericPredictor.cc index 2539d7ae59..4b93d832bc 100644 --- a/src/lib/GenericPredictor.cc +++ b/src/lib/GenericPredictor.cc @@ -110,4 +110,11 @@ void GenericPredictor::flush(uint64_t address) { } } + +BranchPrediction GenericPredictor::predict(uint64_t address, BranchType type, + uint64_t knownTarget, + uint8_t byteLength) { + return predict(address, type, knownTarget); +} + } // namespace simeng diff --git a/src/lib/Instruction.cc b/src/lib/Instruction.cc index ac923c11b2..d1b7b112c5 100644 --- a/src/lib/Instruction.cc +++ b/src/lib/Instruction.cc @@ -57,5 +57,8 @@ bool Instruction::isLastMicroOp() const { return isLastMicroOp_; } void Instruction::setWaitingCommit() { waitingCommit_ = true; } bool Instruction::isWaitingCommit() const { return waitingCommit_; } int Instruction::getMicroOpIndex() const { return microOpIndex_; } +bool Instruction::isDiv() const { return isDiv_; } +bool Instruction::isMul() const { return isMul_; } +bool Instruction::isSysCall() const { return isSysCall_; } } // namespace simeng diff --git a/src/lib/ModelConfig.cc b/src/lib/ModelConfig.cc index 88cc1f7d59..342476347c 100644 --- a/src/lib/ModelConfig.cc +++ b/src/lib/ModelConfig.cc @@ -69,7 +69,7 @@ void ModelConfig::validate() { configFile_[root][subFields[0]], subFields[0], std::vector({"AArch64", "rv64", "rv32"}), ExpectedValue::String); nodeChecker(configFile_[root][subFields[1]], subFields[1], - {"emulation", "inorderpipelined", "outoforder"}, + {"emulation", "inorderpipelined", "mcu", "outoforder"}, ExpectedValue::String); nodeChecker(configFile_[root][subFields[2]], subFields[2], std::make_pair(0.f, 10.f), ExpectedValue::Float); diff --git a/src/lib/arch/aarch64/Architecture.cc b/src/lib/arch/aarch64/Architecture.cc index 23ebf86ae3..5ad11c70d1 100644 --- a/src/lib/arch/aarch64/Architecture.cc +++ b/src/lib/arch/aarch64/Architecture.cc @@ -281,11 +281,13 @@ ProcessStateChange Architecture::getInitialState() const { uint8_t Architecture::getMaxInstructionSize() const { return 4; } +uint8_t Architecture::getMinInstructionSize() const { return 4; } + uint64_t Architecture::getVectorLength() const { return VL_; } uint64_t Architecture::getStreamingVectorLength() const { 
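
The symbol-table walk added to Elf.cc above boils down to indexing the string table with each symbol's st_name offset and recording named symbols in a name-to-value map (later used to find `tohost`). The same logic in isolation; variable names here are illustrative.

#include <cstdint>
#include <elf.h>
#include <string>
#include <unordered_map>
#include <vector>

// st_name is an offset into the string table, not the name itself; only
// named symbols are recorded.
void collectSymbols(const std::vector<char>& strtab,
                    const std::vector<Elf32_Sym>& symtab,
                    std::unordered_map<std::string, uint64_t>& symbols) {
  for (const auto& sym : symtab) {
    if (sym.st_name < strtab.size() && strtab[sym.st_name] != '\0') {
      symbols[std::string(&strtab[sym.st_name])] = sym.st_value;
    }
  }
}
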
return SVL_; } -void Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile, +int16_t Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) const { // Update the Processor Cycle Counter to total cycles completed. regFile->set(PCCreg_, iterations); @@ -293,6 +295,8 @@ void Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile, if (iterations % (uint64_t)vctModulo_ == 0) { regFile->set(VCTreg_, regFile->get(VCTreg_).get() + 1); } + // interrupts NYI + return -1; } std::vector diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc index 909f5263ae..602bdc7fb3 100644 --- a/src/lib/arch/aarch64/Instruction.cc +++ b/src/lib/arch/aarch64/Instruction.cc @@ -43,7 +43,7 @@ Instruction::Instruction(const Instruction& insn) branchAddress_ = insn.branchAddress_; branchTaken_ = insn.branchTaken_; branchType_ = insn.branchType_; - knownTarget_ = insn.knownTarget_; + knownOffset_ = insn.knownOffset_; sequenceId_ = insn.sequenceId_; flushed_ = insn.flushed_; latency_ = insn.latency_; @@ -182,7 +182,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { BranchType Instruction::getBranchType() const { return branchType_; } -uint64_t Instruction::getKnownTarget() const { return knownTarget_; } +uint64_t Instruction::getKnownOffset() const { return knownOffset_; } uint16_t Instruction::getGroup() const { // Use identifiers to decide instruction group diff --git a/src/lib/arch/aarch64/Instruction_decode.cc b/src/lib/arch/aarch64/Instruction_decode.cc index e3f0529ee9..ca86962958 100644 --- a/src/lib/arch/aarch64/Instruction_decode.cc +++ b/src/lib/arch/aarch64/Instruction_decode.cc @@ -364,7 +364,7 @@ void Instruction::decode() { switch (metadata.opcode) { case Opcode::AArch64_B: // b label branchType_ = BranchType::Unconditional; - knownTarget_ = metadata.operands[0].imm; + knownOffset_ = metadata.operands[0].imm; break; case Opcode::AArch64_BR: { // br xn branchType_ = BranchType::Unconditional; @@ -372,7 +372,7 @@ void Instruction::decode() { } case Opcode::AArch64_BL: // bl #imm branchType_ = BranchType::SubroutineCall; - knownTarget_ = metadata.operands[0].imm; + knownOffset_ = metadata.operands[0].imm; break; case Opcode::AArch64_BLR: { // blr xn branchType_ = BranchType::SubroutineCall; @@ -383,7 +383,7 @@ void Instruction::decode() { branchType_ = BranchType::LoopClosing; else branchType_ = BranchType::Conditional; - knownTarget_ = metadata.operands[0].imm; + knownOffset_ = metadata.operands[0].imm; break; } case Opcode::AArch64_CBNZW: // cbnz wn, #imm @@ -397,7 +397,7 @@ void Instruction::decode() { branchType_ = BranchType::LoopClosing; else branchType_ = BranchType::Conditional; - knownTarget_ = metadata.operands[1].imm; + knownOffset_ = metadata.operands[1].imm; break; } case Opcode::AArch64_TBNZW: // tbnz wn, #imm, label @@ -411,7 +411,7 @@ void Instruction::decode() { branchType_ = BranchType::LoopClosing; else branchType_ = BranchType::Conditional; - knownTarget_ = metadata.operands[2].imm; + knownOffset_ = metadata.operands[2].imm; break; } case Opcode::AArch64_RET: { // ret {xr} diff --git a/src/lib/arch/riscv/Architecture.cc b/src/lib/arch/riscv/Architecture.cc index d1a18777e8..84afcc0996 100644 --- a/src/lib/arch/riscv/Architecture.cc +++ b/src/lib/arch/riscv/Architecture.cc @@ -4,6 +4,7 @@ #include #include #include +#include #include "InstructionMetadata.hh" @@ -14,8 +15,10 @@ namespace riscv { std::unordered_map Architecture::decodeCache; std::forward_list 
Architecture::metadataCache; -Architecture::Architecture(kernel::Linux& kernel, YAML::Node config) - : linux_(kernel) { +Architecture::Architecture(kernel::Linux& kernel, YAML::Node config, std::shared_ptr& dataMemory) +: + linux_(kernel) +{ is32Bit_ = ARCH_64BIT; if (config["Core"]["ISA"].as() == "rv32") { is32Bit_ = ARCH_32BIT; @@ -46,14 +49,39 @@ Architecture::Architecture(kernel::Linux& kernel, YAML::Node config) // Generate zero-indexed system register map systemRegisterMap_[SYSREG_MSTATUS] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MIE] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MTVEC] = systemRegisterMap_.size(); systemRegisterMap_[SYSREG_MSTATUSH] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MSCRATCH] = systemRegisterMap_.size(); systemRegisterMap_[SYSREG_MEPC] = systemRegisterMap_.size(); systemRegisterMap_[SYSREG_MCAUSE] = systemRegisterMap_.size(); systemRegisterMap_[SYSREG_MHARTID] = systemRegisterMap_.size(); + systemRegisterMap_[SYSREG_MXCPTSC] = systemRegisterMap_.size(); systemRegisterMap_[SYSREG_CYCLE] = systemRegisterMap_.size(); systemRegisterMap_[SYSREG_TIME] = systemRegisterMap_.size(); systemRegisterMap_[SYSREG_INSTRRET] = systemRegisterMap_.size(); + // Memory Mapped System Register Blocks + + // if elf file includes the label tohost then assume that this binary supports HTIF protocol (used by spike) and include an HTI block + uint64_t htifAddress; + if (linux_.lookupSymbolValue("tohost",htifAddress)) + { + std::cout << "[SimEng] HTIF detected at: " << std::hex << htifAddress << std::endl; + htif = std::make_shared(*this); + memoryMappedSystemRegisterBlocks[htifAddress] = htif.get(); + } + + // Install CLINT into memort map, this is optional + clint = std::make_shared(*this); + memoryMappedSystemRegisterBlocks[Clint::CLINT_BASE] = clint.get(); + + if (!memoryMappedSystemRegisterBlocks.empty()) + { + systemRegisterMemoryInterface = std::make_shared(dataMemory, memoryMappedSystemRegisterBlocks); + dataMemory = systemRegisterMemoryInterface; + } + // Instantiate an executionInfo entry for each group in the InstructionGroup // namespace. for (int i = 0; i < NUM_GROUPS; i++) { @@ -145,7 +173,7 @@ Architecture::Architecture(kernel::Linux& kernel, YAML::Node config) } } } - if (config["Core"]["Trace"].as()) { + if (config["Core"]["Trace"].IsDefined() && config["Core"]["Trace"].as()) { traceFile_ = new std::ofstream(); traceFile_->open("./trace.log"); traceOn_ = true; @@ -164,6 +192,7 @@ Architecture::~Architecture() { uint8_t Architecture::predecode(const void* ptr, uint8_t bytesAvailable, uint64_t instructionAddress, MacroOp& output) const { + // Check that instruction address is 4-byte aligned as required by RISC-V // 2-byte when Compressed ISA is supported if (instructionAddress & constants_.alignMask) { @@ -221,9 +250,11 @@ uint8_t Architecture::predecode(const void* ptr, uint8_t bytesAvailable, output.resize(1); auto& uop = output[0]; - // Retrieve the cached instruction and write to output - uop = std::make_shared(iter->second); + // Retrieve the cached instruction + auto newinsn = std::make_shared(iter->second); + // write to output + uop = newinsn; uop->setInstructionAddress(instructionAddress); return iter->second.getMetadata().lenBytes; @@ -265,8 +296,19 @@ int32_t Architecture::getSystemRegisterTag(uint16_t reg) const { // Check below is done for speculative instructions that may be passed into // the function but will not be executed. 
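
With HTIF and the CLINT installed, the wrapping memory interface only has to decide whether an address falls inside one of the registered blocks; everything else passes through to ordinary data memory. A sketch of that address-claim test, using the same upper_bound-then-step-back pattern as the lookup code later in this patch; Block and its size field are placeholders.

#include <cstdint>
#include <map>

struct Block {
  uint64_t size;  // bytes covered by this register block
};

// Returns the block containing `address`, or nullptr if no block claims it.
Block* findBlock(std::map<uint64_t, Block*>& blocks, uint64_t address) {
  auto it = blocks.upper_bound(address);  // first block starting beyond address
  if (it == blocks.begin()) return nullptr;
  --it;                                   // candidate block at or below address
  if (address - it->first < it->second->size) return it->second;
  return nullptr;
}
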
If such invalid speculative // instructions get through they can cause an out-of-range error. - if (!systemRegisterMap_.count(reg)) return 0; - return systemRegisterMap_.at(reg); + if (systemRegisterMap_.count(reg)) + return systemRegisterMap_.at(reg); + else + return -1; +} + +/** Returns a System Register index from a system register tag. + reverse lookup slow but only used in printing so will be fine */ +uint16_t Architecture::getSystemRegisterIdFromTag(int32_t tag) const { + for (auto it = systemRegisterMap_.begin();it != systemRegisterMap_.end();it++) + if (it->second == tag) + return it->first; + assert(0 && "Tag not found in systemRegisterMap"); } ProcessStateChange Architecture::getInitialState() const { @@ -289,6 +331,8 @@ ProcessStateChange Architecture::getInitialState() const { uint8_t Architecture::getMaxInstructionSize() const { return 4; } +uint8_t Architecture::getMinInstructionSize() const { return 2; } + std::vector Architecture::getConfigPhysicalRegisterStructure(YAML::Node config) const { return {{constants_.regWidth, config["Register-Set"]["GeneralPurpose-Count"].as()}, @@ -306,9 +350,21 @@ uint16_t Architecture::getNumSystemRegisters() const { return static_cast(systemRegisterMap_.size()); } -// Left blank as no implementation necessary -void Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile, +int16_t Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) const { + int16_t interruptId = -1; + + if (htif) + { + interruptId = htif->updateSystemTimerRegisters(regFile, iterations); + if (interruptId>=0) + return interruptId; + } + + if (clint) + interruptId = clint->updateSystemTimerRegisters(regFile, iterations); + + return interruptId; } void Architecture::updateInstrTrace(const std::shared_ptr& instruction, @@ -346,7 +402,7 @@ void Architecture::updateInstrTrace(const std::shared_ptr& } else if(reg.type == RegisterType::FLOAT) { s << "f" << std::dec << std::setfill('0') << std::setw(2) << reg.tag << "=0x"; } else if(reg.type == RegisterType::SYSTEM) { - s << "csr_0x" << std::hex << std::setfill('0') << std::setw(3) << metadata.csr << "=0x"; + s << "csr_0x" << std::hex << std::setfill('0') << std::setw(3) << getSystemRegisterIdFromTag(reg.tag) << "=0x"; } s << std::hex << std::setfill('0') << std::setw(8) << regFile->get(reg).get(); if(i < (num_dest-1)) { @@ -364,7 +420,7 @@ void Architecture::updateInstrTrace(const std::shared_ptr& } else if(reg.type == RegisterType::FLOAT) { s << "f" << std::dec << std::setfill('0') << std::setw(2) << reg.tag << "=0x"; } else if(reg.type == RegisterType::SYSTEM) { - s << "csr_0x" << std::hex << std::setfill('0') << std::setw(3) << metadata.csr << "=0x"; + s << "csr_0x" << std::hex << std::setfill('0') << std::setw(3) << getSystemRegisterIdFromTag(reg.tag) << "=0x"; } s << std::hex << std::setfill('0') << std::setw(8) << regFile->get(reg).get(); if(i < (num_src-1)) { diff --git a/src/lib/arch/riscv/ExceptionHandler.cc b/src/lib/arch/riscv/ExceptionHandler.cc index c88448048d..9ba2200858 100644 --- a/src/lib/arch/riscv/ExceptionHandler.cc +++ b/src/lib/arch/riscv/ExceptionHandler.cc @@ -1,5 +1,5 @@ +#include "simeng/arch/riscv/Architecture.hh" #include "simeng/arch/riscv/ExceptionHandler.hh" - #include #include @@ -646,6 +646,18 @@ bool ExceptionHandler::init() { } return concludeSyscall(stateChange); + + } else if (exception == InstructionException::SecureMonitorCall) { + printException(instruction_); + takeException(CAUSE_BREAKPOINT); + return true; + } else if (exception == 
InstructionException::Interrupt) { + printException(instruction_); + if (instruction_.getInterruptId() == static_cast(InterruptId::HALT)) + return fatal(); + uint64_t mcause_val = static_cast(instruction_.getInterruptId()) | (1<<(8*instruction_.getArchRegWidth()-1)); + takeException(mcause_val); + return true; } printException(instruction_); @@ -745,6 +757,45 @@ void ExceptionHandler::readLinkAt(span path) { concludeSyscall(stateChange); } +void ExceptionHandler::takeException(uint64_t causecode) +{ + const auto& registerFileSet = core.getArchitecturalRegisterFileSet(); + auto& architecture = instruction_.getArchitecture(); + uint16_t mtvec_tag = static_cast(architecture.getSystemRegisterTag(SYSREG_MTVEC)); + uint16_t mstatus_tag = static_cast(architecture.getSystemRegisterTag(SYSREG_MSTATUS)); + uint16_t mepc_tag = static_cast(architecture.getSystemRegisterTag(SYSREG_MEPC)); + uint16_t mcause_tag = static_cast(architecture.getSystemRegisterTag(SYSREG_MCAUSE)); + uint64_t mcause_val = static_cast(causecode); + + auto mstatus_bits = registerFileSet.get( { RegisterType::SYSTEM, mstatus_tag } ).get(); + + // mpie=mie, mie=0 + mstatus_bits &= ~MSTATUS_MPIE_MASK; + if (mstatus_bits & MSTATUS_MIE_MASK) + mstatus_bits |= MSTATUS_MPIE_MASK; + mstatus_bits &= ~MSTATUS_MIE_MASK; + + RegisterValue mstatus (mstatus_bits, architecture.getConstants().regWidth); + RegisterValue mepc (instruction_.getInstructionAddress(), architecture.getConstants().regWidth); + RegisterValue mcause (mcause_val, architecture.getConstants().regWidth); + + uint64_t mtvec = registerFileSet.get( { RegisterType::SYSTEM, mtvec_tag } ).get(); + + ProcessStateChange changes = { + ChangeType::REPLACEMENT, + { + { RegisterType::SYSTEM, mstatus_tag }, + { RegisterType::SYSTEM, mepc_tag }, + { RegisterType::SYSTEM, mcause_tag } + }, + {mstatus, mepc, mcause} + }; + + result_ = {false, mtvec, changes}; + //result_ = {false, instruction_.getInstructionAddress(), changes}; +} + + bool ExceptionHandler::readBufferThen(uint64_t ptr, uint64_t length, std::function then, bool firstCall) { @@ -827,6 +878,9 @@ void ExceptionHandler::printException(const Instruction& insn) const { case InstructionException::NoAvailablePort: std::cout << "unsupported execution port"; break; + case InstructionException::Interrupt: + std::cout << "interrupt (id: " << insn.getInterruptId() << ")"; + break; case InstructionException::UnmappedSysReg: std::cout << "unmapped system register"; break; diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc index 6cfc173b9d..e292b889b9 100644 --- a/src/lib/arch/riscv/Instruction.cc +++ b/src/lib/arch/riscv/Instruction.cc @@ -131,7 +131,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { BranchType Instruction::getBranchType() const { return branchType_; } -uint64_t Instruction::getKnownTarget() const { return knownTarget_; } +uint64_t Instruction::getKnownOffset() const { return knownOffset_; } uint16_t Instruction::getGroup() const { uint16_t base = InstructionGroups::INT; @@ -171,6 +171,10 @@ void Instruction::setArchRegWidth(uint8_t len) { archRegWidth_ = len; } uint8_t Instruction::getArchRegWidth() const { return archRegWidth_; } +const Architecture& Instruction::getArchitecture() const { + return architecture_; +} + } // namespace riscv } // namespace arch } // namespace simeng diff --git a/src/lib/arch/riscv/InstructionMetadata.cc b/src/lib/arch/riscv/InstructionMetadata.cc index f2b5a9b736..d293bc7fdb 100644 --- a/src/lib/arch/riscv/InstructionMetadata.cc +++ 
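
The takeException() routine above performs the standard machine-mode trap entry: MIE is saved into MPIE and cleared, the faulting PC goes to mepc, the cause to mcause (with the top bit set for interrupts), and execution resumes at mtvec. A self-contained sketch of just the register arithmetic; the bit positions follow the RISC-V privileged spec (MIE = bit 3, MPIE = bit 7), which is what the MSTATUS_*_MASK constants used above are assumed to encode.

#include <cstdint>

constexpr uint32_t MSTATUS_MIE  = 1u << 3;  // machine interrupt enable
constexpr uint32_t MSTATUS_MPIE = 1u << 7;  // previous interrupt enable

struct TrapEntry { uint32_t mstatus, mepc, mcause; };

TrapEntry enterTrap(uint32_t mstatus, uint32_t faultingPc, uint32_t cause,
                    bool isInterrupt) {
  uint32_t s = mstatus & ~MSTATUS_MPIE;
  if (s & MSTATUS_MIE) s |= MSTATUS_MPIE;   // mpie <- mie
  s &= ~MSTATUS_MIE;                        // mie <- 0 while in the handler
  uint32_t mcause = cause;
  if (isInterrupt) mcause |= 1u << 31;      // interrupt bit is the MSB on RV32
  return {s, faultingPc, mcause};
}
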
b/src/lib/arch/riscv/InstructionMetadata.cc @@ -264,7 +264,10 @@ void InstructionMetadata::alterPseudoInstructions(const cs_insn& insn) { csr = ((uint32_t)encoding[3] << 4) | ((uint32_t)encoding[2] >> 4); //If there are less than 2 operands provided add necessary x0 operand if(operandCount == 1) { - if(strcmp(mnemonic, "csrr") == 0) { //csrrs rd,csr,x0 + if((strcmp(mnemonic, "rdinstret") == 0) || + (strcmp(mnemonic, "rdcycle") == 0) || + (strcmp(mnemonic, "rdtime") == 0) || + (strcmp(mnemonic, "csrr") == 0)) { //csrrs rd,csr,x0 operands[1].type = RISCV_OP_REG; operands[1].reg = 1; } else { //csrrxx x0,csr,rs/imm diff --git a/src/lib/arch/riscv/Instruction_decode.cc b/src/lib/arch/riscv/Instruction_decode.cc index 8bdd5041eb..9efa7f5c4c 100644 --- a/src/lib/arch/riscv/Instruction_decode.cc +++ b/src/lib/arch/riscv/Instruction_decode.cc @@ -143,6 +143,24 @@ void Instruction::decode() { case Opcode::RISCV_SD: isStore_ = true; break; + //identify MULs/DIVs + case Opcode::RISCV_MUL: + case Opcode::RISCV_MULH: + case Opcode::RISCV_MULHU: + case Opcode::RISCV_MULHSU: + case Opcode::RISCV_MULW: + isMultiply_ = true; + isMul_ = true; //this one is for simeng/Instruction.hh + break; + case Opcode::RISCV_DIV: + case Opcode::RISCV_DIVU: + case Opcode::RISCV_DIVUW: + case Opcode::RISCV_DIVW: + isDivide_ = true; + isDiv_ = true; //this one is for simeng/Instruction.hh + break; + case Opcode::RISCV_ECALL: + isSysCall_ = true; } if (Opcode::RISCV_AMOADD_D <= metadata.opcode && @@ -257,6 +275,16 @@ void Instruction::decode() { isCompare_ = true; } + if (Opcode::RISCV_MRET == metadata.opcode) { + uint16_t mepc_tag = static_cast(architecture_.getSystemRegisterTag(SYSREG_MEPC)); + uint16_t mstatus_tag = static_cast(architecture_.getSystemRegisterTag(SYSREG_MSTATUS)); + sourceRegisters[sourceRegisterCount++] = { RegisterType::SYSTEM, mepc_tag }; + sourceRegisters[sourceRegisterCount++] = { RegisterType::SYSTEM, mstatus_tag }; + destinationRegisters[destinationRegisterCount++] = { RegisterType::SYSTEM, mstatus_tag }; + operandsPending += 2; + isBranch_ = true; + } + // Set branch type switch (metadata.opcode) { case Opcode::RISCV_BEQ: @@ -266,12 +294,24 @@ void Instruction::decode() { case Opcode::RISCV_BGE: case Opcode::RISCV_BGEU: branchType_ = BranchType::Conditional; - knownTarget_ = instructionAddress_ + metadata.operands[2].imm; + knownOffset_ = metadata.operands[2].imm; break; case Opcode::RISCV_JAL: + branchType_ = BranchType::SubroutineCall; + knownOffset_ = metadata.operands[1].imm; + break; case Opcode::RISCV_JALR: - branchType_ = BranchType::Unconditional; - knownTarget_ = instructionAddress_ + metadata.operands[1].imm; + { + //jalr x0, 0(x1) == ret + if (metadata.operands[0].reg == RISCV_REG_X0 && metadata.operands[1].reg == RISCV_REG_X1 && metadata.operands[2].imm == 0) { + branchType_ = BranchType::Return; + } else { + branchType_ = BranchType::SubroutineCall; + } + break; + } + case Opcode::RISCV_MRET: + branchType_ = BranchType::Unknown; //TODO: think which type it fits / create new type break; } } @@ -292,10 +332,14 @@ bool Instruction::decode16() { "Invalid operand for JR,JALR:- CR instructions"); sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[0].reg); operandsPending++; + branchType_ = BranchType::SubroutineCall; if (metadata.opcode == Opcode::RISCV_C_JALR) { destinationRegisters[destinationRegisterCount++] = Instruction::RA_REGISTER; + } else { //case C_JR + if (metadata.operands[0].reg == RISCV_REG_X1 ) { + branchType_ = BranchType::Return; + } } - branchType_ = 
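
The decode change above applies the standard RISC-V calling-convention hint: `jalr x0, 0(x1)` is the canonical `ret`, so the patch classifies it as a Return for the return address stack, while every other jalr is treated as a subroutine call. As a standalone rule:

#include <cstdint>

enum class BType { SubroutineCall, Return };

// rd/rs1 are architectural register numbers; imm is the I-type displacement.
BType classifyJalr(unsigned rd, unsigned rs1, int32_t imm) {
  if (rd == 0 && rs1 == 1 && imm == 0) return BType::Return;  // jalr x0, 0(ra)
  return BType::SubroutineCall;
}
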
BranchType::Unconditional; break; case Opcode::RISCV_C_MV: instFormat_ = CIF_CR; @@ -309,7 +353,7 @@ bool Instruction::decode16() { sourceRegisters[sourceRegisterCount++] = csRegToRegister(metadata.operands[1].reg); operandsPending++; break; - case Opcode::RISCV_C_EBREAK://TODO + case Opcode::RISCV_C_EBREAK: instFormat_ = CIF_CR; break; case Opcode::RISCV_C_ADD: @@ -410,7 +454,7 @@ bool Instruction::decode16() { operandsPending++; c_imm = metadata.operands[1].imm; branchType_ = BranchType::Conditional; - knownTarget_ = instructionAddress_ + metadata.operands[1].imm; + knownOffset_ = metadata.operands[1].imm; break; case Opcode::RISCV_C_FLD: case Opcode::RISCV_C_FLW: @@ -503,9 +547,11 @@ bool Instruction::decode16() { c_imm = metadata.operands[0].imm; if (metadata.opcode == Opcode::RISCV_C_JAL) { destinationRegisters[destinationRegisterCount++] = Instruction::RA_REGISTER; + branchType_ = BranchType::SubroutineCall; + } else { // case C_J + branchType_ = BranchType::Unconditional; } - branchType_ = BranchType::Unconditional; - knownTarget_ = instructionAddress_ + metadata.operands[0].imm; + knownOffset_ = metadata.operands[0].imm; break; case Opcode::RISCV_C_UNIMP: break; @@ -523,7 +569,7 @@ bool Instruction::decodeCsr() { } isCsr_ = true; - uint32_t sysRegTag = architecture_.getSystemRegisterTag(metadata.csr); + int32_t sysRegTag = architecture_.getSystemRegisterTag(metadata.csr); if (sysRegTag == -1) { exceptionEncountered_ = true; exception_ = InstructionException::UnmappedSysReg; @@ -539,16 +585,16 @@ bool Instruction::decodeCsr() { destinationRegisters[destinationRegisterCount++] = { RegisterType::SYSTEM, static_cast(sysRegTag)}; - // First operand from metadata is rd, second operand from metadata is rs1 - if (csRegToRegister(metadata.operands[1].reg) != Instruction::ZERO_REGISTER) { + // First operand (0) from metadata is rd, second operand (1) from metadata is rs1 + if (csRegToRegister(metadata.operands[0].reg) != Instruction::ZERO_REGISTER) { destinationRegisters[destinationRegisterCount++] = - csRegToRegister(metadata.operands[1].reg); + csRegToRegister(metadata.operands[0].reg); } - if(metadata.operands[0].type == RISCV_OP_IMM) { - c_imm = metadata.operands[0].imm; - } else if (metadata.operands[0].type == RISCV_OP_REG) { - sourceRegisters[sourceRegisterCount] = csRegToRegister(metadata.operands[0].reg); + if(metadata.operands[1].type == RISCV_OP_IMM) { + c_imm = metadata.operands[1].imm; + } else if (metadata.operands[1].type == RISCV_OP_REG) { + sourceRegisters[sourceRegisterCount] = csRegToRegister(metadata.operands[1].reg); if (sourceRegisters[sourceRegisterCount] == Instruction::ZERO_REGISTER) { // Catch zero register references and pre-complete those operands diff --git a/src/lib/arch/riscv/Instruction_execute.cc b/src/lib/arch/riscv/Instruction_execute.cc index b7a4a822b4..a37d3750fd 100644 --- a/src/lib/arch/riscv/Instruction_execute.cc +++ b/src/lib/arch/riscv/Instruction_execute.cc @@ -4,6 +4,7 @@ #include "InstructionMetadata.hh" #include "simeng/arch/riscv/Instruction.hh" +#include "simeng/arch/riscv/SystemRegister.hh" namespace simeng { namespace arch { @@ -358,7 +359,9 @@ void Instruction::execute() { } case Opcode::RISCV_SLTIU: { // SLTIU rd,rs1,imm const uint64_t rs1 = operands[0].get(); - const uint64_t imm = static_cast(metadata.operands[2].imm); + uint64_t imm = metadata.operands[2].imm; + if (archRegWidth_==4) + imm = static_cast(imm); if (rs1 < imm) { results[0] = RegisterValue(static_cast(1), archRegWidth_); } else { @@ -460,13 +463,28 @@ void 
Instruction::execute() { results[0] = RegisterValue(instructionAddress_ + 4, archRegWidth_); break; } - // TODO EBREAK + case Opcode::RISCV_EBREAK: { // EBREAK // used to return control to a debugging environment pg27 20191213 + exceptionEncountered_ = true; + exception_ = InstructionException::SecureMonitorCall; + break; + } case Opcode::RISCV_ECALL: { // ECALL exceptionEncountered_ = true; exception_ = InstructionException::SupervisorCall; break; } + case Opcode::RISCV_MRET: { // MRET + branchAddress_ = (operands[0].get()) & ~1; // Set LSB of result to 0 + branchTaken_ = true; + + auto mstatus = operands[1].get(); + if (mstatus & MSTATUS_MPIE_MASK) + mstatus |= MSTATUS_MIE_MASK; + + results[0] = RegisterValue(mstatus, archRegWidth_); + break; + } case Opcode::RISCV_FENCE: { // FENCE // TODO currently modelled as a NOP as all codes are currently single // threaded "Informally, no other RISC-V hart or external device can @@ -709,28 +727,49 @@ void Instruction::execute() { results[0] = RegisterValue(static_cast(rs1 * rs2), archRegWidth_); break; } - // case Opcode::RISCV_MULH: {//MULH rd,rs1,rs2 - // return executionNYI(); - // - // const int64_t rs1 = operands[0].get(); - // const int64_t rs2 = operands[1].get(); - // results[0] = RegisterValue(mulhiss(rs1, rs2); - // break; - // } + case Opcode::RISCV_MULH: {//MULH rd,rs1,rs2 + int64_t result; + if (archRegWidth_==4) + { + const int64_t rs1 = operands[0].get(); + const int64_t rs2 = operands[1].get(); + result = (rs1*rs2)>>32; + } else { + const int64_t rs1 = operands[0].get(); + const int64_t rs2 = operands[1].get(); + //result = mulhiss(rs1, rs2); + return executionNYI(); + } + results[0] = RegisterValue(result, archRegWidth_); + break; + } case Opcode::RISCV_MULHU: { // MULHU rd,rs1,rs2 const uint64_t rs1 = operands[0].get(); const uint64_t rs2 = operands[1].get(); - results[0] = RegisterValue(mulhiuu(rs1, rs2), archRegWidth_); + uint64_t result; + if (archRegWidth_==4) + result = (rs1*rs2)>>32; + else + result = mulhiuu(rs1, rs2); + results[0] = RegisterValue(result, archRegWidth_); + break; + } + case Opcode::RISCV_MULHSU: {//MULHSU rd,rs1,rs2 + int64_t result; + if (archRegWidth_==4) + { + const int64_t rs1 = operands[0].get(); + const uint64_t rs2 = operands[1].get(); + result = (rs1*rs2)>>32; + } else { + const int64_t rs1 = operands[0].get(); + const uint64_t rs2 = operands[1].get(); + //result = mulhisu(rs1, rs2); + return executionNYI(); + } + results[0] = RegisterValue(result, archRegWidth_); break; } - // case Opcode::RISCV_MULHSU: {//MULHSU rd,rs1,rs2 - // return executionNYI(); - // - // const int64_t rs1 = operands[0].get(); - // const uint64_t rs2 = operands[1].get(); - // results[0] = RegisterValue(mulhisu(rs1, rs2); - // break; - // } case Opcode::RISCV_MULW: { // MULW rd,rs1,rs2 const uint32_t rs1 = operands[0].get(); const uint32_t rs2 = operands[1].get(); @@ -852,12 +891,14 @@ void Instruction::execute() { uint32_t new_csr_value = old_csr_value & ~(operands[1].get()); results[0] = RegisterValue(new_csr_value, 4); results[1] = RegisterValue(old_csr_value, 4); + break; } case Opcode::RISCV_CSRRCI: { uint32_t old_csr_value = operands[0].get(); uint32_t new_csr_value = old_csr_value & ~(c_imm); results[0] = RegisterValue(new_csr_value, 4); results[1] = RegisterValue(old_csr_value, 4); + break; } case Opcode::RISCV_CSRRS: { uint32_t old_csr_value = operands[0].get(); @@ -938,8 +979,12 @@ void Instruction::execute() { } break; } - case Opcode::RISCV_C_EBREAK: + case Opcode::RISCV_C_EBREAK: { + // used to return control to 
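
The RV32 MULH family above only needs a 64-bit intermediate product: widen both operands with the signedness each variant requires, multiply, and keep the upper 32 bits. The same arithmetic in isolation; for MULHSU the product always fits in a signed 64-bit value, so converting the unsigned operand is value-preserving.

#include <cstdint>

int32_t mulh32(int32_t a, int32_t b) {
  return static_cast<int32_t>((static_cast<int64_t>(a) * b) >> 32);
}

uint32_t mulhu32(uint32_t a, uint32_t b) {
  return static_cast<uint32_t>((static_cast<uint64_t>(a) * b) >> 32);
}

int32_t mulhsu32(int32_t a, uint32_t b) {
  // b converts to int64_t without loss, so the product is exact in 64 bits.
  return static_cast<int32_t>(
      (static_cast<int64_t>(a) * static_cast<int64_t>(b)) >> 32);
}
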
a debugging environment pg27 20191213 + exceptionEncountered_ = true; + exception_ = InstructionException::SecureMonitorCall; break; + } case Opcode::RISCV_C_FLD: break; case Opcode::RISCV_C_FLDSP: diff --git a/src/lib/arch/riscv/SystemRegister.cc b/src/lib/arch/riscv/SystemRegister.cc new file mode 100644 index 0000000000..05de188dcd --- /dev/null +++ b/src/lib/arch/riscv/SystemRegister.cc @@ -0,0 +1,124 @@ +#include "simeng/arch/riscv/Architecture.hh" + +namespace simeng { +namespace arch { +namespace riscv { + +bool MemoryMappedSystemRegisterBlock::put(uint16_t offset, const RegisterValue& value) +{ + auto it = memoryMappedSystemRegisters.upper_bound(offset); + if (it != memoryMappedSystemRegisters.begin() ) + { + it--; + if (offset-it->first < it->second->size()) { + it->second->put(value); + return true; + } + return false; + } + return false; +} + +bool MemoryMappedSystemRegisterBlock::get(uint16_t offset, RegisterValue& value) +{ + auto it = memoryMappedSystemRegisters.upper_bound(offset); + if (it != memoryMappedSystemRegisters.begin() ) + { + it--; + if (offset-it->first < it->second->size()) { + value = it->second->get(); + return true; + } + return false; + } + return false; +} + +/** Put/Get Memory Mapped Registers */ +bool SystemRegisterMemoryInterface::putMemoryMappedSystemRegister(uint64_t address, const RegisterValue& value) +{ + auto it = memoryMappedSystemRegisterBlocks_.upper_bound(address); + if (it != memoryMappedSystemRegisterBlocks_.begin() ) + { + it--; + if (address-it->first < it->second->size()) { + it->second->put(static_cast(address-it->first),value); + return true; + } + return false; + } + return false; +} + +bool SystemRegisterMemoryInterface::getMemoryMappedSystemRegister(uint64_t address, RegisterValue& value) +{ + auto it = memoryMappedSystemRegisterBlocks_.upper_bound(address); + if (it != memoryMappedSystemRegisterBlocks_.begin() ) + { + it--; + if (address-it->first < it->second->size()) { + it->second->get(static_cast(address-it->first),value); + return true; + } + return false; + } + return false; +} + +bool HostTargetInterface::put(uint16_t offset, const RegisterValue&value) +{ + switch(offset) { + case PAYLOAD_OFFSET : + { + char ch = value.getAsVector()[0]; + if (ch==3 || ch==1) + isHalted_ = true; + else + putchar(ch); + return true; + } + default : + return MemoryMappedSystemRegisterBlock::put(offset, value); + } +} + +int16_t Clint::updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) +{ + uint64_t ticks = iterations-last_tick; + uint64_t mtime_val = mtime_.get().get(); + bool ticked = false; + + last_tick = iterations; + + // if large time passed then multiple timer ticks might be needed + while (ticks>=mtime_count) + { + ticks -= mtime_count; + mtime_count = mtime_freq; + mtime_val += 1; + ticked = true; + } + + // any remaining ticks taken of mtime countdown + if (ticks) + mtime_count -= ticks; + + mtime_.put(mtime_val); + + if (ticked) + { + // to improve execution speed only do interrupt checks when the timer ticks + // check if interrupts enabled + uint16_t mstatus_tag = static_cast(architecture_.getSystemRegisterTag(SYSREG_MSTATUS)); + auto mstatus_bits = regFile->get( { RegisterType::SYSTEM, mstatus_tag } ).get(); + if (mstatus_bits & MSTATUS_MIE_MASK) + if (mtime_val >= mtimecmp_.get().get()) + return static_cast(InterruptId::TIMER); + } + + return -1; +} + +} // namespace riscv +} // namespace arch +} // namespace simeng diff --git a/src/lib/kernel/Linux.cc b/src/lib/kernel/Linux.cc index 
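
The Clint model above divides core ticks down to mtime increments and only checks for a pending timer interrupt on the cycles where mtime actually advances. The countdown logic on its own; field names are illustrative, and the real class also consults mstatus.MIE before raising the interrupt.

#include <cstdint>

struct TinyClint {
  uint64_t mtime = 0, mtimecmp = 0;
  uint64_t divider;    // core ticks per mtime increment
  uint64_t countdown;  // ticks remaining until the next increment

  explicit TinyClint(uint64_t ticksPerIncrement)
      : divider(ticksPerIncrement), countdown(ticksPerIncrement) {}

  // Advance by the ticks elapsed since the last call; returns true when a
  // timer interrupt should be raised (mtime has reached mtimecmp).
  bool advance(uint64_t elapsedTicks) {
    bool ticked = false;
    while (elapsedTicks >= countdown) {  // a large gap may span several ticks
      elapsedTicks -= countdown;
      countdown = divider;
      ++mtime;
      ticked = true;
    }
    countdown -= elapsedTicks;
    return ticked && mtime >= mtimecmp;
  }
};
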
02de895080..bc060bbae4 100644 --- a/src/lib/kernel/Linux.cc +++ b/src/lib/kernel/Linux.cc @@ -29,10 +29,12 @@ void Linux::createProcess(const LinuxProcess& process) { .currentBrk = process.getHeapStart(), .initialStackPointer = process.getStackPointer(), .mmapRegion = process.getMmapStart(), - .pageSize = process.getPageSize()}); + .pageSize = process.getPageSize(), + }); processStates_.back().fileDescriptorTable.push_back(STDIN_FILENO); processStates_.back().fileDescriptorTable.push_back(STDOUT_FILENO); processStates_.back().fileDescriptorTable.push_back(STDERR_FILENO); + processStates_.back().process = &process; // Define vector of all currently supported special file paths & files. supportedSpecialFiles_.insert( @@ -649,5 +651,11 @@ int64_t Linux::writev(int64_t fd, const void* iovdata, int iovcnt) { return ::writev(hfd, reinterpret_cast(iovdata), iovcnt); } +/** Lookup symbol value from table in elf file. */ +bool Linux::lookupSymbolValue(const std::string symbol, uint64_t& value) +{ + processStates_[0].process->lookupSymbolValue(symbol,value); +} + } // namespace kernel } // namespace simeng diff --git a/src/lib/kernel/LinuxProcess.cc b/src/lib/kernel/LinuxProcess.cc index 31e36d7f48..3279652a91 100644 --- a/src/lib/kernel/LinuxProcess.cc +++ b/src/lib/kernel/LinuxProcess.cc @@ -24,7 +24,7 @@ LinuxProcess::LinuxProcess(const std::vector& commandLine, // Parse ELF file assert(commandLine.size() > 0); char* unwrappedProcImgPtr; - Elf elf(commandLine[0], &unwrappedProcImgPtr); + Elf elf(commandLine[0], &unwrappedProcImgPtr,symbols_); if (!elf.isValid()) { return; } @@ -178,5 +178,17 @@ void LinuxProcess::createStack(char** processImage) { (*processImage) + stackPointer_); } +bool LinuxProcess::lookupSymbolValue(const std::string symbol, uint64_t& value) const +{ + auto lookup = symbols_.find(symbol); + if (lookup==symbols_.end()) + return false; + else + { + value = lookup->second; + return true; + } +} + } // namespace kernel } // namespace simeng diff --git a/src/lib/models/emulation/Core.cc b/src/lib/models/emulation/Core.cc index 0eff31d5a5..d9268da25f 100644 --- a/src/lib/models/emulation/Core.cc +++ b/src/lib/models/emulation/Core.cc @@ -20,7 +20,8 @@ Core::Core(MemoryInterface& instructionMemory, MemoryInterface& dataMemory, isa_(isa), pc_(entryPoint), registerFileSet_(isa.getRegisterFileStructures()), - architecturalRegisterFileSet_(registerFileSet_) { + architecturalRegisterFileSet_(registerFileSet_), + interruptId_(-1) { // Pre-load the first instruction instructionMemory_.requestRead({pc_, FETCH_SIZE}); @@ -144,11 +145,16 @@ void Core::tick() { } execute(uop); - isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); + + interruptId_ = isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); } void Core::execute(std::shared_ptr& uop) { - uop->execute(); + + if (interruptId_>=0) + uop->raiseInterrupt(interruptId_); + else + uop->execute(); if (uop->exceptionEncountered()) { instructionsExecuted_++; diff --git a/src/lib/models/mcu/Core.cc b/src/lib/models/mcu/Core.cc new file mode 100644 index 0000000000..a085d7a3b4 --- /dev/null +++ b/src/lib/models/mcu/Core.cc @@ -0,0 +1,515 @@ +#include "simeng/models/mcu/Core.hh" + +#include +#include +#include +#include + +#include "simeng/arch/riscv/SystemRegister.hh" + +namespace simeng { +namespace models { +namespace mcu { + +// TODO: Replace with config options +const unsigned int blockSize = 16; +const unsigned int clockFrequency = 2.5 * 1e9; + +Core::Core(MemoryInterface& instructionMemory, MemoryInterface& dataMemory, + uint64_t 
processMemorySize, uint64_t entryPoint, + const arch::Architecture& isa, BranchPredictor& branchPredictor, YAML::Node config) + : dataMemory_(dataMemory), + isa_(isa), + registerFileSet_(isa.getRegisterFileStructures()), + architecturalRegisterFileSet_(registerFileSet_), + fetchToDecodeBuffer_(1, {}), + decodeToExecuteBuffer_(1, nullptr, 1), + completionSlots_(2, {1, nullptr}), + regDepMap_(isa.getRegisterFileStructures(), registerFileSet_), + fetchUnit_(fetchToDecodeBuffer_, instructionMemory, processMemorySize, + entryPoint, blockSize, isa, branchPredictor), + decodeUnit_(fetchToDecodeBuffer_, decodeToExecuteBuffer_, + branchPredictor, + [this](auto instruction) { return canIssue(instruction); }), + writebackUnit_(completionSlots_, registerFileSet_, [](auto insnId) {}, + [this](auto instruction) {removeDep(instruction);}, + [this](auto instruction) { return removeInstrOrderQ(instruction); }), + loadStoreQueue_(4, dataMemory, { completionSlots_.data()+1, 1 }, [this](auto regs, auto values) { forwardOperands(regs, values); }, false, 4, 4, 2, 1, 1), + executeUnit_( + decodeToExecuteBuffer_, completionSlots_[0], + [this](auto regs, auto values) { forwardOperands(regs, values); }, + [this](auto instruction) { loadStoreQueue_.addLoad(instruction); }, + [this](auto instruction) { loadStoreQueue_.addStore(instruction); }, + [this](auto instruction) { raiseException(instruction); }, + [this](auto instruction) { addInstrOrderQ(instruction); }, + [this]() { return isInterruptPending(); }, + branchPredictor, false), + interruptId_(-1) { + // Query and apply initial state + auto state = isa.getInitialState(); + applyStateChange(state); + + maxStallCycleTimeout = -1; + maxSimCycleTimeout = -1; + maxInstrTimeout = -1; + if(config["Core"]["EnableHaltCheck"].IsDefined() && config["Core"]["EnableHaltCheck"].as()) { + enableHaltCheck = true; + if(config["Core"]["MaxStallCycleTimeout"].IsDefined()) { + maxStallCycleTimeout = config["Core"]["MaxStallCycleTimeout"].as(); + } + if(config["Core"]["MaxSimCycleTimeout"].IsDefined()) { + maxSimCycleTimeout = config["Core"]["MaxSimCycleTimeout"].as(); + } + if(config["Core"]["MaxInstrTimeout"].IsDefined()) { + maxInstrTimeout = config["Core"]["MaxInstrTimeout"].as(); + } + } +}; + +void Core::checkHalting() { + if(!enableHaltCheck) return; + + if (((ticks_ - lastCommitTick_) > maxStallCycleTimeout)) { + std::cout << std::dec << "[SimEng:Core] Max Pipeline stall cycle timeout reached at tick: " << (ticks_ - lastCommitTick_) << std::endl; + hasHalted_ = true; + } + + if((ticks_ > maxSimCycleTimeout)) { + std::cout << std::dec << "[SimEng:Core] Max Simulation cycle timeout reached at tick: " << ticks_ << std::endl; + hasHalted_ = true; + } + + if((getInstructionsRetiredCount() > maxInstrTimeout)) { + std::cout << std::dec << "[SimEng:Core] Max Instruction count timeout reached at tick: " << ticks_ << std::endl; + hasHalted_ = true; + } +} + +void Core::tick() { + ticks_++; + + checkHalting(); + + if (hasHalted_) return; + + if (exceptionHandler_ != nullptr) { + processExceptionHandler(); + return; + } + + // Writeback must be ticked at start of cycle, to ensure decode reads the + // correct values + // writebackUnit_.tick(); + // for(std::shared_ptr inst: writebackUnit_.getInstsForTrace()) { + // uint16_t sysreg_instrret = isa_.getSystemRegisterTag(arch::riscv32::riscv_sysreg::SYSREG_INSTRRET); + // uint16_t sysreg_cycle = isa_.getSystemRegisterTag(arch::riscv32::riscv_sysreg::SYSREG_CYCLE); + // registerFileSet_.set(Register{0x2, sysreg_instrret}, 
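
The halt checks above act as a watchdog for the MCU model: the run is stopped if commits stall for too long, if the total cycle count exceeds a limit, or if too many instructions retire, with each limit coming from the optional Core config keys read in the constructor. Reduced to its decision rule, under the assumption that the limits are unsigned so the -1 defaults wrap to the maximum value and effectively disable a check.

#include <cstdint>

struct HaltWatchdog {
  uint64_t maxStallCycles, maxSimCycles, maxInstructions;

  bool shouldHalt(uint64_t ticks, uint64_t lastCommitTick,
                  uint64_t retired) const {
    if (ticks - lastCommitTick > maxStallCycles) return true;  // pipeline stalled
    if (ticks > maxSimCycles) return true;                     // ran too long
    if (retired > maxInstructions) return true;                // too many instrs
    return false;
  }
};
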
RegisterValue(static_cast(writebackUnit_.getInstructionsWrittenCount()), 4)); + // registerFileSet_.set(Register{0x2, sysreg_cycle}, RegisterValue(static_cast(ticks_), 4)); + // isa_.updateInstrTrace(inst, ®isterFileSet_, ticks_); + // if(inst->isLoad()) { + // loadStoreQueue_.commitLoad(inst); + // } else if(inst->isStoreData()) { + // loadStoreQueue_.commitStore(inst); + // } + // lastCommitTick_ = ticks_; + // } + // writebackUnit_.traceFinished(); + + + loadStoreQueue_.processResponse(); + completionSlots_[1].tick(); + + // Tick units + fetchUnit_.tick(); + decodeUnit_.tick(); + executeUnit_.tick(); + + // Wipe any data read responses, as they will have been handled by this point + //dataMemory_.clearCompletedReads(); + + loadStoreQueue_.tick(); + // Writeback must be ticked at start of cycle, to ensure decode reads the + // correct values + writebackUnit_.tick(); + for(std::shared_ptr inst: writebackUnit_.getInstsForTrace()) { + uint16_t sysreg_instrret = isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_INSTRRET); + uint16_t sysreg_cycle = isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_CYCLE); + registerFileSet_.set(Register{0x2, sysreg_instrret}, RegisterValue(static_cast(writebackUnit_.getInstructionsWrittenCount()), 4)); + registerFileSet_.set(Register{0x2, sysreg_cycle}, RegisterValue(static_cast(ticks_), 4)); + isa_.updateInstrTrace(inst, ®isterFileSet_, ticks_); + if(inst->isLoad()) { + loadStoreQueue_.commitLoad(inst); + } else if(inst->isStoreData()) { + loadStoreQueue_.commitStore(inst); + } + lastCommitTick_ = ticks_; + } + // writebackUnit_.traceFinished(); + // Read pending registers for ready-to-execute uop; must happen after execute + // to allow operand forwarding to take place first + // readRegisters(); + + // Tick buffers + // Each unit must have wiped the entries at the head of the buffer after use, + // as these will now loop around and become the tail. + fetchToDecodeBuffer_.tick(); + decodeToExecuteBuffer_.tick(); + completionSlots_[0].tick(); + // for (auto& buffer : completionSlots_) { + // buffer.tick(); + // } + + // if (exceptionGenerated_) { + // handleException(); + // //fetchUnit_.requestFromPC(); + // return; + // } + + // Check for flush + if (executeUnit_.shouldFlush()) { + // Flush was requested at execute stage + // Update PC and wipe younger buffers (Fetch/Decode, Decode/Execute) + auto targetAddress = executeUnit_.getFlushAddress(); + + fetchUnit_.flushLoopBuffer(); + fetchUnit_.updatePC(targetAddress); + fetchUnit_.flushPredictor(targetAddress); + // Ensure instructions in the buffer if any are set to be flushed before being removed, this helps with removing the respective dependencies if any + decodeUnit_.purgeFlushed(); + executeUnit_.purgeFlushed(); + fetchToDecodeBuffer_.fill({}); + decodeToExecuteBuffer_.fill(nullptr); + loadStoreQueue_.purgeFlushed(); + regDepMap_.purgeFlushed(); + + flushes_++; + } else if (decodeUnit_.shouldFlush()) { + assert(false && "Decode unit should not generate flush"); + // Flush was requested at decode stage + // Update PC and wipe Fetch/Decode buffer. 
+ auto targetAddress = decodeUnit_.getFlushAddress(); + + fetchUnit_.flushLoopBuffer(); + fetchUnit_.updatePC(targetAddress); + fetchToDecodeBuffer_.fill({}); + + flushes_++; + } + + if (exceptionGenerated_) { + handleException(); + //fetchUnit_.requestFromPC(); + return; + } + + fetchUnit_.requestFromPC(); + interruptId_ = isa_.updateSystemTimerRegisters(®isterFileSet_, ticks_); +} + +bool Core::hasHalted() const { + if (hasHalted_) { + return true; + } + + // Core is considered to have halted when the fetch unit has halted, there + // are no uops at the head of any buffer, and no exception is currently being + // handled. + bool decodePending = fetchToDecodeBuffer_.getHeadSlots()[0].size() > 0; + bool executePending = decodeToExecuteBuffer_.getHeadSlots()[0] != nullptr; + bool writebackPending = completionSlots_[0].getHeadSlots()[0] != nullptr; + writebackPending |= completionSlots_[1].getHeadSlots()[0] != nullptr; + + return (fetchUnit_.hasHalted() && !decodePending && !writebackPending && + !executePending && exceptionHandler_ == nullptr); +} + +const ArchitecturalRegisterFileSet& Core::getArchitecturalRegisterFileSet() + const { + return architecturalRegisterFileSet_; +} + +uint64_t Core::getInstructionsRetiredCount() const { + return writebackUnit_.getInstructionsWrittenCount(); +} + +uint64_t Core::getSystemTimer() const { + // TODO: This will need to be changed if we start supporting DVFS. + return ticks_ / (clockFrequency / 1e9); +} + +std::map Core::getStats() const { + auto retired = writebackUnit_.getInstructionsWrittenCount(); + auto ipc = retired / static_cast(ticks_); + std::ostringstream ipcStr; + ipcStr << std::setprecision(2) << ipc; + + // Sum up the branch stats reported across the execution units. + uint64_t totalBranchesExecuted = 0; + uint64_t totalBranchMispredicts = 0; + totalBranchesExecuted += executeUnit_.getBranchExecutedCount(); + totalBranchMispredicts += executeUnit_.getBranchMispredictedCount(); + auto branchMissRate = 100.0f * static_cast(totalBranchMispredicts) / + static_cast(totalBranchesExecuted); + std::ostringstream branchMissRateStr; + branchMissRateStr << std::setprecision(3) << branchMissRate << "%"; + + return {{"cycles", std::to_string(ticks_)}, + {"retired", std::to_string(retired)}, + {"ipc", ipcStr.str()}, + {"flushes", std::to_string(flushes_)}, + {"branch.executed", std::to_string(totalBranchesExecuted)}, + {"branch.mispredict", std::to_string(totalBranchMispredicts)}, + {"branch.missrate", branchMissRateStr.str()}, + {"lsu.ldminlatency", std::to_string(loadStoreQueue_.getMinLdLat())}, + {"lsu.ldmaxlatency", std::to_string(loadStoreQueue_.getMaxLdLat())}, + {"lsu.ldavglatency", std::to_string(loadStoreQueue_.getAvgLdLat())}}; +} + +void Core::raiseException(const std::shared_ptr& instruction) { + exceptionGenerated_ = true; + exceptionGeneratingInstruction_ = instruction; +} + +void Core::handleException() { + exceptionGenerated_ = false; + + exceptionHandler_ = + isa_.handleException(exceptionGeneratingInstruction_, *this, dataMemory_); + + processExceptionHandler(); +// isa_.updateInstrTrace(exceptionGeneratingInstruction_, ®isterFileSet_, ticks_); +// lastCommitTick_ = ticks_; +// assert(removeInstrOrderQ(exceptionGeneratingInstruction_) && "Unexpected instruction at the top of inorder instr queue on exception"); + + //TODO: This is not a good point to flush the pipeline if the exception is not changing the PC. 
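+  // Sketch only (nothing below is executed; `result` mirrors the value used in
+  // processExceptionHandler() further down, and `nextSequentialPC` is a
+  // hypothetical name for the address of the following instruction): a later
+  // refinement could flush the younger stages only when the handler actually
+  // redirects control, e.g.
+  //   const auto& result = exceptionHandler_->getResult();
+  //   if (result.instructionAddress != nextSequentialPC) {
+  //     // purge decode/execute, wipe the pipeline buffers, purge LSQ and regDepMap_
+  //   }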
+ + // Flush pipeline +// decodeUnit_.purgeFlushed(); +// executeUnit_.purgeFlushed(); +// fetchToDecodeBuffer_.fill({}); +// decodeToExecuteBuffer_.fill(nullptr); +// loadStoreQueue_.purgeFlushed(); +// completionSlots_[0].fill(nullptr); +// completionSlots_[1].fill(nullptr); +// regDepMap_.purgeFlushed(); +} + +void Core::processExceptionHandler() { + assert(exceptionHandler_ != nullptr && + "Attempted to process an exception handler that wasn't present"); + if (dataMemory_.hasPendingRequests()) { + // Must wait for all memory requests to complete before processing the + // exception + return; + } + + auto success = exceptionHandler_->tick(); + if (!success) { + // Exception handler requires further ticks to complete + return; + } + + const auto& result = exceptionHandler_->getResult(); + + if (result.fatal) { + hasHalted_ = true; + std::cout << "[SimEng:Core] Halting due to fatal exception" << std::endl; + } else { + //fetchUnit_.flushLoopBuffer(); + fetchUnit_.updatePC(result.instructionAddress); + applyStateChange(result.stateChange); + } + + exceptionHandler_ = nullptr; +} + +void Core::loadData(const std::shared_ptr& instruction) { + const auto& addresses = instruction->getGeneratedAddresses(); + for (const auto& target : addresses) { + dataMemory_.requestRead(target); + } + + // NOTE: This model only supports zero-cycle data memory models, and will not + // work unless data requests are handled synchronously. + for (const auto& response : dataMemory_.getCompletedReads()) { + instruction->supplyData(response.target.address, response.data); + } + + assert(instruction->hasAllData() && + "Load instruction failed to obtain all data this cycle"); + + instruction->execute(); + + if (instruction->isStoreData()) { + storeData(instruction); + } +} + +void Core::storeData(const std::shared_ptr& instruction) { + if (instruction->isStoreAddress()) { + auto addresses = instruction->getGeneratedAddresses(); + for (auto const& target : addresses) { + previousAddresses_.push(target); + } + } + if (instruction->isStoreData()) { + const auto data = instruction->getData(); + for (size_t i = 0; i < data.size(); i++) { + dataMemory_.requestWrite(previousAddresses_.front(), data[i]); + previousAddresses_.pop(); + } + } +} + +void Core::forwardOperands(const span& registers, + const span& values) { + return; + // assert(registers.size() == values.size() && + // "Mismatched register and value vector sizes"); + + // const auto& uop = decodeToExecuteBuffer_.getTailSlots()[0]; + // if (uop == nullptr) { + // return; + // } + + // auto sourceRegisters = uop->getOperandRegisters(); + // for (size_t i = 0; i < registers.size(); i++) { + // // Check each forwarded register vs source operands and supply for each + // // match + // for (size_t operand = 0; operand < sourceRegisters.size(); operand++) { + // const auto& sourceReg = sourceRegisters[operand]; + // if (uop->canExecute()) { + // return; + // } + // if (sourceReg == registers[i] && !uop->isOperandReady(operand)) { + // // Supply the operand + // uop->supplyOperand(operand, values[i]); + // } + // } + // } +} + +bool Core::canIssue(const std::shared_ptr& uop) { + if (uop->isSysCall() && inorderIQ_.size() > 0) { + return false; + } + if((uop->isLoad() || uop->isStoreData()) && loadStoreQueue_.isBusy()) { + return false; + } + if (regDepMap_.canRead(uop) && regDepMap_.canWrite(uop)) { + regDepMap_.insert(uop); + return true; + } + return false; +} + +void Core::removeDep(const std::shared_ptr& uop) { + regDepMap_.remove(uop); +} + +void 
Core::readRegisters() { + if (decodeToExecuteBuffer_.isStalled()) { + return; + } + + const auto& uop = decodeToExecuteBuffer_.getTailSlots()[0]; + if (uop == nullptr) { + return; + } + + // Register read + // Identify missing registers and supply values + const auto& sourceRegisters = uop->getOperandRegisters(); + for (size_t i = 0; i < sourceRegisters.size(); i++) { + const auto& reg = sourceRegisters[i]; + if (!uop->isOperandReady(i)) { + uop->supplyOperand(i, registerFileSet_.get(reg)); + } + } +} + +void Core::applyStateChange(const arch::ProcessStateChange& change) { + // Update registers in accoradance with the ProcessStateChange type + switch (change.type) { + case arch::ChangeType::INCREMENT: { + for (size_t i = 0; i < change.modifiedRegisters.size(); i++) { + registerFileSet_.set( + change.modifiedRegisters[i], + registerFileSet_.get(change.modifiedRegisters[i]).get() + + change.modifiedRegisterValues[i].get()); + } + break; + } + case arch::ChangeType::DECREMENT: { + for (size_t i = 0; i < change.modifiedRegisters.size(); i++) { + registerFileSet_.set( + change.modifiedRegisters[i], + registerFileSet_.get(change.modifiedRegisters[i]).get() - + change.modifiedRegisterValues[i].get()); + } + break; + } + default: { // arch::ChangeType::REPLACEMENT + // If type is ChangeType::REPLACEMENT, set new values + for (size_t i = 0; i < change.modifiedRegisters.size(); i++) { + registerFileSet_.set(change.modifiedRegisters[i], + change.modifiedRegisterValues[i]); + } + break; + } + } + + // Update memory + // TODO: Analyse if ChangeType::INCREMENT or ChangeType::DECREMENT case is + // required for memory changes + for (size_t i = 0; i < change.memoryAddresses.size(); i++) { + dataMemory_.requestWrite(change.memoryAddresses[i], + change.memoryAddressValues[i]); + } +} + +void Core::handleLoad(const std::shared_ptr& instruction) { + loadData(instruction); + if (instruction->exceptionEncountered()) { + raiseException(instruction); + return; + } + + forwardOperands(instruction->getDestinationRegisters(), + instruction->getResults()); + // Manually add the instruction to the writeback input buffer + completionSlots_[1].getTailSlots()[0] = instruction; +} + +void Core::addInstrOrderQ(const std::shared_ptr& insn) { + //std::cout << std::dec << ticks_ << ": Adding instruction at address: 0x" << std::hex << insn->getInstructionAddress() << std::endl; + inorderIQ_.push_back(insn); +} + +bool Core::removeInstrOrderQ(const std::shared_ptr& insn) { + if (insn == inorderIQ_.front()) { + //std::cout << std::dec << ticks_ << ": Removing instruction at address: 0x" << std::hex << insn->getInstructionAddress() << std::endl; + // if(insn->exceptionEncountered()) { + // exceptionGenerated_ = true; + // exceptionGeneratingInstruction_ = insn; + // handleException(); + // } + inorderIQ_.pop_front(); + return true; + } else { + return false; + } +} + +int16_t Core::isInterruptPending() { + if (interruptId_>=0) { + std::cout << std::dec << "[SimEng:Core] Interrupt Pending id: " << interruptId_ << ", at tick: " << ticks_ << std::endl; + return interruptId_; + } else { + return -1; + } +} + +} // namespace mcu +} // namespace models +} // namespace simeng diff --git a/src/lib/pipeline/FetchUnit.cc b/src/lib/pipeline/FetchUnit.cc index ade3d307c0..28d2eaba51 100644 --- a/src/lib/pipeline/FetchUnit.cc +++ b/src/lib/pipeline/FetchUnit.cc @@ -129,7 +129,7 @@ void FetchUnit::tick() { BranchPrediction prediction = {false, 0}; if (macroOp[0]->isBranch()) { prediction = branchPredictor_.predict(pc_, 
macroOp[0]->getBranchType(), - macroOp[0]->getKnownTarget()); + macroOp[0]->getKnownOffset()); macroOp[0]->setBranchPrediction(prediction); } diff --git a/src/lib/pipeline_hi/DecodeUnit.cc b/src/lib/pipeline_hi/DecodeUnit.cc new file mode 100644 index 0000000000..86a298a1a3 --- /dev/null +++ b/src/lib/pipeline_hi/DecodeUnit.cc @@ -0,0 +1,117 @@ +#include "simeng/pipeline_hi/DecodeUnit.hh" + +#include + +namespace simeng { +namespace pipeline_hi { + +DecodeUnit::DecodeUnit(PipelineBuffer& input, + PipelineBuffer>& output, + BranchPredictor& predictor, + std::function&)> canIssue) + : input_(input), output_(output), predictor_(predictor), canIssue_(canIssue){}; + +void DecodeUnit::tick() { + // Stall if output buffer is stalled + if (output_.isStalled()) { + input_.stall(true); + return; + } + + shouldFlush_ = false; + input_.stall(false); + + // Stall if internal uop is overpopulated, otherwise add uops from input to + // internal buffer + if (microOps_.size() >= output_.getWidth()) { + input_.stall(true); + } else { + // Populate uop buffer with newly fetched macro-ops + for (size_t slot = 0; slot < input_.getWidth(); slot++) { + auto& macroOp = input_.getHeadSlots()[slot]; + + if (macroOp.size() == 0) { + // Nothing to process for this macro-op + continue; + } + + for (uint8_t index = 0; index < macroOp.size(); index++) { + microOps_.push_back(std::move(macroOp[index])); + } + + input_.getHeadSlots()[slot].clear(); + } + } + + // Process uops in buffer + for (size_t slot = 0; slot < output_.getWidth(); slot++) { + // If there's no more uops to decode, exit loop early + if (!microOps_.size()) break; + + //Check for dependencies before forwarding to next stage + //Stop-gap implementation + if (!canIssue_(microOps_.front())) break; + + // Move uop to output buffer and remove from internal buffer + auto& uop = (output_.getTailSlots()[slot] = std::move(microOps_.front())); + microOps_.pop_front(); + + // Check preliminary branch prediction results now that the instruction is + // decoded. 
Identifies: + // - Non-branch instructions mistakenly predicted as branches + // - Incorrect targets for immediate branches + // auto [misprediction, correctAddress] = uop->checkEarlyBranchMisprediction(); + // if (misprediction) { + // earlyFlushes_++; + // shouldFlush_ = true; + // pc_ = correctAddress; + + // if (!uop->isBranch()) { + // // Non-branch incorrectly predicted as a branch; let the predictor know + // predictor_.update(uop->getInstructionAddress(), false, pc_, + // uop->getBranchType()); + // } + // // Remove macro-operations in microOps_ buffer after macro-operation + // // decoded in this cycle + // auto uopIt = microOps_.begin(); + // // Find first microOps_ entry not belonging to same address as flushing + // // instruction + // while (uopIt != microOps_.end()) { + // if ((*uopIt)->getInstructionAddress() != uop->getInstructionAddress()) { + // break; + // } else { + // uopIt++; + // } + // } + // // Remove all entries after first macro-operation in buffer + // while (uopIt != microOps_.end()) { + // uopIt = microOps_.erase(uopIt); + // } + + // // Skip processing remaining uops, as they need to be flushed + // break; + // } + } +} + +bool DecodeUnit::shouldFlush() const { return shouldFlush_; } +uint64_t DecodeUnit::getFlushAddress() const { return pc_; } +uint64_t DecodeUnit::getEarlyFlushes() const { return earlyFlushes_; }; + +void DecodeUnit::purgeFlushed() { + if (output_.getTailSlots()[0] != nullptr) { + output_.getTailSlots()[0]->setFlushed(); + } + + if (input_.getHeadSlots()[0].size() != 0) { + input_.getHeadSlots()[0][0]->setFlushed(); + } + + if (microOps_.size()) + microOps_.front()->setFlushed(); + microOps_.clear(); + input_.stall(false); +} + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/lib/pipeline_hi/DispatchIssueUnit.cc b/src/lib/pipeline_hi/DispatchIssueUnit.cc new file mode 100644 index 0000000000..93ce9fa324 --- /dev/null +++ b/src/lib/pipeline_hi/DispatchIssueUnit.cc @@ -0,0 +1,269 @@ +#include "simeng/pipeline_hi/DispatchIssueUnit.hh" + +#include +#include + +namespace simeng { +namespace pipeline_hi { + +DispatchIssueUnit::DispatchIssueUnit( + PipelineBuffer>& fromRename, + std::vector>>& issuePorts, + const RegisterFileSet& registerFileSet, PortAllocator& portAllocator, + const std::vector& physicalRegisterStructure, YAML::Node config) + : input_(fromRename), + issuePorts_(issuePorts), + registerFileSet_(registerFileSet), + scoreboard_(physicalRegisterStructure.size()), + dependencyMatrix_(physicalRegisterStructure.size()), + portAllocator_(portAllocator) { + // Initialise scoreboard + for (size_t type = 0; type < physicalRegisterStructure.size(); type++) { + scoreboard_[type].assign(physicalRegisterStructure[type], true); + dependencyMatrix_[type].resize(physicalRegisterStructure[type]); + } + // Create set of reservation station structs with correct issue port + // mappings + for (size_t i = 0; i < config["Reservation-Stations"].size(); i++) { + // Iterate over each reservation station in config + auto reservation_station = config["Reservation-Stations"][i]; + // Create ReservationStation struct to be stored + ReservationStation rs = { + reservation_station["Size"].as(), + reservation_station["Dispatch-Rate"].as(), + 0, + {}}; + // Resize rs port attribute to match what's defined in config file + rs.ports.resize(reservation_station["Ports"].size()); + for (size_t j = 0; j < reservation_station["Ports"].size(); j++) { + // Iterate over issue ports in config + uint16_t issue_port = reservation_station["Ports"][j].as(); + 
rs.ports[j].issuePort = issue_port; + // Add port mapping entry, resizing vector if needed + if ((issue_port + 1) > portMapping_.size()) { + portMapping_.resize((issue_port + 1)); + } + portMapping_[issue_port] = {i, j}; + } + reservationStations_.push_back(rs); + } + for (uint16_t i = 0; i < reservationStations_.size(); i++) + flushed_.emplace(i, std::initializer_list>{}); +} + +void DispatchIssueUnit::tick() { + input_.stall(false); + + /** Stores the number of instructions dispatched for each + * reservation station. */ + std::vector dispatches( + static_cast(reservationStations_.size()), 0); + + for (size_t slot = 0; slot < input_.getWidth(); slot++) { + auto& uop = input_.getHeadSlots()[slot]; + if (uop == nullptr) { + continue; + } + + const std::vector& supportedPorts = uop->getSupportedPorts(); + if (uop->exceptionEncountered()) { + // Exception; mark as ready to commit, and remove from pipeline + uop->setCommitReady(); + input_.getHeadSlots()[slot] = nullptr; + continue; + } + // Allocate issue port to uop + uint16_t port = portAllocator_.allocate(supportedPorts); + uint16_t RS_Index = portMapping_[port].first; + uint16_t RS_Port = portMapping_[port].second; + assert(RS_Index < reservationStations_.size() && + "Allocated port inaccessible"); + ReservationStation& rs = reservationStations_[RS_Index]; + + // When appropriate, stall uop or input buffer if stall buffer full + if (rs.currentSize == rs.capacity || + dispatches[RS_Index] == rs.dispatchRate) { + // Deallocate port given + portAllocator_.deallocate(port); + input_.stall(true); + rsStalls_++; + return; + } + + // Assume the uop will be ready + bool ready = true; + + // Register read + // Identify remaining missing registers and supply values + auto& sourceRegisters = uop->getOperandRegisters(); + for (uint16_t i = 0; i < sourceRegisters.size(); i++) { + const auto& reg = sourceRegisters[i]; + + if (!uop->isOperandReady(i)) { + // The operand hasn't already been supplied + if (scoreboard_[reg.type][reg.tag]) { + // The scoreboard says it's ready; read and supply the register value + uop->supplyOperand(i, registerFileSet_.get(reg)); + } else { + // This register isn't ready yet. 
Register this uop to the dependency + // matrix for a more efficient lookup later + dependencyMatrix_[reg.type][reg.tag].push_back({uop, port, i}); + ready = false; + } + } + } + + // Set scoreboard for all destination registers as not ready + auto& destinationRegisters = uop->getDestinationRegisters(); + for (const auto& reg : destinationRegisters) { + scoreboard_[reg.type][reg.tag] = false; + } + + // Increment dispatches made and RS occupied entries size + dispatches[RS_Index]++; + rs.currentSize++; + + if (ready) { + rs.ports[RS_Port].ready.push_back(std::move(uop)); + } + + input_.getHeadSlots()[slot] = nullptr; + } +} + +void DispatchIssueUnit::issue() { + int issued = 0; + // Check the ready queues, and issue an instruction from each if the + // corresponding port isn't blocked + for (size_t i = 0; i < issuePorts_.size(); i++) { + ReservationStation& rs = reservationStations_[portMapping_[i].first]; + auto& queue = rs.ports[portMapping_[i].second].ready; + if (issuePorts_[i].isStalled()) { + if (queue.size() > 0) { + portBusyStalls_++; + } + continue; + } + + if (queue.size() > 0) { + auto& uop = queue.front(); + issuePorts_[i].getTailSlots()[0] = std::move(uop); + queue.pop_front(); + + // Inform the port allocator that an instruction issued + portAllocator_.issued(i); + issued++; + + assert(rs.currentSize > 0); + rs.currentSize--; + } + } + + if (issued == 0) { + for (const auto& rs : reservationStations_) { + if (rs.currentSize != 0) { + backendStalls_++; + return; + } + } + frontendStalls_++; + } +} + +void DispatchIssueUnit::forwardOperands(const span& registers, + const span& values) { + assert(registers.size() == values.size() && + "Mismatched register and value vector sizes"); + + for (size_t i = 0; i < registers.size(); i++) { + const auto& reg = registers[i]; + // Flag scoreboard as ready now result is available + scoreboard_[reg.type][reg.tag] = true; + + // Supply the value to all dependent uops + const auto& dependents = dependencyMatrix_[reg.type][reg.tag]; + for (auto& entry : dependents) { + entry.uop->supplyOperand(entry.operandIndex, values[i]); + if (entry.uop->canExecute()) { + // Add the now-ready instruction to the relevant ready queue + auto rsInfo = portMapping_[entry.port]; + reservationStations_[rsInfo.first].ports[rsInfo.second].ready.push_back( + std::move(entry.uop)); + } + } + + // Clear the dependency list + dependencyMatrix_[reg.type][reg.tag].clear(); + } +} + +void DispatchIssueUnit::setRegisterReady(Register reg) { + scoreboard_[reg.type][reg.tag] = true; +} + +void DispatchIssueUnit::purgeFlushed() { + for (size_t i = 0; i < reservationStations_.size(); i++) { + // Search the ready queues for flushed instructions and remove them + auto& rs = reservationStations_[i]; + for (auto& port : rs.ports) { + // Ready queue + auto readyIter = port.ready.begin(); + while (readyIter != port.ready.end()) { + auto& uop = *readyIter; + if (uop->isFlushed()) { + portAllocator_.deallocate(port.issuePort); + readyIter = port.ready.erase(readyIter); + assert(rs.currentSize > 0); + rs.currentSize--; + } else { + readyIter++; + } + } + } + } + + // Collect flushed instructions and remove them from the dependency matrix + for (auto& it : flushed_) it.second.clear(); + for (auto& registerType : dependencyMatrix_) { + for (auto& dependencyList : registerType) { + auto it = dependencyList.begin(); + while (it != dependencyList.end()) { + auto& entry = *it; + if (entry.uop->isFlushed()) { + auto rsIndex = portMapping_[entry.port].first; + if 
(!flushed_[rsIndex].count(entry.uop)) { + flushed_[rsIndex].insert(entry.uop); + portAllocator_.deallocate(entry.port); + } + it = dependencyList.erase(it); + } else { + it++; + } + } + } + } + + // Update reservation station size + for (uint8_t i = 0; i < reservationStations_.size(); i++) { + assert(reservationStations_[i].currentSize >= flushed_[i].size()); + reservationStations_[i].currentSize -= flushed_[i].size(); + } +} + +uint64_t DispatchIssueUnit::getRSStalls() const { return rsStalls_; } +uint64_t DispatchIssueUnit::getFrontendStalls() const { + return frontendStalls_; +} +uint64_t DispatchIssueUnit::getBackendStalls() const { return backendStalls_; } +uint64_t DispatchIssueUnit::getPortBusyStalls() const { + return portBusyStalls_; +} + +void DispatchIssueUnit::getRSSizes(std::vector& sizes) const { + for (auto& rs : reservationStations_) { + sizes.push_back(rs.capacity - rs.currentSize); + } +} + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/lib/pipeline_hi/ExecuteUnit.cc b/src/lib/pipeline_hi/ExecuteUnit.cc new file mode 100644 index 0000000000..e3b5089d5c --- /dev/null +++ b/src/lib/pipeline_hi/ExecuteUnit.cc @@ -0,0 +1,255 @@ +#include "simeng/pipeline_hi/ExecuteUnit.hh" + +#include +#include + +namespace simeng { +namespace pipeline_hi { + +ExecuteUnit::ExecuteUnit( + PipelineBuffer>& input, + PipelineBuffer>& output, + std::function, span)> forwardOperands, + std::function&)> handleLoad, + std::function&)> handleStore, + std::function&)> raiseException, + std::function&)> addInstrOrderQ, + std::function isInterruptPending, + BranchPredictor& predictor, bool pipelined, + const std::vector& blockingGroups) + : input_(input), + output_(output), + forwardOperands_(forwardOperands), + handleLoad_(handleLoad), + handleStore_(handleStore), + raiseException_(raiseException), + addInstrOrderQ_(addInstrOrderQ), + isInterruptPending_(isInterruptPending), + predictor_(predictor), + pipelined_(pipelined), + blockingGroups_(blockingGroups) {} + +void ExecuteUnit::tick() { + tickCounter_++; + shouldFlush_ = false; + + if (stallUntil_ <= tickCounter_) { + input_.stall(false); + // Input isn't stalled; process instruction and add to pipeline + + auto& uop = input_.getHeadSlots()[0]; + if (uop != nullptr) { + if (!uop->isFlushed()) { + // Retrieve execution latency from the instruction + auto latency = uop->getLatency(); + cycles_++; + // Block uop execution if appropriate + if (std::find(blockingGroups_.begin(), blockingGroups_.end(), + uop->getGroup()) != blockingGroups_.end()) { + if (operationsStalled_.size() == 0) { + // Add uop to pipeline + pipeline_.push_back({nullptr, tickCounter_ + latency - 1}); + pipeline_.back().insn = std::move(uop); + operationsStalled_.push_back(pipeline_.back().insn); + } else { + // Stall execution start cycle + operationsStalled_.push_back(nullptr); + operationsStalled_.back() = std::move(uop); + } + } else if (latency == 1 && pipeline_.size() == 0) { + // Pipeline is empty and insn will execute this cycle; bypass + execute(uop); + } else { + // This instruction may take more than a single cycle; check for a + // stall. For unpipelined units, the unit will stall for the full + // instruction duration. + auto stallCycles = + pipelined_ ? 
uop->getStallCycles() : uop->getLatency(); + if (stallCycles > 1) { + stallUntil_ = tickCounter_ + stallCycles - 1; + input_.stall(true); + } + + // Add insn to pipeline + pipeline_.push_back({nullptr, tickCounter_ + latency - 1}); + pipeline_.back().insn = std::move(uop); + } + } + input_.getHeadSlots()[0] = nullptr; + } + } + + if (pipeline_.size() == 0) { + return; + } + + auto& head = pipeline_.front(); + if (head.readyAt <= tickCounter_) { + // Check if the completion of an operation would unblock + // another stalled operation. + if (std::find(blockingGroups_.begin(), blockingGroups_.end(), + head.insn->getGroup()) != blockingGroups_.end()) { + operationsStalled_.pop_front(); + if (operationsStalled_.size() > 0) { + // Add uop to pipeline + auto& uop = operationsStalled_.front(); + pipeline_.push_back({nullptr, tickCounter_ + uop->getLatency() - 1}); + pipeline_.back().insn = std::move(uop); + operationsStalled_.front() = pipeline_.back().insn; + } + } + execute(head.insn); + pipeline_.pop_front(); + } +} + +void ExecuteUnit::execute(std::shared_ptr& uop) { + assert(uop->canExecute() && + "Attempted to execute an instruction before it was ready"); + + int16_t pendingInterruptId = isInterruptPending_(); + if(pendingInterruptId>=0) { + //std::cout << std::hex << "Execution encountered pending interrupt, PC 0x" << uop->getInstructionAddress() << std::endl; + uop->raiseInterrupt(pendingInterruptId); + uop->setFlushed(); + raiseException_(uop); + shouldFlush_ = true; + return; + } + + addInstrOrderQ_(uop); + if (uop->exceptionEncountered()) { + // Exception encountered prior to execution + // TODO: Identify whether this can be removed; executing an + // exception-encountered uop would have to be guaranteed to be safe + raiseException_(uop); + return; + } + + if (uop->isLoad()) { + uop->generateAddresses(); + if (uop->exceptionEncountered()) { + // Exception; don't pass handle load function + raiseException_(uop); + return; + } + handleLoad_(uop); + return; + } else if (uop->isStoreAddress() || uop->isStoreData()) { + if (uop->isStoreAddress()) { + uop->generateAddresses(); + } + if (uop->isStoreData()) { + uop->execute(); + } + handleStore_(uop); + } else { + uop->execute(); + } + + if (uop->exceptionEncountered()) { + // Exception; don't forward results, don't pass uop forward + raiseException_(uop); + shouldFlush_ = true; + //TODO: Let the instruction go into writeback stage + // return; + } + + if (uop->isBranch()) { + pc_ = uop->getBranchAddress(); + + // Update branch predictor with branch results + predictor_.update(uop->getInstructionAddress(), uop->wasBranchTaken(), pc_, + uop->getBranchType()); + + // Update the branch instruction counter + branchesExecuted_++; + + if (uop->wasBranchMispredicted()) { + //std::cout << std::dec << tickCounter_ << std::hex << ": Misprediction iaddr: 0x" << uop->getInstructionAddress() << ", " << uop->getBranchPrediction().taken << std::endl; + // Misprediction; flush the pipeline + shouldFlush_ = true; + flushAfter_ = uop->getInstructionId(); + // Update the branch misprediction counter + branchMispredicts_++; + } + } + + // Operand forwarding; allows a dependent uop to execute next cycle + //if (!uop->isMul() && !uop->isDiv()) { + // forwardOperands_(uop->getDestinationRegisters(), uop->getResults()); + //} + + output_.getTailSlots()[0] = std::move(uop); +} + +bool ExecuteUnit::shouldFlush() const { return shouldFlush_; } +uint64_t ExecuteUnit::getFlushAddress() const { return pc_; } +uint64_t ExecuteUnit::getFlushSeqId() const { return 
flushAfter_; } + +void ExecuteUnit::purgeFlushed() { + auto& uop = input_.getHeadSlots()[0]; + if (uop != nullptr) { + if (!uop->isFlushed()) { + uop->setFlushed(); + } + } + + if (pipeline_.size() == 0) { + return; + } + + // If the newest instruction has been flushed, clear any stalls. + if (pipeline_.back().insn->isFlushed()) { + stallUntil_ = tickCounter_; + } + + // Iterate over the pipeline and remove flushed instructions + auto it = pipeline_.begin(); + while (it != pipeline_.end()) { + auto& entry = *it; + if (entry.insn->isFlushed()) { + it = pipeline_.erase(it); + } else { + it++; + } + } + + // If first blocking in-flight instruction is flushed, ensure another + // non-flushed stalled instruction takes it place in the pipeline if + // available. + bool replace = false; + if (operationsStalled_.size() > 0 && + operationsStalled_.front()->isFlushed()) { + replace = true; + } + auto itStall = operationsStalled_.begin(); + while (itStall != operationsStalled_.end()) { + auto& entry = *itStall; + if (entry->isFlushed()) { + itStall = operationsStalled_.erase(itStall); + } else { + itStall++; + } + } + + if (replace && operationsStalled_.size() > 0) { + // Add uop to pipeline + auto& uop = operationsStalled_.front(); + pipeline_.push_back({nullptr, tickCounter_ + uop->getLatency() - 1}); + pipeline_.back().insn = std::move(uop); + operationsStalled_.front() = pipeline_.back().insn; + } +} + +uint64_t ExecuteUnit::getBranchExecutedCount() const { + return branchesExecuted_; +} +uint64_t ExecuteUnit::getBranchMispredictedCount() const { + return branchMispredicts_; +} + +uint64_t ExecuteUnit::getCycles() const { return cycles_; } + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/lib/pipeline_hi/FetchUnit.cc b/src/lib/pipeline_hi/FetchUnit.cc new file mode 100644 index 0000000000..4de190efca --- /dev/null +++ b/src/lib/pipeline_hi/FetchUnit.cc @@ -0,0 +1,265 @@ +#include "simeng/pipeline_hi/FetchUnit.hh" + +namespace simeng { +namespace pipeline_hi { + +FetchUnit::FetchUnit(PipelineBuffer& output, + MemoryInterface& instructionMemory, + uint64_t programByteLength, uint64_t entryPoint, + uint8_t blockSize, const arch::Architecture& isa, + BranchPredictor& branchPredictor) + : output_(output), + pc_(entryPoint), + instructionMemory_(instructionMemory), + programByteLength_(programByteLength), + isa_(isa), + branchPredictor_(branchPredictor), + blockSize_(blockSize), + blockMask_(~(blockSize_ - 1)) { + assert(blockSize_ >= isa_.getMaxInstructionSize() && + "fetch block size must be larger than the largest instruction"); + fetchBuffer_ = new uint8_t[2 * blockSize_]; + requestFromPC(); +} + +FetchUnit::~FetchUnit() { delete[] fetchBuffer_; } + +void FetchUnit::tick() { + if (output_.isStalled()) { + return; + } + + if (hasHalted_ || waitSCEval_) { + return; + } + + // If loop buffer has been filled, fill buffer to decode + // if (loopBufferState_ == LoopBufferState::SUPPLYING) { + // auto outputSlots = output_.getTailSlots(); + // for (size_t slot = 0; slot < output_.getWidth(); slot++) { + // auto& macroOp = outputSlots[slot]; + // auto bytesRead = isa_.predecode(&(loopBuffer_.front().encoding), + // loopBuffer_.front().instructionSize, + // loopBuffer_.front().address, macroOp); + + // assert(bytesRead != 0 && "predecode failure for loop buffer entry"); + + // // Set prediction to recorded value during loop buffer filling + // if (macroOp[0]->isBranch()) { + // macroOp[0]->setBranchPrediction(loopBuffer_.front().prediction); + // } + + // // Cycle queue by moving front 
entry to back + // loopBuffer_.push_back(loopBuffer_.front()); + // loopBuffer_.pop_front(); + // } + // return; + // } + + // Pointer to the instruction data to decode from + const uint8_t* buffer; + uint8_t bufferOffset; + + // Check if more instruction data is required + if (bufferedBytes_ < isa_.getMaxInstructionSize()) { + // Calculate the address of the next fetch block + uint64_t blockAddress; + if (bufferedBytes_ > 0) { + // There is already some data in the buffer, so check for the next block + bufferOffset = 0; + blockAddress = pc_ + bufferedBytes_; + assert((blockAddress & ~blockMask_) == 0 && "misaligned fetch buffer"); + } else { + // Fetch buffer is empty, so start from the PC + blockAddress = pc_ & blockMask_; + bufferOffset = pc_ - blockAddress; + } + + // Find fetched memory that matches the desired block + const auto& fetched = instructionMemory_.getCompletedReads(); + + size_t fetchIndex; + for (fetchIndex = 0; fetchIndex < fetched.size(); fetchIndex++) { + if (fetched[fetchIndex].target.address == blockAddress) { + break; + } + } + if (fetchIndex == fetched.size()) { + // Need to wait for fetched instructions + return; + } + + // TODO: Handle memory faults + assert(fetched[fetchIndex].data && "Memory read failed"); + const uint8_t* fetchData = fetched[fetchIndex].data.getAsVector(); + + // Copy fetched data to fetch buffer after existing data + std::memcpy(fetchBuffer_ + bufferedBytes_, fetchData + bufferOffset, + blockSize_ - bufferOffset); + + bufferedBytes_ += blockSize_ - bufferOffset; + buffer = fetchBuffer_; + // Decoding should start from the beginning of the fetchBuffer_. + bufferOffset = 0; + } else { + // There is already enough data in the fetch buffer, so use that + buffer = fetchBuffer_; + bufferOffset = 0; + } + + // Check we have enough data to begin decoding + if (bufferedBytes_ == isa_.getMinInstructionSize()) { + //Check if those bytes points to a instruction with minimum size or more data is required. If more data is required return + // TODO: this is not generic solution, just trying to make it work + uint16_t rawBits; + memcpy(&rawBits, buffer + bufferOffset, 2); + if((rawBits & 0x3) == 0x3) { + //std::cout << std::hex << "Only 2 bytes left in fetch buffer and not compresses instr type, current PC: 0x" << pc_ << std::endl; + return; + } + } + + auto outputSlots = output_.getTailSlots(); + for (size_t slot = 0; slot < output_.getWidth(); slot++) { + auto& macroOp = outputSlots[slot]; + + auto bytesRead = + isa_.predecode(buffer + bufferOffset, bufferedBytes_, pc_, macroOp); + + // If predecode fails, bail and wait for more data + if (bytesRead == 0) { + assert(bufferedBytes_ < isa_.getMinInstructionSize() && + "unexpected predecode failure"); + break; + } + + // Create branch prediction after identifing instruction type + // (e.g. RET, BL, etc). + BranchPrediction prediction = {false, 0}; + if (macroOp[0]->isBranch()) { + prediction = branchPredictor_.predict(pc_, macroOp[0]->getBranchType(), + macroOp[0]->getKnownOffset(), + (uint8_t)bytesRead); + macroOp[0]->setBranchPrediction(prediction); + } + + // if (loopBufferState_ == LoopBufferState::FILLING) { + // // Record instruction fetch information in loop body + // uint32_t encoding; + // memcpy(&encoding, buffer + bufferOffset, sizeof(uint32_t)); + // loopBuffer_.push_back( + // {encoding, bytesRead, pc_, macroOp[0]->getBranchPrediction()}); + + // if (pc_ == loopBoundaryAddress_) { + // // loopBoundaryAddress_ has been fetched whilst filling the loop buffer. 
+ // // Stop filling as loop body has been recorded and begin to supply + // // decode unit with instructions from the loop buffer + // loopBufferState_ = LoopBufferState::SUPPLYING; + // bufferedBytes_ = 0; + // break; + // } + // } else if (loopBufferState_ == LoopBufferState::WAITING && + // pc_ == loopBoundaryAddress_) { + // // Once set loopBoundaryAddress_ is fetched, start to fill loop buffer + // loopBufferState_ = LoopBufferState::FILLING; + // } + + assert(bytesRead <= bufferedBytes_ && + "Predecode consumed more bytes than were available"); + // Increment the offset, decrement available bytes + bufferOffset += bytesRead; + bufferedBytes_ -= bytesRead; + + if (!prediction.taken) { + // Predicted as not taken; increment PC to next instruction + pc_ += bytesRead; + } else { + // Predicted as taken; set PC to predicted target address + pc_ = prediction.target; + } +// std::cout << std::hex << "PC: 0x" << pc_ << ", PBL: 0x" << programByteLength_ << std::endl; + if (pc_ == 0 && (macroOp[0]->getBranchType() == BranchType::SubroutineCall)) { + waitSCEval_ = true; + break; + } + + if (pc_ >= programByteLength_) { + hasHalted_ = true; + break; + } + + if (prediction.taken) { + if (slot + 1 < output_.getWidth()) { + branchStalls_++; + } + // Can't continue fetch immediately after a branch + bufferedBytes_ = 0; + break; + } + + // Too few bytes remaining in buffer to continue + if (bufferedBytes_ == 0) { + break; + } + } + + if (bufferedBytes_ > 0) { + // Move start of fetched data to beginning of fetch buffer + std::memmove(fetchBuffer_, buffer + bufferOffset, bufferedBytes_); + } + + instructionMemory_.clearCompletedReads(); +} + +void FetchUnit::registerLoopBoundary(uint64_t branchAddress) { + // Set branch which forms the loop as the loopBoundaryAddress_ and place loop + // buffer in state to begin filling once the loopBoundaryAddress_ has been + // fetched + loopBufferState_ = LoopBufferState::WAITING; + loopBoundaryAddress_ = branchAddress; +} + +bool FetchUnit::hasHalted() const { return hasHalted_; } + +void FetchUnit::updatePC(uint64_t address) { + pc_ = address; + bufferedBytes_ = 0; + hasHalted_ = (pc_ >= programByteLength_); + waitSCEval_ = false; +} + +void FetchUnit::requestFromPC() { + // Do nothing if buffer already contains enough data + if (bufferedBytes_ >= isa_.getMaxInstructionSize()) return; + + // Do nothing if unit has halted to avoid invalid speculative memory reads + // beyond the programByteLength_ + if (hasHalted_ || waitSCEval_) return; + + uint64_t blockAddress; + if (bufferedBytes_ > 0) { + // There's already some data in the buffer, so fetch the next block + blockAddress = pc_ + bufferedBytes_; + assert((blockAddress & ~blockMask_) == 0 && "misaligned fetch buffer"); + } else { + // Fetch buffer is empty, so fetch from the PC + blockAddress = pc_ & blockMask_; + } + + instructionMemory_.requestRead({blockAddress, blockSize_}); +} + +uint64_t FetchUnit::getBranchStalls() const { return branchStalls_; } + +void FetchUnit::flushLoopBuffer() { + // loopBuffer_.clear(); + // loopBufferState_ = LoopBufferState::IDLE; + // loopBoundaryAddress_ = 0; +} + +void FetchUnit::flushPredictor(uint64_t address) { + branchPredictor_.flush(address); +} + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/lib/pipeline_hi/LoadStoreQueue.cc b/src/lib/pipeline_hi/LoadStoreQueue.cc new file mode 100644 index 0000000000..c0b752e8af --- /dev/null +++ b/src/lib/pipeline_hi/LoadStoreQueue.cc @@ -0,0 +1,315 @@ +#include "simeng/pipeline_hi/LoadStoreQueue.hh" + +#include 
+#include +#include +#include + +namespace simeng { +namespace pipeline_hi { + +/** Check whether requests `a` and `b` overlap. */ +bool requestsOverlap(MemoryAccessTarget a, MemoryAccessTarget b) { + // Check whether one region ends before the other begins, implying no overlap, + // and negate + return !(a.address + a.size <= b.address || b.address + b.size <= a.address); +} + +LoadStoreQueue::LoadStoreQueue( + unsigned int maxCombinedSpace, MemoryInterface& memory, + span>> completionSlots, + std::function, span)> forwardOperands, + bool exclusive, uint16_t loadBandwidth, uint16_t storeBandwidth, + uint16_t permittedRequests, uint16_t permittedLoads, + uint16_t permittedStores) + : completionSlots_(completionSlots), + forwardOperands_(forwardOperands), + maxCombinedSpace_(maxCombinedSpace), + combined_(true), + memory_(memory), + exclusive_(exclusive), + loadBandwidth_(loadBandwidth), + storeBandwidth_(storeBandwidth), + totalLimit_(permittedRequests), + // Set per-cycle limits for each request type + reqLimits_{permittedLoads, permittedStores} {}; + +LoadStoreQueue::LoadStoreQueue( + unsigned int maxLoadQueueSpace, unsigned int maxStoreQueueSpace, + MemoryInterface& memory, + span>> completionSlots, + std::function, span)> forwardOperands, + bool exclusive, uint16_t loadBandwidth, uint16_t storeBandwidth, + uint16_t permittedRequests, uint16_t permittedLoads, + uint16_t permittedStores) + : completionSlots_(completionSlots), + forwardOperands_(forwardOperands), + maxLoadQueueSpace_(maxLoadQueueSpace), + maxStoreQueueSpace_(maxStoreQueueSpace), + combined_(false), + memory_(memory), + exclusive_(exclusive), + loadBandwidth_(loadBandwidth), + storeBandwidth_(storeBandwidth), + totalLimit_(permittedRequests), + // Set per-cycle limits for each request type + reqLimits_{permittedLoads, permittedStores} {}; + +unsigned int LoadStoreQueue::getLoadQueueSpace() const { + if (combined_) { + return getCombinedSpace(); + } else { + return getLoadQueueSplitSpace(); + } +} +unsigned int LoadStoreQueue::getStoreQueueSpace() const { + if (combined_) { + return getCombinedSpace(); + } else { + return getStoreQueueSplitSpace(); + } +} +unsigned int LoadStoreQueue::getTotalSpace() const { + if (combined_) { + return getCombinedSpace(); + } else { + return getLoadQueueSplitSpace() + getStoreQueueSplitSpace(); + } +} + +unsigned int LoadStoreQueue::getLoadQueueSplitSpace() const { + return maxLoadQueueSpace_ - loadQueue_.size(); +} +unsigned int LoadStoreQueue::getStoreQueueSplitSpace() const { + return maxStoreQueueSpace_ - storeQueue_.size(); +} +unsigned int LoadStoreQueue::getCombinedSpace() const { + return maxCombinedSpace_ - loadQueue_.size() - storeQueue_.size(); +} + +bool isMisAligned(uint64_t addr, uint8_t sz) { + if(((addr & 0x1) && sz==2) || ((addr & 0x3) && sz==4)) { + return true; + } + return false; +} + +void LoadStoreQueue::addLoad(const std::shared_ptr& insn) { + + const auto& addresses = insn->getGeneratedAddresses(); + + assert(addresses.size()==1 && "Expecting only 1 address in load request"); + // Do something to split into multiple requests if alignment is required for case like crossing 4 byte boundary. 
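+  // Sketch of one possible split for an access that does cross a 4-byte boundary
+  // (illustrative only; the MemoryAccessTarget aggregate initialisation matches its
+  // use elsewhere in this file, everything else is an assumption):
+  //   uint64_t boundary  = (addresses[0].address & ~uint64_t(3)) + 4;
+  //   uint8_t  firstSize = static_cast<uint8_t>(boundary - addresses[0].address);
+  //   MemoryAccessTarget lower{addresses[0].address, firstSize};
+  //   MemoryAccessTarget upper{boundary, static_cast<uint8_t>(addresses[0].size - firstSize)};
+  //   // ...then queue `lower` and `upper` as two request entries instead of one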
+  loadQueue_.push_back(insn);
+  uint64_t add_tick = 1;
+  bool isMisAlign = false;
+  if (isMisAligned(addresses[0].address, addresses[0].size)) {
+    add_tick+=1;
+    isMisAlign=true;
+  }
+  requestQueue_.push_back({{}, {}, insn, LOAD, (tickCounter_+add_tick) + insn->getLSQLatency(), isMisAlign});
+  // Queue the read request; it is issued to the memory interface once its
+  // scheduled tick is reached in tick()
+
+  for (size_t i = 0; i < addresses.size(); i++) {
+    //memory_.requestWrite(addresses[i], data[i]);
+    // Still add addresses to requestQueue_ to ensure contention of resources is
+    // correctly simulated
+    requestQueue_.back().reqAddresses.push(addresses[i]);
+  }
+
+  //loadQueue_.push_back(insn);
+  //startLoad(insn);
+}
+
+void LoadStoreQueue::addStore(const std::shared_ptr<Instruction>& insn) {
+  const auto& addresses = insn->getGeneratedAddresses();
+  span<const RegisterValue> data = insn->getData();
+
+  assert(addresses.size()==1 && "Expecting only 1 address in store request");
+  // Do something to split into multiple requests if alignment is required for
+  // cases like crossing a 4-byte boundary.
+
+  storeQueue_.push_back({insn, data});
+
+  uint64_t add_tick = 1;
+  bool isMisAlign = false;
+  if (isMisAligned(addresses[0].address, addresses[0].size)) {
+    add_tick+=1;
+    isMisAlign = true;
+  }
+
+  requestQueue_.push_back({{}, {}, insn, STORE, (tickCounter_+add_tick) + insn->getLSQLatency(), isMisAlign});
+  // Submit request write to memory interface early as the architectural state
+  // considers the store to be retired and thus its operation complete
+
+  for (size_t i = 0; i < addresses.size(); i++) {
+    //memory_.requestWrite(addresses[i], data[i]);
+    // Still add addresses to requestQueue_ to ensure contention of resources is
+    // correctly simulated
+    requestQueue_.back().reqAddresses.push(addresses[i]);
+    requestQueue_.back().data.push(data[i]);
+  }
+  //storeQueue_.push_back({insn, {}});
+  //supplyStoreData(insn);
+  //commitStore(insn);
+}
+
+void LoadStoreQueue::startLoad(const std::shared_ptr<Instruction>& insn) {
+  return;
+}
+
+void LoadStoreQueue::supplyStoreData(const std::shared_ptr<Instruction>& insn) {
+  return;
+}
+
+bool LoadStoreQueue::commitStore(const std::shared_ptr<Instruction>& uop) {
+  if (storeQueue_.front().first == uop) {
+    storeQueue_.pop_front();
+  } else {
+    assert(false && "The committed store is not the one at the front of the storeQueue_");
+  }
+  return true;
+}
+
+void LoadStoreQueue::commitLoad(const std::shared_ptr<Instruction>& uop) {
+  if (loadQueue_.front() == uop) {
+    loadQueue_.pop_front();
+  } else {
+    assert(false && "The committed load is not the one at the front of the loadQueue_");
+  }
+  return;
+}
+
+void LoadStoreQueue::purgeFlushed() {
+  return;
+}
+
+bool LoadStoreQueue::isBusy() const {
+  // TODO: This is just to allow only 1 outstanding request to be used for SST integration.
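+  // Worked example of the intended limit (drawn from the condition below): with
+  // one in-flight load and one in-flight store the queue reports busy
+  // (1 + 1 >= 2); with a single in-flight load it does not, unless a misaligned
+  // request is about to issue (see activeMisAlignedOpr()).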
+  //if (activeMisAlignedOpr() || loadQueue_.size()>=1 || storeQueue_.size()>=1) {
+  if (activeMisAlignedOpr() || (loadQueue_.size()+storeQueue_.size())>=2) {
+    return true;
+  }
+  return false;
+}
+
+void LoadStoreQueue::tick() {
+  tickCounter_++;
+
+  // The request at the front of the queue should be sent to memory first.
+  // Ensure it is only issued once its scheduled tick has been reached.
+  if (requestQueue_.size() > 0) {
+    requestEntry1& oldestreq = requestQueue_.front();
+    if (tickCounter_ >= oldestreq.reqtick) {
+      if(oldestreq.type == LOAD) {
+        memory_.requestRead(oldestreq.reqAddresses.front(), (uint64_t) busReqId);
+        oldestreq.reqAddresses.pop();
+        if (oldestreq.reqAddresses.size() == 0) { // All requests sent
+          requestQueue_.pop_front();
+        }
+        requestedLoads_.emplace(busReqId, oldestreq.insn);
+        numLoads++;
+        latencyLoads_.emplace(busReqId, tickCounter_);
+        busReqId++;
+      } else if(oldestreq.type == STORE) {
+        memory_.requestWrite(oldestreq.reqAddresses.front(), oldestreq.data.front());
+        oldestreq.reqAddresses.pop();
+        oldestreq.data.pop();
+        if (oldestreq.reqAddresses.size() == 0) { // All requests sent
+          requestQueue_.pop_front();
+          // Verify same instruction and remove from the storeQueue_ as well
+          //storeQueue_.pop_front();//No need
+        }
+      } else {
+        assert(false && "Unknown request type to be scheduled to memory");
+      }
+    }
+  }
+
+  //processResponse();
+}
+
+void LoadStoreQueue::processResponse() {
+  // Process completed read requests
+  for (const auto& response : memory_.getCompletedReads()) {
+    const auto& address = response.target.address;
+    const auto& data = response.data;
+
+    // TODO: Detect and handle non-fatal faults (e.g. page fault)
+
+    // Find instruction that requested the memory read
+    const auto& itr = requestedLoads_.find(response.requestId);
+    if (itr == requestedLoads_.end()) {
+      continue;
+    } else {
+      uint32_t ldLatency = ((tickCounter_ + 1) - latencyLoads_.at(response.requestId));
+      if (ldLatency > maxLdLatency) {
+        maxLdLatency = ldLatency;
+      }
+      if (ldLatency < minLdLatency) {
+        minLdLatency = ldLatency;
+      }
+      totalLdLatency += ldLatency;
+      //std::cout << std::dec << "Total Ld latency: " << totalLdLatency << ", numLoads: " << numLoads << std::endl;
+      latencyLoads_.erase(response.requestId);
+    }
+    // Supply data to the instruction and execute if it is ready.
+    // Copy the instruction out before erasing its entry, as the erase would
+    // otherwise invalidate the iterator being dereferenced here.
+    const auto load = itr->second;
+    requestedLoads_.erase(response.requestId);
+    load->supplyData(address, data);
+    if (load->hasAllData()) {
+      // This load has completed
+      load->execute();
+      /*if (load->isStoreData()) {
+        supplyStoreData(load);
+      }*/
+      completedLoads_.push(load);
+    }
+  }
+  memory_.clearCompletedReads();
+
+  // Pop from the front of the completed loads queue and send to writeback
+  size_t count = 0;
+  while (completedLoads_.size() > 0 && count < completionSlots_.size()) {
+    const auto& insn = completedLoads_.front();
+
+    // Don't process load instruction if it has been flushed
+    if (insn->isFlushed()) {
+      completedLoads_.pop();
+      continue;
+    }
+
+    // Forward the results
+    // forwardOperands_(insn->getDestinationRegisters(), insn->getResults());
+
+    completionSlots_[count].getTailSlots()[0] = std::move(insn);
+
+    completedLoads_.pop();
+
+    count++;
+  }
+}
+
+std::shared_ptr<Instruction> LoadStoreQueue::getViolatingLoad() const {
+  return violatingLoad_;
+}
+
+// Clean up is required!
+bool LoadStoreQueue::activeMisAlignedOpr() const {
+  // If the front of the request queue has a misaligned request that has not yet
+  // been sent to the bus then it is better to halt the LSU from taking new requests.
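+  // Worked example (illustrative only, assuming a zero LSQ latency): a misaligned
+  // request queued at tick 9 gets reqtick = 9 + 2 = 11, so at tickCounter_ == 10
+  // the expression below is true and the LSU stalls for the single cycle before
+  // the split request is issued.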
+ // if(storeQueue_.size() > 0 && activeMisAlignedStore) { + // return true; + // } + return (requestQueue_.size() > 0 && requestQueue_.front().isMisAligned && ((requestQueue_.front().reqtick-tickCounter_)==1)); +} + +bool LoadStoreQueue::isCombined() const { return combined_; } + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/lib/pipeline_hi/RegDepMap.cc b/src/lib/pipeline_hi/RegDepMap.cc new file mode 100644 index 0000000000..4ab004bfdb --- /dev/null +++ b/src/lib/pipeline_hi/RegDepMap.cc @@ -0,0 +1,143 @@ +#include "simeng/pipeline_hi/RegDepMap.hh" + +#include + +//#define RDMDEBUG +#ifdef RDMDEBUG +#define DEBUG(x) std::cout << "Core: " << std::hex << x << std::endl; +#else +#define DEBUG(x) do { } while (false); +#endif + +namespace simeng { +namespace pipeline_hi { + +const Register l_ZERO_REGISTER = {0, 0}; + +RegDepMap::RegDepMap(const std::vector registerFileStructures, + const RegisterFileSet& registerFileSet) : + registerFileStructures_(registerFileStructures), + registerFileSet_(registerFileSet) { + regMap_.resize(registerFileStructures_.size());//Just for Integer Register File for now + for (size_t type=0; typegetDestinationRegisters(); + for(const auto& reg: destinationRegisters) { + if(reg != l_ZERO_REGISTER) { //Not X0 + outstandingDep_++; + DEBUG("Adding Depencency: addr, 0x" << instr->getInstructionAddress() << std::dec << ", dest: " << reg << ", outstanding: " << outstandingDep_); + regMap_[reg.type][reg.tag].push_back(instr); + } + } +} + +void RegDepMap::remove(InstrPtr instr) +{ + auto& destinationRegisters = instr->getDestinationRegisters(); + for(const auto& reg: destinationRegisters) { + auto it = regMap_[reg.type][reg.tag].begin(); + while (it != regMap_[reg.type][reg.tag].end()) { + if(*it == instr) { + outstandingDep_--; + DEBUG("Removing Depencency: addr, 0x" << instr->getInstructionAddress() << std::dec << ", dest: " << reg << ", outstanding: " << outstandingDep_); + it = regMap_[reg.type][reg.tag].erase(it); + break; + } else { + it++; + } + } + } +} + +bool RegDepMap::canRead(InstrPtr instr) +{ + bool dependency = false; + auto& sourceRegisters = instr->getOperandRegisters(); + for (uint16_t i = 0; i < sourceRegisters.size(); i++) { + const auto& srcReg = sourceRegisters[i]; + + if (!instr->isOperandReady(i)) { + // The operand hasn't already been supplied + if (regMap_[srcReg.type][srcReg.tag].size() == 0) {//pick up value from register file + instr->supplyOperand(i, registerFileSet_.get(srcReg)); + } else if (regMap_[srcReg.type][srcReg.tag].back()->hasExecuted() && + !(regMap_[srcReg.type][srcReg.tag].back()->isMul() || regMap_[srcReg.type][srcReg.tag].back()->isDiv() || + (regMap_[srcReg.type][srcReg.tag].back()->isLoad() && !instr->isStoreData()))) {//pick up value from last executed instruction + const auto& destRegisters = regMap_[srcReg.type][srcReg.tag].back()->getDestinationRegisters(); + const auto& destValues = regMap_[srcReg.type][srcReg.tag].back()->getResults(); + for (size_t j = 0; j < destRegisters.size(); j++) { + const auto& destReg = destRegisters[j]; + if (destReg == srcReg) { + instr->supplyOperand(i, destValues[j]); + break; + } + } + } else { + dependency = true; + } + } + } + + return !dependency; +} + +bool RegDepMap::canWrite(InstrPtr instr) +{ + bool dependency = false; + auto& destRegisters = instr->getDestinationRegisters(); + for(uint16_t i = 0; i < destRegisters.size(); i++) { + const auto& destReg = destRegisters[i]; + if (regMap_[destReg.type][destReg.tag].size() > 0 && + 
!regMap_[destReg.type][destReg.tag].back()->hasExecuted()) { + dependency = true; + break; + } + } + return !dependency || (instr->isLoad()); +} + +//Clean up the options logic to ensure all of them work well together +bool RegDepMap::canForward(InstrPtr instr) +{ + return true; +} + +void RegDepMap::purgeFlushed() { + for (auto& registerType : regMap_) { + for (auto& dependencyList : registerType) { + auto it = dependencyList.begin(); + while (it != dependencyList.end()) { + DEBUG("Purge entry present at addr: 0x" << (*it)->getInstructionAddress()); + if ((*it)->isFlushed()) { + outstandingDep_--; + it = dependencyList.erase(it); + } else { + it++; + } + } + } + } +} + +void RegDepMap::dump() +{ +} + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/lib/pipeline_hi/RegisterAliasTable.cc b/src/lib/pipeline_hi/RegisterAliasTable.cc new file mode 100644 index 0000000000..0c813a6f7c --- /dev/null +++ b/src/lib/pipeline_hi/RegisterAliasTable.cc @@ -0,0 +1,110 @@ +#include "simeng/pipeline_hi/RegisterAliasTable.hh" + +#include + +namespace simeng { +namespace pipeline_hi { + +RegisterAliasTable::RegisterAliasTable( + std::vector architecturalStructure, + std::vector physicalRegisterCounts) + : mappingTable_(architecturalStructure.size()), + historyTable_(architecturalStructure.size()), + destinationTable_(architecturalStructure.size()), + freeQueues_(architecturalStructure.size()) { + assert(architecturalStructure.size() == physicalRegisterCounts.size() && + "The number of physical register types does not match the number of " + "architectural register types"); + + for (size_t type = 0; type < architecturalStructure.size(); type++) { + auto archCount = architecturalStructure[type].quantity; + auto physCount = physicalRegisterCounts[type]; + assert(archCount <= physCount && + "Cannot have fewer physical registers than architectural registers"); + + // Set up the initial mapping table state for this register type + mappingTable_[type].resize(archCount); + + for (size_t tag = 0; tag < archCount; tag++) { + // Pre-assign a physical register to each architectural register + mappingTable_[type][tag] = tag; + } + + // Add remaining physical registers to free queue + for (size_t tag = archCount; tag < physCount; tag++) { + freeQueues_[type].push(tag); + } + + // Set up history/destination tables + historyTable_[type].resize(physCount); + destinationTable_[type].resize(physCount); + } +}; + +Register RegisterAliasTable::getMapping(Register architectural) const { + // Asserts to ensure mapping isn't attempted for an out-of-bound index (i.e. + // mapping of WZR / XZR) + assert(architectural.type < mappingTable_.size() && + "Invalid register type. Cannot find RAT mapping."); + assert(architectural.type >= 0 && + "Invalid register type. 
Cannot find RAT mapping."); + + auto tag = mappingTable_[architectural.type][architectural.tag]; + return {architectural.type, tag}; +} + +bool RegisterAliasTable::canAllocate(uint8_t type, + unsigned int quantity) const { + return (freeQueues_[type].size() >= quantity); +} + +bool RegisterAliasTable::canRename(uint8_t type) const { + // Renaming possible iff there are more physical than architectural registers + return destinationTable_[type].size() > mappingTable_[type].size(); +} + +unsigned int RegisterAliasTable::freeRegistersAvailable(uint8_t type) const { + return freeQueues_[type].size(); +} + +Register RegisterAliasTable::allocate(Register architectural) { + std::queue& freeQueue = freeQueues_[architectural.type]; + assert(freeQueue.size() > 0 && + "Attempted to allocate free register when none were available"); + + auto tag = freeQueue.front(); + freeQueue.pop(); + + // Keep the old physical register in the history table + historyTable_[architectural.type][tag] = + mappingTable_[architectural.type][architectural.tag]; + + // Update the mapping table with the new tag, and mark the architectural + // register it replaces in the destination table + mappingTable_[architectural.type][architectural.tag] = tag; + destinationTable_[architectural.type][tag] = architectural.tag; + + return {architectural.type, tag}; +} + +void RegisterAliasTable::commit(Register physical) { + // Find the register previously mapped to the same architectural register and + // free it + auto oldTag = historyTable_[physical.type][physical.tag]; + freeQueues_[physical.type].push(oldTag); +} +void RegisterAliasTable::rewind(Register physical) { + // Find which architectural tag this referred to + auto destinationTag = destinationTable_[physical.type][physical.tag]; + // Rewind the mapping table to the old physical tag + mappingTable_[physical.type][destinationTag] = + historyTable_[physical.type][physical.tag]; + // Add the rewound physical tag back to the free queue + freeQueues_[physical.type].push(physical.tag); +} +void RegisterAliasTable::free(Register physical) { + freeQueues_[physical.type].push(physical.tag); +} + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/lib/pipeline_hi/ReorderBuffer.cc b/src/lib/pipeline_hi/ReorderBuffer.cc new file mode 100644 index 0000000000..c653ffd5ea --- /dev/null +++ b/src/lib/pipeline_hi/ReorderBuffer.cc @@ -0,0 +1,206 @@ +#include "simeng/pipeline_hi/ReorderBuffer.hh" + +#include +#include +#include + +namespace simeng { +namespace pipeline_hi { + +ReorderBuffer::ReorderBuffer( + unsigned int maxSize, RegisterAliasTable& rat, LoadStoreQueue& lsq, + std::function&)> raiseException, + std::function sendLoopBoundary, + BranchPredictor& predictor, uint16_t loopBufSize, + uint16_t loopDetectionThreshold) + : rat_(rat), + lsq_(lsq), + maxSize_(maxSize), + raiseException_(raiseException), + sendLoopBoundary_(sendLoopBoundary), + predictor_(predictor), + loopBufSize_(loopBufSize), + loopDetectionThreshold_(loopDetectionThreshold) {} + +void ReorderBuffer::reserve(const std::shared_ptr& insn) { + assert(buffer_.size() < maxSize_ && + "Attempted to reserve entry in reorder buffer when already full"); + insn->setSequenceId(seqId_); + seqId_++; + insn->setInstructionId(insnId_); + if (insn->isLastMicroOp()) insnId_++; + + buffer_.push_back(insn); +} + +void ReorderBuffer::commitMicroOps(uint64_t insnId) { + if (buffer_.size()) { + size_t index = 0; + int firstOp = -1; + bool validForCommit = false; + + // Find first instance of uop belonging to macro-op 
instruction + for (; index < buffer_.size(); index++) { + if (buffer_[index]->getInstructionId() == insnId) { + firstOp = index; + break; + } + } + + if (firstOp > -1) { + // If found, see if all uops are committable + for (; index < buffer_.size(); index++) { + if (buffer_[index]->getInstructionId() != insnId) break; + if (!buffer_[index]->isWaitingCommit()) { + return; + } else if (buffer_[index]->isLastMicroOp()) { + // all microOps must be in ROB for the commit to be valid + validForCommit = true; + } + } + if (!validForCommit) return; + + // No early return thus all uops are committable + for (; firstOp < buffer_.size(); firstOp++) { + if (buffer_[firstOp]->getInstructionId() != insnId) break; + buffer_[firstOp]->setCommitReady(); + } + } + } + return; +} + +unsigned int ReorderBuffer::commit(unsigned int maxCommitSize) { + shouldFlush_ = false; + size_t maxCommits = + std::min(static_cast(maxCommitSize), buffer_.size()); + + unsigned int n; + for (n = 0; n < maxCommits; n++) { + auto& uop = buffer_[0]; + if (!uop->canCommit()) { + break; + } + + if (uop->isLastMicroOp()) instructionsCommitted_++; + + if (uop->exceptionEncountered()) { + raiseException_(uop); + buffer_.pop_front(); + return n + 1; + } + + const auto& destinations = uop->getDestinationRegisters(); + for (int i = 0; i < destinations.size(); i++) { + rat_.commit(destinations[i]); + } + + // If it's a memory op, commit the entry at the head of the respective queue + if (uop->isLoad()) { + lsq_.commitLoad(uop); + } + if (uop->isStoreAddress()) { + bool violationFound = lsq_.commitStore(uop); + if (violationFound) { + loadViolations_++; + // Memory order violation found; aborting commits and flushing + auto load = lsq_.getViolatingLoad(); + shouldFlush_ = true; + flushAfter_ = load->getInstructionId() - 1; + pc_ = load->getInstructionAddress(); + + buffer_.pop_front(); + return n + 1; + } + } + + // Increment or swap out branch counter for loop detection + if (uop->isBranch() && !loopDetected_) { + bool increment = true; + if (branchCounter_.first.address != uop->getInstructionAddress()) { + // Mismatch on instruction address, reset + increment = false; + } else if (branchCounter_.first.outcome != uop->getBranchPrediction()) { + // Mismatch on branch outcome, reset + increment = false; + } else if ((instructionsCommitted_ - branchCounter_.first.commitNumber) > + loopBufSize_) { + // Loop too big to fit in loop buffer, reset + increment = false; + } + + if (increment) { + // Reset commitNumber value + branchCounter_.first.commitNumber = instructionsCommitted_; + // Increment counter + branchCounter_.second++; + + if (branchCounter_.second > loopDetectionThreshold_) { + // If the same branch with the same outcome is sequentially retired + // more times than the loopDetectionThreshold_ value, identify as a + // loop boundary + loopDetected_ = true; + sendLoopBoundary_(uop->getInstructionAddress()); + } + } else { + // Swap out latest branch + branchCounter_ = {{uop->getInstructionAddress(), + uop->getBranchPrediction(), instructionsCommitted_}, + 0}; + } + } + buffer_.pop_front(); + } + + return n; +} + +void ReorderBuffer::flush(uint64_t afterSeqId) { + // Iterate backwards from the tail of the queue to find and remove ops newer + // than `afterSeqId` + while (!buffer_.empty()) { + auto& uop = buffer_.back(); + if (uop->getInstructionId() <= afterSeqId) { + break; + } + + // To rewind destination registers in correct history order, rewinding of + // register renaming is done backwards + auto destinations = 
uop->getDestinationRegisters(); + for (int i = destinations.size() - 1; i >= 0; i--) { + const auto& reg = destinations[i]; + rat_.rewind(reg); + } + uop->setFlushed(); + // If the instruction is a branch, supply address to branch flushing logic + if (uop->isBranch()) { + predictor_.flush(uop->getInstructionAddress()); + } + buffer_.pop_back(); + } + + // Reset branch counter and loop detection + branchCounter_ = {{0, {false, 0}, 0}, 0}; + loopDetected_ = false; +} + +unsigned int ReorderBuffer::size() const { return buffer_.size(); } + +unsigned int ReorderBuffer::getFreeSpace() const { + return maxSize_ - buffer_.size(); +} + +bool ReorderBuffer::shouldFlush() const { return shouldFlush_; } +uint64_t ReorderBuffer::getFlushAddress() const { return pc_; } +uint64_t ReorderBuffer::getFlushSeqId() const { return flushAfter_; } + +uint64_t ReorderBuffer::getInstructionsCommittedCount() const { + return instructionsCommitted_; +} + +uint64_t ReorderBuffer::getViolatingLoadsCount() const { + return loadViolations_; +} + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/lib/pipeline_hi/StaticPredictor.cc b/src/lib/pipeline_hi/StaticPredictor.cc new file mode 100644 index 0000000000..6619989942 --- /dev/null +++ b/src/lib/pipeline_hi/StaticPredictor.cc @@ -0,0 +1,120 @@ +#include "simeng/pipeline_hi/StaticPredictor.hh" + +#include + +namespace simeng { +namespace pipeline_hi { +//TODO: temp for get rid of yaml, delete it later +StaticPredictor::StaticPredictor(uint8_t sType) + : staticType_(sType) {} + +StaticPredictor::StaticPredictor(YAML::Node config) + : staticType_(config["Branch-Predictor"]["Static-Type"].as()), + rasSize_(config["Branch-Predictor"]["RAS-entries"].as()){} + +StaticPredictor::~StaticPredictor() { + ras_.clear(); + rasHistory_.clear(); +} + +BranchPrediction StaticPredictor::predict(uint64_t address, BranchType type, + uint64_t knownOffset, + uint8_t byteLength) { + int64_t offset = knownOffset; + uint64_t predict_target = (knownOffset) ? 
knownOffset + address : 0; + BranchPrediction prediction = {false, 0}; + + assert(byteLength > 1 && "byteLength <= 1"); + + if (type == BranchType::Unconditional) { + prediction = { true, predict_target}; + } else if (type == BranchType::Return) { + if (ras_.size() > 0) { + predict_target = ras_.back(); + // Record top of RAS used for target prediction + rasHistory_[address] = ras_.back(); + ras_.pop_back(); + } + prediction = {true, predict_target}; + } else if (type == BranchType::SubroutineCall) { //JAL and JALR + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(address + byteLength); + // Record that this address is a branch-and-link instruction + rasHistory_[address] = 0; + prediction = {true, predict_target}; + } else if (type == BranchType::Conditional) { + switch (staticType_) { + case 0: //always-taken + prediction = {true, predict_target}; + break; + + case 1: //always-not-taken; + prediction = {false, 0}; + break; + + case 2: //Backward Taken, Forward Not Taken + { + if (offset >= 0) { + //not taken + prediction = {false, address+byteLength}; + } else { + prediction = {true, predict_target}; + } + break; + } + + case 3: //Forward Taken, Backward Not Taken + { + if (offset <= 0) { + //not taken + prediction = {false, address+byteLength}; + } else { + prediction = {true, predict_target}; + } + break; + } + + default: + assert(staticType_ < 4 && "Non-supported type for static predictor"); + break; + } + } + + return prediction; +} + +void StaticPredictor::update(uint64_t address, bool taken, + uint64_t targetAddress, BranchType type) {} + +void StaticPredictor::flush(uint64_t address) { + // If address interacted with RAS, rewind entry + auto it = rasHistory_.find(address); + if (it != rasHistory_.end()) { + uint64_t target = it->second; + if (target != 0) { + // If history entry belongs to a return instruction, push target back onto + // stack + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(target); + } else { + // If history entry belongs to a branch-and-link instruction, pop target + // off of stack + if (ras_.size()) { + ras_.pop_back(); + } + } + rasHistory_.erase(it); + } +} +BranchPrediction StaticPredictor::predict(uint64_t address, BranchType type, + uint64_t knownTarget) { + printf("StaticPredictor::predict(), This is overloaded and deprecated! 
\n"); + return predict(address, type, knownTarget, 4); +} + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/lib/pipeline_hi/WritebackUnit.cc b/src/lib/pipeline_hi/WritebackUnit.cc new file mode 100644 index 0000000000..b0dfd97161 --- /dev/null +++ b/src/lib/pipeline_hi/WritebackUnit.cc @@ -0,0 +1,74 @@ +#include "simeng/pipeline_hi/WritebackUnit.hh" + +#include + +namespace simeng { +namespace pipeline_hi { + +WritebackUnit::WritebackUnit( + std::vector>>& completionSlots, + RegisterFileSet& registerFileSet, + std::function flagMicroOpCommits, + std::function&)> removeDep, + std::function&)> removeInstrOrderQ) + : completionSlots_(completionSlots), + registerFileSet_(registerFileSet), + flagMicroOpCommits_(flagMicroOpCommits), + removeDep_(removeDep), + removeInstrOrderQ_(removeInstrOrderQ) {} + +void WritebackUnit::tick() { + for (size_t slot = 0; slot < completionSlots_.size(); slot++) { + auto& uop = completionSlots_[slot].getHeadSlots()[0]; + + if (uop == nullptr) { + continue; + } + + auto& results = uop->getResults(); + auto& destinations = uop->getDestinationRegisters(); + for (size_t i = 0; i < results.size(); i++) { + // Write results to register file + registerFileSet_.set(destinations[i], results[i]); + } + if (uop->isMicroOp()) { + uop->setWaitingCommit(); + flagMicroOpCommits_(uop->getInstructionId()); + if (uop->isLastMicroOp()) { + instructionsWritten_++; + committedInstsForTrace_.push_back(uop); + } + } else { + uop->setCommitReady(); + removeDep_(uop); + instructionsWritten_++; + committedInstsForTrace_.push_back(uop); + } + + completionSlots_[slot].getHeadSlots()[0] = nullptr; + } +} + +uint64_t WritebackUnit::getInstructionsWrittenCount() const { + return instructionsWritten_; +} + +std::vector> WritebackUnit::getInstsForTrace() { + std::shared_ptr instr; + std::deque>::iterator it = committedInstsForTrace_.begin(); + while(it != committedInstsForTrace_.end()) { + instr = *it; + if (removeInstrOrderQ_(instr)) { + committedInstsForTrace_.erase(it); + return {instr}; + } + it++; + } + return {}; //committedInstsForTrace_; +} +void WritebackUnit::traceFinished() { + //committedInstsForTrace_.clear(); +} + +} // namespace pipeline_hi +} // namespace simeng diff --git a/src/tools/simeng/main.cc b/src/tools/simeng/main.cc index fa9b58baa1..f5cfa53513 100644 --- a/src/tools/simeng/main.cc +++ b/src/tools/simeng/main.cc @@ -10,7 +10,7 @@ #include "simeng/version.hh" /** Tick the provided core model until it halts. 
*/ -int simulate(simeng::Core& core, simeng::MemoryInterface& dataMemory, +uint64_t simulate(simeng::Core& core, simeng::MemoryInterface& dataMemory, simeng::MemoryInterface& instructionMemory) { uint64_t iterations = 0; @@ -91,7 +91,7 @@ int main(int argc, char** argv) { // Run simulation std::cout << "[SimEng] Starting...\n" << std::endl; - int iterations = 0; + uint64_t iterations = 0; auto startTime = std::chrono::high_resolution_clock::now(); iterations = simulate(*core, *dataMemory, *instructionMemory); diff --git a/sst/SimEngCoreWrapper.cc b/sst/SimEngCoreWrapper.cc index 45c1bdde70..668439f52e 100644 --- a/sst/SimEngCoreWrapper.cc +++ b/sst/SimEngCoreWrapper.cc @@ -10,9 +10,85 @@ #include "Assemble.hh" +#include + using namespace SST::SSTSimEng; using namespace SST::Interfaces; +//For now just make sure that the code and data is loaded into memory +// at the correct addresses instead of sending the entire process image +void SimEngCoreWrapper::processMemoryImage() { + std::ifstream file(executablePath_, std::ios::binary); + if (!file.is_open()) { + return; + } + + char elfMagic[4] = {0x7f, 'E', 'L', 'F'}; + char fileMagic[4]; + file.read(fileMagic, 4); + if (std::memcmp(elfMagic, fileMagic, sizeof(elfMagic))) { + return; + } + + /** + * The fifth byte of the ELF Header identifies the architecture + * of the ELF binary i.e 32-bit or 64-bit. + */ + + // Check whether this is a 32-bit executable + char bitFormat; + file.read(&bitFormat, sizeof(bitFormat)); + if (bitFormat != ElfBitFormat::Format32) { + return; + } + struct Elf32Header { + uint32_t type; + uint32_t offset; + uint32_t virtualAddress; + uint32_t physicalAddress; + uint32_t fileSize; + uint32_t memorySize; + }; + uint32_t entryPoint32_; + std::vector headers32_; + + file.seekg(0x18); + file.read(reinterpret_cast(&entryPoint32_), sizeof(entryPoint32_)); + uint32_t headerOffset; + file.read(reinterpret_cast(&headerOffset), sizeof(headerOffset)); + file.seekg(0x2a); + uint16_t headerEntrySize; + file.read(reinterpret_cast(&headerEntrySize), sizeof(headerEntrySize)); + uint16_t headerEntries; + file.read(reinterpret_cast(&headerEntries), sizeof(headerEntries)); + headers32_.resize(headerEntries); + // Loop over all headers and extract them. 
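+  // For reference, each ELF32 program header entry is laid out as follows
+  // (offsets from the start of the entry):
+  //   0x00 p_type, 0x04 p_offset, 0x08 p_vaddr, 0x0c p_paddr,
+  //   0x10 p_filesz, 0x14 p_memsz, 0x18 p_flags, 0x1c p_align.
+  // Only the first six fields are read below; p_flags and p_align are skipped
+  // because each iteration seeks to headerOffset + (i * headerEntrySize).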
+ for (size_t i = 0; i < headerEntries; i++) { + file.seekg(headerOffset + (i * headerEntrySize)); + auto& header = headers32_[i]; + + const int fieldBytes = 4; + file.read(reinterpret_cast(&(header.type)), sizeof(header.type)); + file.read(reinterpret_cast(&(header.offset)), fieldBytes); + file.read(reinterpret_cast(&(header.virtualAddress)), fieldBytes); + file.read(reinterpret_cast(&(header.physicalAddress)), fieldBytes); + file.read(reinterpret_cast(&(header.fileSize)), fieldBytes); + file.read(reinterpret_cast(&(header.memorySize)), fieldBytes); + } + // Process headers; only observe LOAD sections for this basic implementation + for (const auto& header : headers32_) { + if (header.type == 1) { // LOAD + char* imagePointer; + imagePointer = (char*)calloc(header.memorySize, sizeof(char)); + file.seekg(header.offset); + file.read(imagePointer, header.fileSize); + dataMemory_->sendProcessImageToSST(imagePointer, header.memorySize, header.virtualAddress); + } + } + std::cout << "[SSTSimEng:SimEngCoreWrapper] Done exporting elf data into SST memory" << std::endl; + //assert(false && "Incomplete implementation"); +} + SimEngCoreWrapper::SimEngCoreWrapper(SST::ComponentId_t id, SST::Params& params) : SST::Component(id) { output_.init("[SSTSimEng:SimEngCoreWrapper] " + getName() + ":@p:@l ", 999, 0, @@ -95,7 +171,7 @@ void SimEngCoreWrapper::finish() { std::cout << "[SimEng] " << key << ": " << value << "\n"; } - std::cout << "\n[SimEng] Finished " << iterations_ << " ticks in " << duration + std::cout << "\n[SimEng] Finished " << std::dec << iterations_ << " ticks in " << duration << "ms (" << std::round(khz) << " kHz, " << std::setprecision(2) << mips << " MIPS)" << std::endl; } @@ -284,13 +360,13 @@ void SimEngCoreWrapper::fabricateSimEngCore() { : std::make_unique( a64fxConfigPath_, executablePath_, executableArgs_); } - if (coreInstance_->getSimulationMode() != + /*if (coreInstance_->getSimulationMode() != simeng::SimulationMode::OutOfOrder) { output_.verbose(CALL_INFO, 1, 0, "SimEng currently only supports Out-of-Order " "archetypes with SST."); std::exit(EXIT_FAILURE); - } + }*/ // Set the SST data memory SimEng should use coreInstance_->setL1DataMemory(dataMemory_); @@ -303,7 +379,7 @@ void SimEngCoreWrapper::fabricateSimEngCore() { // This check ensures that SST has enough memory to store the entire // processImage constructed by SimEng. - if (maxAddrMemory_ < coreInstance_->getProcessImageSize()) { + /*if (maxAddrMemory_ < coreInstance_->getProcessImageSize()) { output_.verbose( CALL_INFO, 1, 0, "Error: SST backend memory is less than processImage size. " @@ -312,7 +388,7 @@ void SimEngCoreWrapper::fabricateSimEngCore() { "\'addr_range_end\'. \n"); primaryComponentOKToEndSim(); std::exit(EXIT_FAILURE); - } + }*/ // If testing is enabled populate heap if heap values have been specified. 
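+  // (SIMENG_ENABLE_SST_TESTS is a compile-time option; heapStr_ appears to be
+  // supplied via the component's "heap" parameter and is parsed into 64-bit
+  // values by splitHeapStr() further down.)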
#ifdef SIMENG_ENABLE_SST_TESTS if (heapStr_ != "") { @@ -320,8 +396,10 @@ void SimEngCoreWrapper::fabricateSimEngCore() { } #endif // Send the process image data over to the SST memory - dataMemory_->sendProcessImageToSST(coreInstance_->getProcessImage().get(), - coreInstance_->getProcessImageSize()); + //dataMemory_->sendProcessImageToSST(coreInstance_->getProcessImage().get(), + // coreInstance_->getProcessImageSize()); + + processMemoryImage(); output_.verbose(CALL_INFO, 1, 0, "SimEng core setup successfully.\n"); // Print out build metadata @@ -356,4 +434,4 @@ std::vector SimEngCoreWrapper::splitHeapStr() { } out.push_back(static_cast(std::stoull(acc))); return out; -} \ No newline at end of file +} diff --git a/sst/SimEngMemInterface.cc b/sst/SimEngMemInterface.cc index 4e07801f21..678d985329 100644 --- a/sst/SimEngMemInterface.cc +++ b/sst/SimEngMemInterface.cc @@ -18,7 +18,7 @@ SimEngMemInterface::SimEngMemInterface(StandardMem* mem, uint64_t cl, this->debug_ = debug; }; -void SimEngMemInterface::sendProcessImageToSST(char* image, uint64_t size) { +void SimEngMemInterface::sendProcessImageToSST(char* image, uint64_t size, uint64_t startAddr) { std::vector data; data.reserve(size); @@ -26,7 +26,8 @@ void SimEngMemInterface::sendProcessImageToSST(char* image, uint64_t size) { data.push_back((uint8_t)image[i]); } - StandardMem::Request* req = new StandardMem::Write(0, data.size(), data); + StandardMem::Request* req = new StandardMem::Write(startAddr, data.size(), data); + std::cout << std::hex << "[SSTSimEng:SimEngMemInterface] Sending image section to SST Memory at address 0x" << startAddr << ", size 0x" << data.size() << std::endl; sstMem_->sendUntimedData(req); return; }; @@ -176,7 +177,7 @@ void SimEngMemInterface::requestRead(const MemoryAccessTarget& target, if (debug_) { std::cout << "[SSTSimEng:SSTDebug] MemRead" << "-read-request-" << requestId << "-cycle-" << tickCounter_ - << "-split-" << requests.size() << std::endl; + << "-split-" << requests.size() << "-addr-0x" << std::hex << addrStart << std::endl; } for (StandardMem::Request* req : requests) { sstMem_->send(req); @@ -192,7 +193,11 @@ void SimEngMemInterface::requestWrite(const MemoryAccessTarget& target, AggregateWriteRequest* aggrReq = new AggregateWriteRequest(target, data); std::vector requests = makeSSTRequests(aggrReq, addrStart, addrEnd, size); - + if (debug_) { + std::cout << "[SSTSimEng:SSTDebug] MemWrite" + << "-write-request-xx" << "-cycle-" << tickCounter_ + << "-split-" << requests.size() << "-addr-0x" << std::hex << addrStart << std::endl; + } for (StandardMem::Request* req : requests) { sstMem_->send(req); } diff --git a/sst/config/mcu_int_example_config.py b/sst/config/mcu_int_example_config.py new file mode 100644 index 0000000000..fdd3b9682b --- /dev/null +++ b/sst/config/mcu_int_example_config.py @@ -0,0 +1,74 @@ +import sst +import os + +DEBUG_L1 = 1 +DEBUG_MEM = 1 +DEBUG_LEVEL = 1 + +clw = "32" + +# Assume this is run from SimEng root dir +simeng_path = os.getcwd() +binary_file = simeng_path + "/share/dhrystone_rv32imc/memory.elf" # Apply the appropriate binary +config_file = simeng_path + "/configs/DEMO_RISCV32_mcu_sst.yaml" + +# Define the simulation components +cpu = sst.Component("core", "sstsimeng.simengcore") +cpu.addParams({ + "simeng_config_path": config_file, + "executable_path": binary_file, + "executable_args": "", + "clock" : "1GHz", + "max_addr_memory": 4*1024*1024*1024-1, + "cache_line_width": clw, + "source": "", + "assemble_with_source": False, + "heap": "", + "debug": False +}) + 
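+# Usage sketch (assumes SST is installed with the sstsimeng.simengcore element
+# registered; the binary and YAML paths above are placeholders to adjust):
+#   cd <SimEng root> && sst sst/config/mcu_int_example_config.py
+# Note: max_addr_memory above is chosen to match addr_range_end and the 4GiB
+# mem_size on the memory controller defined below.
+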
+iface = cpu.setSubComponent("memory", "memHierarchy.standardInterface") + +l1cache = sst.Component("l1cache.mesi", "memHierarchy.Cache") +l1cache.addParams({ + "access_latency_cycles" : "1", + "cache_frequency" : "1Ghz", + "replacement_policy" : "nmru", + "coherence_protocol" : "MESI", + "associativity" : "4", + "cache_line_size" : clw, + "debug" : DEBUG_L1, + "debug_level" : DEBUG_LEVEL, + "L1" : "1", + "cache_size" : "32KiB" +}) + +# Explicitly set the link subcomponents instead of having cache figure them out based on connected port names +l1toC = l1cache.setSubComponent("cpulink", "memHierarchy.MemLink") +l1toM = l1cache.setSubComponent("memlink", "memHierarchy.MemLink") + +# Memory controller +memctrl = sst.Component("memory", "memHierarchy.MemController") +memctrl.addParams({ + "clock" : "1GHz", + "request_width" : clw, + "debug" : DEBUG_MEM, + "debug_level" : DEBUG_LEVEL, + "addr_range_end" : 4*1024*1024*1024-1, +}) +Mtol1 = memctrl.setSubComponent("cpulink", "memHierarchy.MemLink") + +# Memory model +memory = memctrl.setSubComponent("backend", "memHierarchy.simpleMem") +memory.addParams({ + "access_time" : "10ns", + "mem_size" : "4GiB", + "request_width": clw +}) + +# Define the simulation links +link_cpu_cache_link = sst.Link("link_cpu_cache_link") +link_cpu_cache_link.connect( (iface, "port", "0ps"), (l1toC, "port", "0ps") ) +link_mem_bus_link = sst.Link("link_mem_bus_link") +link_mem_bus_link.connect( (l1toM, "port", "0ps"), (Mtol1, "port", "0ps") ) + diff --git a/sst/include/SimEngCoreWrapper.hh b/sst/include/SimEngCoreWrapper.hh index cb53c0f50a..fc841949de 100644 --- a/sst/include/SimEngCoreWrapper.hh +++ b/sst/include/SimEngCoreWrapper.hh @@ -141,6 +141,8 @@ class SimEngCoreWrapper : public SST::Component { /** Method used to assemble SimEng core. */ void fabricateSimEngCore(); + void processMemoryImage(); + /** Method to split the passed executable argument's string into a vector of * individual arguments. */ std::vector splitArgs(std::string argString); @@ -210,7 +212,7 @@ class SimEngCoreWrapper : public SST::Component { std::shared_ptr dataMemory_; /** Number of clock iterations. */ - int iterations_; + uint64_t iterations_; /** Start time of simulation. */ std::chrono::high_resolution_clock::time_point startTime_; diff --git a/sst/include/SimEngMemInterface.hh b/sst/include/SimEngMemInterface.hh index 79789a9f39..463d0dc9d5 100644 --- a/sst/include/SimEngMemInterface.hh +++ b/sst/include/SimEngMemInterface.hh @@ -33,7 +33,7 @@ class SimEngMemInterface : public MemoryInterface { bool debug); /** Send SimEng's processImage to SST memory backend during `init` lifecycle * phase of SST. 
*/ - void sendProcessImageToSST(char* image, uint64_t size); + void sendProcessImageToSST(char* image, uint64_t size, uint64_t startAddr=0); /** * Construct an AggregatedReadRequest and use it to generate From 70fdf36f6a1048fb4681892a1f1334d4c8f579c5 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Wed, 6 Sep 2023 11:51:20 +0100 Subject: [PATCH 5/5] Clang format --- src/include/simeng/BranchPredictor.hh | 4 +- src/include/simeng/Elf.hh | 117 +++--- src/include/simeng/Instruction.hh | 8 +- src/include/simeng/RegisterValue.hh | 19 +- src/include/simeng/arch/Architecture.hh | 16 +- .../simeng/arch/aarch64/Architecture.hh | 2 +- src/include/simeng/arch/riscv/Architecture.hh | 26 +- .../simeng/arch/riscv/ExceptionHandler.hh | 2 +- src/include/simeng/arch/riscv/Instruction.hh | 17 +- .../simeng/arch/riscv/SystemRegister.hh | 348 +++++++++--------- src/include/simeng/kernel/LinuxProcess.hh | 2 +- src/include/simeng/models/emulation/Core.hh | 5 +- .../simeng/pipeline/PipelineBuffer1.hh | 19 +- .../simeng/pipeline_hi/LoadStoreQueue.hh | 12 +- .../simeng/pipeline_hi/StaticPredictor.hh | 3 +- src/lib/CoreInstance.cc | 10 +- src/lib/Elf.cc | 47 +-- src/lib/ModelConfig.cc | 10 +- src/lib/arch/aarch64/Architecture.cc | 10 +- src/lib/arch/riscv/Architecture.cc | 130 ++++--- src/lib/arch/riscv/InstructionMetadata.cc | 33 +- src/lib/arch/riscv/InstructionMetadata.hh | 6 +- src/lib/kernel/Linux.cc | 22 +- src/lib/models/emulation/Core.cc | 23 +- src/lib/pipeline_hi/FetchUnit.cc | 19 +- src/lib/pipeline_hi/RegDepMap.cc | 100 ++--- src/lib/pipeline_hi/WritebackUnit.cc | 9 +- sst/SimEngMemInterface.cc | 19 +- sst/include/SimEngMemInterface.hh | 3 +- 29 files changed, 538 insertions(+), 503 deletions(-) diff --git a/src/include/simeng/BranchPredictor.hh b/src/include/simeng/BranchPredictor.hh index 8d76f08753..dd34a067ae 100644 --- a/src/include/simeng/BranchPredictor.hh +++ b/src/include/simeng/BranchPredictor.hh @@ -48,8 +48,8 @@ class BranchPredictor { /** Overload predict() with more information in parameters */ virtual BranchPrediction predict(uint64_t address, BranchType type, - uint64_t knownTarget, uint8_t instByteLength) - = 0; + uint64_t knownTarget, + uint8_t instByteLength) = 0; /** Generate a branch prediction for the specified instruction address with a * branch type and possible known target. 
*/ diff --git a/src/include/simeng/Elf.hh b/src/include/simeng/Elf.hh index 485debea60..88e101c88b 100644 --- a/src/include/simeng/Elf.hh +++ b/src/include/simeng/Elf.hh @@ -1,8 +1,8 @@ #pragma once #include -#include #include +#include #include "simeng/span.hh" @@ -33,83 +33,76 @@ struct Elf32Header { typedef struct { unsigned char e_ident[16]; - uint16_t e_type; - uint16_t e_machine; - uint32_t e_version; - uint32_t e_entry; - uint32_t e_phoff; - uint32_t e_shoff; - uint32_t e_flags; - uint16_t e_ehsize; - uint16_t e_phentsize; - uint16_t e_phnum; - uint16_t e_shentsize; - uint16_t e_shnum; - uint16_t e_shstrndx; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint32_t e_entry; + uint32_t e_phoff; + uint32_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; } Elf32_Ehdr; typedef struct { - uint32_t p_type; - uint32_t p_offset; - uint32_t p_vaddr; - uint32_t p_paddr; - uint32_t p_filesz; - uint32_t p_memsz; - uint32_t p_flags; - uint32_t p_align; + uint32_t p_type; + uint32_t p_offset; + uint32_t p_vaddr; + uint32_t p_paddr; + uint32_t p_filesz; + uint32_t p_memsz; + uint32_t p_flags; + uint32_t p_align; } Elf32_Phdr; typedef struct { - uint32_t sh_name; - uint32_t sh_type; - uint32_t sh_flags; - uint32_t sh_addr; - uint32_t sh_offset; - uint32_t sh_size; - uint32_t sh_link; - uint32_t sh_info; - uint32_t sh_addralign; - uint32_t sh_entsize; + uint32_t sh_name; + uint32_t sh_type; + uint32_t sh_flags; + uint32_t sh_addr; + uint32_t sh_offset; + uint32_t sh_size; + uint32_t sh_link; + uint32_t sh_info; + uint32_t sh_addralign; + uint32_t sh_entsize; } Elf32_Shdr; typedef struct { - uint32_t st_name; - uint32_t st_value; - uint32_t st_size; - unsigned char st_info; - unsigned char st_other; - uint16_t st_shndx; + uint32_t st_name; + uint32_t st_value; + uint32_t st_size; + unsigned char st_info; + unsigned char st_other; + uint16_t st_shndx; } Elf32_Sym; -enum ElfPhType { - PT_NULL, - PT_LOAD -}; +enum ElfPhType { PT_NULL, PT_LOAD }; -enum ElfShType { - SHT_NULL, - SHT_PROGBITS, - SHT_SYMTAB, - SHT_STRTAB -}; +enum ElfShType { SHT_NULL, SHT_PROGBITS, SHT_SYMTAB, SHT_STRTAB }; /** A processed Executable and Linkable Format (ELF) file. 
*/ class Elf { - public: - Elf(std::string path, char** imagePointer, std::unordered_map& symbols); - ~Elf(); - uint64_t getProcessImageSize() const; - bool isValid() const; - uint64_t getEntryPoint() const; + public: + Elf(std::string path, char** imagePointer, + std::unordered_map& symbols); + ~Elf(); + uint64_t getProcessImageSize() const; + bool isValid() const; + uint64_t getEntryPoint() const; - private: - uint64_t entryPoint_; - std::vector headers_; - uint32_t entryPoint32_; - std::vector headers32_; - bool isValid_ = false; - uint64_t processImageSize_; - bool mode32bit_; + private: + uint64_t entryPoint_; + std::vector headers_; + uint32_t entryPoint32_; + std::vector headers32_; + bool isValid_ = false; + uint64_t processImageSize_; + bool mode32bit_; }; } // namespace simeng diff --git a/src/include/simeng/Instruction.hh b/src/include/simeng/Instruction.hh index 9ffc4a8d27..8c681b1076 100644 --- a/src/include/simeng/Instruction.hh +++ b/src/include/simeng/Instruction.hh @@ -24,7 +24,7 @@ class Instruction { bool exceptionEncountered() const; /** Binds an interrupt to this instruction */ - virtual void raiseInterrupt(int16_t& interruptId) {} + virtual void raiseInterrupt(int16_t& interruptId) {} /** Retrieve the source registers this instruction reads. */ virtual const span getOperandRegisters() const = 0; @@ -102,7 +102,8 @@ class Instruction { /** Retrieve branch type. */ virtual BranchType getBranchType() const = 0; - /** Retrieve an offset of branch target from the instruction's metadata if known. */ + /** Retrieve an offset of branch target from the instruction's metadata if + * known. */ virtual uint64_t getKnownOffset() const = 0; /** Is this a store address operation (a subcategory of store operations which @@ -217,7 +218,8 @@ class Instruction { /** What type of branch this instruction is. */ BranchType branchType_ = BranchType::Unknown; - /** If the offset of branch target is known at the time of decode, store it. */ + /** If the offset of branch target is known at the time of decode, store it. + */ uint64_t knownOffset_ = 0; // Flushing diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index d85471eda3..ebee3fa730 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -26,12 +26,13 @@ class RegisterValue { * number of bytes (defaulting to the size of the template type). */ template , T>* = nullptr> - RegisterValue(T value, uint16_t bytes = sizeof(T), bool relaxFor32 = true) : bytes(bytes) { + RegisterValue(T value, uint16_t bytes = sizeof(T), bool relaxFor32 = true) + : bytes(bytes) { relaxedFor32bit_ = relaxFor32; std::memset(this->value, 0, MAX_LOCAL_BYTES); if (isLocal()) { T* view = reinterpret_cast(this->value); - if (sizeof(T) > bytes) { // e.g. when T is int64 and bytes is 4 + if (sizeof(T) > bytes) { // e.g. when T is int64 and bytes is 4 std::memcpy(this->value, &value, bytes); } else { view[0] = value; @@ -98,13 +99,15 @@ class RegisterValue { const T* getAsVector() const { static_assert(alignof(T) <= 8 && "Alignment over 8 bytes not guaranteed"); assert(bytes > 0 && "Attempted to access an uninitialised RegisterValue"); - assert((sizeof(T) <= bytes || (bytes == 4 && sizeof(T) == 8)) && "Attempted" + assert((sizeof(T) <= bytes || (bytes == 4 && sizeof(T) == 8)) && + "Attempted" " to access a RegisterValue as a datatype larger than the " - "data held" ); - if(!relaxedFor32bit_) { // maybe #ifdef if it makes slower? 
- assert(sizeof(T) <= bytes && - "Attempted to access a RegisterValue as a datatype larger than the " - "data held"); + "data held"); + if (!relaxedFor32bit_) { // maybe #ifdef if it makes slower? + assert( + sizeof(T) <= bytes && + "Attempted to access a RegisterValue as a datatype larger than the " + "data held"); } if (isLocal()) { return reinterpret_cast(value); diff --git a/src/include/simeng/arch/Architecture.hh b/src/include/simeng/arch/Architecture.hh index 29874c6d69..0b31130c3b 100644 --- a/src/include/simeng/arch/Architecture.hh +++ b/src/include/simeng/arch/Architecture.hh @@ -15,10 +15,7 @@ using MacroOp = std::vector>; namespace arch { /** Modes. Assume only has 32-bit and 64-bit. */ -enum arch_mode { - ARCH_32BIT=1, - ARCH_64BIT=0 -}; +enum arch_mode { ARCH_32BIT = 1, ARCH_64BIT = 0 }; /** The types of changes that can be made to values within the process state. */ enum class ChangeType { REPLACEMENT, INCREMENT, DECREMENT }; @@ -116,17 +113,16 @@ class Architecture { YAML::Node config) const = 0; /** Updates System registers of any system-based timers. */ - virtual int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, - const uint64_t iterations) const = 0; + virtual int16_t updateSystemTimerRegisters( + RegisterFileSet* regFile, const uint64_t iterations) const = 0; /** Update trace file */ virtual void updateInstrTrace(const std::shared_ptr& instruction, - RegisterFileSet* regFile, uint64_t tick) const = 0; + RegisterFileSet* regFile, + uint64_t tick) const = 0; /** Return the mode (32-bit or 64-bit) */ - arch_mode is32BitMode() const { - return is32Bit_; - } + arch_mode is32BitMode() const { return is32Bit_; } protected: /** Mode, either 32-bit or 64-bit */ diff --git a/src/include/simeng/arch/aarch64/Architecture.hh b/src/include/simeng/arch/aarch64/Architecture.hh index 3c1ce27f59..3a0b8457cd 100644 --- a/src/include/simeng/arch/aarch64/Architecture.hh +++ b/src/include/simeng/arch/aarch64/Architecture.hh @@ -63,7 +63,7 @@ class Architecture : public arch::Architecture { /** Updates System registers of any system-based timers. */ int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, - const uint64_t iterations) const override; + const uint64_t iterations) const override; /** Returns the physical register structure as defined within the config file */ diff --git a/src/include/simeng/arch/riscv/Architecture.hh b/src/include/simeng/arch/riscv/Architecture.hh index 3bdb6287e9..2833113fe5 100644 --- a/src/include/simeng/arch/riscv/Architecture.hh +++ b/src/include/simeng/arch/riscv/Architecture.hh @@ -1,19 +1,18 @@ #pragma once #include -#include #include #include +#include #include "simeng/arch/Architecture.hh" - #include "simeng/arch/riscv/Instruction.hh" #include "simeng/kernel/Linux.hh" using csh = size_t; -#include "simeng/arch/riscv/SystemRegister.hh" #include "simeng/arch/riscv/ExceptionHandler.hh" +#include "simeng/arch/riscv/SystemRegister.hh" namespace simeng { namespace arch { @@ -30,13 +29,14 @@ struct constantsPool { struct archConstants { uint8_t alignMask; uint8_t bytesLimit; /* Minimum bytes the decoder needs to process */ - uint8_t regWidth; /* Register width in bytes */ + uint8_t regWidth; /* Register width in bytes */ }; /* A basic RISC-V implementation of the `Architecture` interface. 
*/ class Architecture : public arch::Architecture { public: - Architecture(kernel::Linux& kernel, YAML::Node config, std::shared_ptr& dataMemory); + Architecture(kernel::Linux& kernel, YAML::Node config, + std::shared_ptr& dataMemory); ~Architecture(); /** Pre-decode instruction memory into a macro-op of `Instruction` * instances. Returns the number of bytes consumed to produce it (always 4), @@ -74,9 +74,10 @@ class Architecture : public arch::Architecture { /** Returns the minimum size of a valid instruction in bytes. */ uint8_t getMinInstructionSize() const override; - /** Updates System registers of any system-based timers. Return +ve id if interrupt occurs */ + /** Updates System registers of any system-based timers. Return +ve id if + * interrupt occurs */ int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, - const uint64_t iterations) const override; + const uint64_t iterations) const override; /** Returns the physical register structure as defined within the config file */ @@ -115,15 +116,18 @@ class Architecture : public arch::Architecture { std::unordered_map systemRegisterMap_; /** Ordered map of memory mapped system regsiters banks **/ - std::map memoryMappedSystemRegisterBlocks; + std::map + memoryMappedSystemRegisterBlocks; - /* Memory Interface through which memory mapped system registers are accessed */ + /* Memory Interface through which memory mapped system registers are accessed + */ std::shared_ptr systemRegisterMemoryInterface; /* Optional Clint block which replicates that functionality in spike */ std::shared_ptr clint; - /* Optional Host Target Interface block which replicates that functionality in spike */ + /* Optional Host Target Interface block which replicates that functionality in + * spike */ std::shared_ptr htif; /** A map to hold the relationship between aarch64 instruction groups and @@ -141,7 +145,7 @@ class Architecture : public arch::Architecture { kernel::Linux& linux_; /** A pointer to the trace file */ - std::ofstream *traceFile_; + std::ofstream* traceFile_; /** Switch for updateInstrTrace() */ bool traceOn_ = false; diff --git a/src/include/simeng/arch/riscv/ExceptionHandler.hh b/src/include/simeng/arch/riscv/ExceptionHandler.hh index 36cfd5d187..501f52bc30 100644 --- a/src/include/simeng/arch/riscv/ExceptionHandler.hh +++ b/src/include/simeng/arch/riscv/ExceptionHandler.hh @@ -57,7 +57,7 @@ class ExceptionHandler : public simeng::arch::ExceptionHandler { */ bool readBufferThen(uint64_t ptr, uint64_t length, std::function then, bool firstCall = true); - + /** generate system register changes associated with taking an exception **/ void takeException(uint64_t causecode); diff --git a/src/include/simeng/arch/riscv/Instruction.hh b/src/include/simeng/arch/riscv/Instruction.hh index 60966ce044..55b72692d1 100644 --- a/src/include/simeng/arch/riscv/Instruction.hh +++ b/src/include/simeng/arch/riscv/Instruction.hh @@ -89,12 +89,11 @@ class Instruction : public simeng::Instruction { virtual InstructionException getException() const; /** Raise an interrupt. */ - void raiseInterrupt(int16_t& interruptId) - { - interruptId_ = interruptId; + void raiseInterrupt(int16_t& interruptId) { + interruptId_ = interruptId; exceptionEncountered_ = true; - exception_ = InstructionException::Interrupt; - interruptId = -1; + exception_ = InstructionException::Interrupt; + interruptId = -1; } /** Get Id of this interrupr */ @@ -152,7 +151,8 @@ class Instruction : public simeng::Instruction { /** Retrieve branch type. 
*/ BranchType getBranchType() const override; - /** Retrieve an offset of branch target from the instruction's metadata if known. */ + /** Retrieve an offset of branch target from the instruction's metadata if + * known. */ uint64_t getKnownOffset() const override; /** Is this a store address operation (a subcategory of store operations which @@ -207,7 +207,8 @@ class Instruction : public simeng::Instruction { static const uint8_t MAX_SOURCE_REGISTERS = 2; /** The maximum number of destination registers any supported RISC-V * instruction can have. */ - static const uint8_t MAX_DESTINATION_REGISTERS = 2; //CSRs can be another destination apart from std RD + static const uint8_t MAX_DESTINATION_REGISTERS = + 2; // CSRs can be another destination apart from std RD /** A reference to the ISA instance this instruction belongs to. */ const Architecture& architecture_; @@ -307,7 +308,7 @@ class Instruction : public simeng::Instruction { std::vector memoryData; /** Return integer register value, to support both 32-bit and 64-bit mode */ - int64_t getSignedInt(RegisterValue& value) const; + int64_t getSignedInt(RegisterValue& value) const; int16_t interruptId_; }; diff --git a/src/include/simeng/arch/riscv/SystemRegister.hh b/src/include/simeng/arch/riscv/SystemRegister.hh index 0556156ef6..5f0fe74459 100644 --- a/src/include/simeng/arch/riscv/SystemRegister.hh +++ b/src/include/simeng/arch/riscv/SystemRegister.hh @@ -1,12 +1,11 @@ #pragma once #include -#include #include #include +#include #include "simeng/arch/Architecture.hh" - #include "simeng/arch/riscv/Instruction.hh" #include "simeng/kernel/Linux.hh" @@ -17,213 +16,198 @@ namespace riscv { // Should probably move to Capstone enum riscv_sysreg { - SYSREG_MSTATUS = 0x300, - SYSREG_MIE = 0x304, - SYSREG_MTVEC = 0x305, - SYSREG_MSTATUSH = 0x310, - SYSREG_MSCRATCH = 0x340, - SYSREG_MEPC = 0x341, - SYSREG_MCAUSE = 0x342, - SYSREG_MHARTID = 0xF14, - SYSREG_MXCPTSC = 0xFC2, - SYSREG_CYCLE = 0xC00, - SYSREG_TIME = 0xC01, - SYSREG_INSTRRET = 0xC02 + SYSREG_MSTATUS = 0x300, + SYSREG_MIE = 0x304, + SYSREG_MTVEC = 0x305, + SYSREG_MSTATUSH = 0x310, + SYSREG_MSCRATCH = 0x340, + SYSREG_MEPC = 0x341, + SYSREG_MCAUSE = 0x342, + SYSREG_MHARTID = 0xF14, + SYSREG_MXCPTSC = 0xFC2, + SYSREG_CYCLE = 0xC00, + SYSREG_TIME = 0xC01, + SYSREG_INSTRRET = 0xC02 }; enum riscv_causecode_enum { - CAUSE_IADDRESS_MISALIGN = 0, - CAUSE_IACCESS_FAULT = 1, + CAUSE_IADDRESS_MISALIGN = 0, + CAUSE_IACCESS_FAULT = 1, CAUSE_ILLEGAL_INSTRUCTION = 2, - CAUSE_BREAKPOINT = 3, - CAUSE_LDADDRESS_MISALIGN = 4, - CAUSE_LDACCESS_FAULT = 5, - CAUSE_STADDRESS_MISALIGN = 6, - CAUSE_STACCESS_FAULT = 7, - CAUSE_ECALL_FROM_M = 11 + CAUSE_BREAKPOINT = 3, + CAUSE_LDADDRESS_MISALIGN = 4, + CAUSE_LDACCESS_FAULT = 5, + CAUSE_STADDRESS_MISALIGN = 6, + CAUSE_STACCESS_FAULT = 7, + CAUSE_ECALL_FROM_M = 11 }; -enum class InterruptId { - HALT = 1, - TIMER = 7 -}; +enum class InterruptId { HALT = 1, TIMER = 7 }; -enum riscv_sysreg_masks { - MSTATUS_MIE_MASK = 0x8, - MSTATUS_MPIE_MASK = 0x80 -}; +enum riscv_sysreg_masks { MSTATUS_MIE_MASK = 0x8, MSTATUS_MPIE_MASK = 0x80 }; typedef uint16_t riscv_causecode; class MemoryMappedSystemRegister { - public: - MemoryMappedSystemRegister(const RegisterValue& val) : state(val) {} - bool size() { return state.size(); } - virtual void put(const RegisterValue& val) { state = val; } - virtual const RegisterValue& get() { return state; } - private: - RegisterValue state; + public: + MemoryMappedSystemRegister(const RegisterValue& val) : state(val) {} + bool size() { return 
state.size(); } + virtual void put(const RegisterValue& val) { state = val; } + virtual const RegisterValue& get() { return state; } + + private: + RegisterValue state; }; class MemoryMappedSystemRegisterBlock { - public: - MemoryMappedSystemRegisterBlock(size_t sz) : size_(sz) {} - size_t size() { return size_; } - virtual bool put(uint16_t, const RegisterValue&); - virtual bool get(uint16_t, RegisterValue&); - virtual void tick() {} - protected: - /** Ordered map of memory mapped system regsiters **/ - std::map memoryMappedSystemRegisters; - size_t size_; + public: + MemoryMappedSystemRegisterBlock(size_t sz) : size_(sz) {} + size_t size() { return size_; } + virtual bool put(uint16_t, const RegisterValue&); + virtual bool get(uint16_t, RegisterValue&); + virtual void tick() {} + + protected: + /** Ordered map of memory mapped system regsiters **/ + std::map memoryMappedSystemRegisters; + size_t size_; }; class SystemRegisterMemoryInterface : public MemoryInterface { - public: - SystemRegisterMemoryInterface( - std::shared_ptr& dataMemory, - std::map& memoryMappedSystemRegisterBlocks - ) : - dataMemory_(dataMemory), - memoryMappedSystemRegisterBlocks_(memoryMappedSystemRegisterBlocks) - {} - - /** Request a read from the supplied target location. */ - virtual void requestRead(const MemoryAccessTarget& target, - uint64_t requestId = 0) - { - RegisterValue data(0,target.size); - if (getMemoryMappedSystemRegister(target.address, data)) - completedReads_.push_back({target, data, requestId}); - else - dataMemory_.get()->requestRead(target,requestId); - } - - /** Request a write of `data` to the target location. */ - virtual void requestWrite(const MemoryAccessTarget& target, - const RegisterValue& data) - { - if (!putMemoryMappedSystemRegister(target.address, data)) - dataMemory_.get()->requestWrite(target,data); - } - - /** Retrieve all completed read requests. */ - virtual const span getCompletedReads() const - { - if (completedReads_.empty()) - return dataMemory_.get()->getCompletedReads(); - else - return {const_cast(completedReads_.data()), completedReads_.size()}; - } - - /** Clear the completed reads. */ - virtual void clearCompletedReads() - { - if (completedReads_.empty()) - dataMemory_.get()->clearCompletedReads(); - else - completedReads_.clear(); - } - - /** Returns true if there are any oustanding memory requests in-flight. */ - virtual bool hasPendingRequests() const - { - return dataMemory_.get()->hasPendingRequests(); - } - - /** Tick the memory interface to allow it to process internal tasks. - * - * TODO: Move ticking out of the memory interface and into a central "memory - * system" covering a set of related interfaces. - */ - virtual void tick() - { - dataMemory_.get()->tick(); - } - - private : - /** Put/Get Memory Mapped Registers */ - bool putMemoryMappedSystemRegister(uint64_t address, const RegisterValue& value); - bool getMemoryMappedSystemRegister(uint64_t address, RegisterValue& value); - - std::shared_ptr dataMemory_; - - /** Address map of all system register blocks */ - std::map& memoryMappedSystemRegisterBlocks_; - - /** A vector containing all completed read requests. */ - std::vector completedReads_; + public: + SystemRegisterMemoryInterface( + std::shared_ptr& dataMemory, + std::map& + memoryMappedSystemRegisterBlocks) + : dataMemory_(dataMemory), + memoryMappedSystemRegisterBlocks_(memoryMappedSystemRegisterBlocks) {} + + /** Request a read from the supplied target location. 
*/ + virtual void requestRead(const MemoryAccessTarget& target, + uint64_t requestId = 0) { + RegisterValue data(0, target.size); + if (getMemoryMappedSystemRegister(target.address, data)) + completedReads_.push_back({target, data, requestId}); + else + dataMemory_.get()->requestRead(target, requestId); + } + + /** Request a write of `data` to the target location. */ + virtual void requestWrite(const MemoryAccessTarget& target, + const RegisterValue& data) { + if (!putMemoryMappedSystemRegister(target.address, data)) + dataMemory_.get()->requestWrite(target, data); + } + + /** Retrieve all completed read requests. */ + virtual const span getCompletedReads() const { + if (completedReads_.empty()) + return dataMemory_.get()->getCompletedReads(); + else + return {const_cast(completedReads_.data()), + completedReads_.size()}; + } + + /** Clear the completed reads. */ + virtual void clearCompletedReads() { + if (completedReads_.empty()) + dataMemory_.get()->clearCompletedReads(); + else + completedReads_.clear(); + } + + /** Returns true if there are any oustanding memory requests in-flight. */ + virtual bool hasPendingRequests() const { + return dataMemory_.get()->hasPendingRequests(); + } + + /** Tick the memory interface to allow it to process internal tasks. + * + * TODO: Move ticking out of the memory interface and into a central "memory + * system" covering a set of related interfaces. + */ + virtual void tick() { dataMemory_.get()->tick(); } + + private: + /** Put/Get Memory Mapped Registers */ + bool putMemoryMappedSystemRegister(uint64_t address, + const RegisterValue& value); + bool getMemoryMappedSystemRegister(uint64_t address, RegisterValue& value); + + std::shared_ptr dataMemory_; + + /** Address map of all system register blocks */ + std::map& + memoryMappedSystemRegisterBlocks_; + + /** A vector containing all completed read requests. 
*/ + std::vector completedReads_; }; class Architecture; class HostTargetInterface : public MemoryMappedSystemRegisterBlock { - public: - enum { - PAYLOAD_OFFSET = 0, - DEVICEID_OFFSET = 4 - }; - - HostTargetInterface(Architecture& architecture) - : - MemoryMappedSystemRegisterBlock(8), - architecture_(architecture), - isHalted_(false) - { - memoryMappedSystemRegisters[PAYLOAD_OFFSET] = new MemoryMappedSystemRegister(static_cast(0)); - memoryMappedSystemRegisters[DEVICEID_OFFSET] = new MemoryMappedSystemRegister(static_cast(0)); - } - - bool put(uint16_t offset, const RegisterValue&value); - - int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations) { - if (isHalted_) - return static_cast(InterruptId::HALT); - return -1; - } - - private : - Architecture& architecture_; - bool isHalted_; + public: + enum { PAYLOAD_OFFSET = 0, DEVICEID_OFFSET = 4 }; + + HostTargetInterface(Architecture& architecture) + : MemoryMappedSystemRegisterBlock(8), + architecture_(architecture), + isHalted_(false) { + memoryMappedSystemRegisters[PAYLOAD_OFFSET] = + new MemoryMappedSystemRegister(static_cast(0)); + memoryMappedSystemRegisters[DEVICEID_OFFSET] = + new MemoryMappedSystemRegister(static_cast(0)); + } + + bool put(uint16_t offset, const RegisterValue& value); + + int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, + const uint64_t iterations) { + if (isHalted_) return static_cast(InterruptId::HALT); + return -1; + } + + private: + Architecture& architecture_; + bool isHalted_; }; class Clint : public MemoryMappedSystemRegisterBlock { - public: - enum { - CLINT_BASE = 0x02000000, - CLINT_SIZE = 0x0000c000, - MTIMECMP_OFFSET = 0x4000, - MTIME_OFFSET = 0xbff8 - }; - - Clint(Architecture& architecture) - : - MemoryMappedSystemRegisterBlock(CLINT_SIZE), - architecture_(architecture), - mtime_(static_cast(0)), - mtimecmp_(static_cast(0)), - mtime_freq(100), - mtime_count(0), - last_tick(0) - { - memoryMappedSystemRegisters[MTIME_OFFSET] = &mtime_; - memoryMappedSystemRegisters[MTIMECMP_OFFSET] = &mtimecmp_; - } - - int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, const uint64_t iterations); - - private : - Architecture& architecture_; - - MemoryMappedSystemRegister mtime_; - MemoryMappedSystemRegister mtimecmp_; - - uint32_t mtime_freq; - uint32_t mtime_count; - uint64_t last_tick; + public: + enum { + CLINT_BASE = 0x02000000, + CLINT_SIZE = 0x0000c000, + MTIMECMP_OFFSET = 0x4000, + MTIME_OFFSET = 0xbff8 + }; + + Clint(Architecture& architecture) + : MemoryMappedSystemRegisterBlock(CLINT_SIZE), + architecture_(architecture), + mtime_(static_cast(0)), + mtimecmp_(static_cast(0)), + mtime_freq(100), + mtime_count(0), + last_tick(0) { + memoryMappedSystemRegisters[MTIME_OFFSET] = &mtime_; + memoryMappedSystemRegisters[MTIMECMP_OFFSET] = &mtimecmp_; + } + + int16_t updateSystemTimerRegisters(RegisterFileSet* regFile, + const uint64_t iterations); + + private: + Architecture& architecture_; + + MemoryMappedSystemRegister mtime_; + MemoryMappedSystemRegister mtimecmp_; + + uint32_t mtime_freq; + uint32_t mtime_count; + uint64_t last_tick; }; - } // namespace riscv } // namespace arch } // namespace simeng diff --git a/src/include/simeng/kernel/LinuxProcess.hh b/src/include/simeng/kernel/LinuxProcess.hh index d6b2c4a967..a4b4ce428d 100644 --- a/src/include/simeng/kernel/LinuxProcess.hh +++ b/src/include/simeng/kernel/LinuxProcess.hh @@ -116,7 +116,7 @@ class LinuxProcess { /** Shared pointer to processImage. 
*/ std::shared_ptr processImage_; - + std::unordered_map symbols_; }; diff --git a/src/include/simeng/models/emulation/Core.hh b/src/include/simeng/models/emulation/Core.hh index 1db10d2381..fb8767ec2f 100644 --- a/src/include/simeng/models/emulation/Core.hh +++ b/src/include/simeng/models/emulation/Core.hh @@ -11,7 +11,8 @@ #include "simeng/arch/Architecture.hh" #include "simeng/span.hh" -// TODO: This is architecture-specific, need to be refactored later. See comments in Core.cc +// TODO: This is architecture-specific, need to be refactored later. See +// comments in Core.cc #include "simeng/arch/riscv/Architecture.hh" namespace simeng { @@ -110,7 +111,7 @@ class Core : public simeng::Core { uint64_t branchesExecuted_ = 0; /** Set to interruptId when interrupt occurs, otherwise -1 */ - int16_t interruptId_; + int16_t interruptId_; }; } // namespace emulation diff --git a/src/include/simeng/pipeline/PipelineBuffer1.hh b/src/include/simeng/pipeline/PipelineBuffer1.hh index dd2ed70ce7..e677645fdf 100644 --- a/src/include/simeng/pipeline/PipelineBuffer1.hh +++ b/src/include/simeng/pipeline/PipelineBuffer1.hh @@ -15,13 +15,18 @@ class PipelineBuffer { /** Construct a pipeline buffer of width `width`, and fill all slots with * `initialValue`. */ PipelineBuffer(int width, const T& initialValue) - : width(width), buffer(width * defaultLength_, initialValue), - length_(defaultLength_), headIndex_(defaultLength_-1), + : width(width), + buffer(width * defaultLength_, initialValue), + length_(defaultLength_), + headIndex_(defaultLength_ - 1), tailIndex_(0) {} PipelineBuffer(int width, const T& initialValue, int length) - : width(width), buffer(width * length, initialValue), length_(length), - headIndex_(length_-1), tailIndex_(0) { + : width(width), + buffer(width * length, initialValue), + length_(length), + headIndex_(length_ - 1), + tailIndex_(0) { assert(length_ != 0 && "Pipeline buffer length cannot be 0"); } @@ -30,14 +35,14 @@ class PipelineBuffer { void tick() { if (isStalled_) return; - //length ==1 shortcut? condition check cost + // length ==1 shortcut? condition check cost - if (headIndex_) { // when headIndex != 0 + if (headIndex_) { // when headIndex != 0 headIndex_--; } else { headIndex_ = length_ - 1; } - if (tailIndex_) { // when tailIndex != 0 + if (tailIndex_) { // when tailIndex != 0 tailIndex_--; } else { tailIndex_ = length_ - 1; diff --git a/src/include/simeng/pipeline_hi/LoadStoreQueue.hh b/src/include/simeng/pipeline_hi/LoadStoreQueue.hh index 211b1ef72d..5f34b956d6 100644 --- a/src/include/simeng/pipeline_hi/LoadStoreQueue.hh +++ b/src/include/simeng/pipeline_hi/LoadStoreQueue.hh @@ -117,7 +117,7 @@ class LoadStoreQueue { bool isBusy() const; - float getAvgLdLat() const { return (totalLdLatency)/numLoads; }; + float getAvgLdLat() const { return (totalLdLatency) / numLoads; }; uint32_t getMaxLdLat() const { return maxLdLatency; }; uint32_t getMinLdLat() const { return minLdLatency; }; @@ -138,7 +138,7 @@ class LoadStoreQueue { /** Map of loads that have requested their data, keyed by sequence ID. */ std::unordered_map> requestedLoads_; - /** Map of loads that have requested their data, keyed by sequence ID. */ + /** Map of loads that have requested their data, keyed by sequence ID. */ std::unordered_map latencyLoads_; /** A function handler to call to forward the results of a completed load. */ @@ -214,15 +214,16 @@ class LoadStoreQueue { /** The number of loads and stores permitted per cycle. 
*/ std::array reqLimits_; - /** A map between LSQ cycles and load or store requests ready on that cycle. */ + /** A map between LSQ cycles and load or store requests ready on that cycle. + */ std::deque requestQueue_; /* Identifier for request to memory*/ uint8_t busReqId = 0; - //bool activeMisAlignedStore = false; + // bool activeMisAlignedStore = false; - //Stats + // Stats uint64_t numLoads = 0; double totalLdLatency = 0; uint32_t maxLdLatency = 0; @@ -230,6 +231,5 @@ class LoadStoreQueue { float averageAccessLdLatency = 0.0; }; - } // namespace pipeline_hi } // namespace simeng diff --git a/src/include/simeng/pipeline_hi/StaticPredictor.hh b/src/include/simeng/pipeline_hi/StaticPredictor.hh index d8923dc23c..83c7f0e83e 100644 --- a/src/include/simeng/pipeline_hi/StaticPredictor.hh +++ b/src/include/simeng/pipeline_hi/StaticPredictor.hh @@ -12,7 +12,8 @@ namespace pipeline_hi { */ class StaticPredictor : public BranchPredictor { public: - StaticPredictor(uint8_t sType); //TODO: temp constructor, get rid of yaml, delete it later + StaticPredictor(uint8_t sType); // TODO: temp constructor, get rid of yaml, + // delete it later StaticPredictor(YAML::Node config); ~StaticPredictor(); diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc index e8f91d3450..f8c76c930b 100644 --- a/src/lib/CoreInstance.cc +++ b/src/lib/CoreInstance.cc @@ -90,8 +90,7 @@ void CoreInstance::setSimulationMode() { "outoforder") { mode_ = SimulationMode::OutOfOrder; modeString_ = "Out-of-Order"; - } else if (config_["Core"]["Simulation-Mode"].as() == - "mcu") { + } else if (config_["Core"]["Simulation-Mode"].as() == "mcu") { mode_ = SimulationMode::MCU; modeString_ = "MCU"; } @@ -239,8 +238,8 @@ void CoreInstance::createCore() { // Create the architecture, with knowledge of the kernel if (config_["Core"]["ISA"].as() == "rv64" || config_["Core"]["ISA"].as() == "rv32") { - arch_ = - std::make_unique(kernel_, config_,dataMemory_); + arch_ = std::make_unique( + kernel_, config_, dataMemory_); } else if (config_["Core"]["ISA"].as() == "AArch64") { arch_ = std::make_unique(kernel_, config_); @@ -249,7 +248,8 @@ void CoreInstance::createCore() { // Construct branch predictor object predictor_ = std::make_unique(config_); if (mode_ == SimulationMode::MCU) { - predictor_ = std::make_unique(2); //config_ + predictor_ = + std::make_unique(2); // config_ } // Extract port arrangement from config file diff --git a/src/lib/Elf.cc b/src/lib/Elf.cc index 901f370eec..3b7e71e28d 100644 --- a/src/lib/Elf.cc +++ b/src/lib/Elf.cc @@ -14,8 +14,8 @@ namespace simeng { * https://man7.org/linux/man-pages/man5/elf.5.html */ -Elf::Elf(std::string path, char** imagePointer, std::unordered_map& symbols) -{ +Elf::Elf(std::string path, char** imagePointer, + std::unordered_map& symbols) { std::ifstream file(path, std::ios::binary); if (!file.is_open()) { @@ -49,7 +49,8 @@ Elf::Elf(std::string path, char** imagePointer, std::unordered_map(&headerEntrySize), sizeof(headerEntrySize)); + file.read(reinterpret_cast(&headerEntrySize), + sizeof(headerEntrySize)); uint16_t headerEntries; file.read(reinterpret_cast(&headerEntries), sizeof(headerEntries)); @@ -170,8 +172,8 @@ Elf::Elf(std::string path, char** imagePointer, std::unordered_map(&eheader), sizeof(eheader)); entryPoint32_ = eheader.e_entry; - + processImageSize_ = 0; // Loop over pheaders and extract them. 
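+  // Size the process image so that every loadable segment fits at its virtual
+  // address: take the maximum p_vaddr + p_memsz over all PT_LOAD headers.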
  file.seekg(eheader.e_phoff);
  std::vector pheaders(eheader.e_phnum);
-  for (auto &ph : pheaders) {
-    file.read(reinterpret_cast(&ph), sizeof(ph));
-    if ((ph.p_type == PT_LOAD) && (ph.p_vaddr+ph.p_memsz > processImageSize_))
-      processImageSize_ = ph.p_vaddr+ph.p_memsz;
+  for (auto& ph : pheaders) {
+    file.read(reinterpret_cast(&ph), sizeof(ph));
+    if ((ph.p_type == PT_LOAD) &&
+        (ph.p_vaddr + ph.p_memsz > processImageSize_))
+      processImageSize_ = ph.p_vaddr + ph.p_memsz;
  }
  *imagePointer = (char*)malloc(processImageSize_ * sizeof(char));
  for (const auto& ph : pheaders) {
-    if (ph.p_type == PT_LOAD) {
+    if (ph.p_type == PT_LOAD) {
      file.seekg(ph.p_offset);
-      // Read `fileSize` bytes from `file` into the appropriate place in process memory
-      file.read(*imagePointer+ph.p_vaddr, ph.p_filesz);
+      // Read `fileSize` bytes from `file` into the appropriate place in
+      // process memory
+      file.read(*imagePointer + ph.p_vaddr, ph.p_filesz);
-      if (ph.p_memsz>ph.p_filesz)
+      if (ph.p_memsz > ph.p_filesz)
        // Need to pad the rest of the section memory with zeros
-        memset(*imagePointer+ph.p_vaddr+ph.p_filesz, 0, ph.p_memsz-ph.p_filesz);
+        memset(*imagePointer + ph.p_vaddr + ph.p_filesz, 0,
+               ph.p_memsz - ph.p_filesz);
    }
  }
@@ -214,13 +219,13 @@ Elf::Elf(std::string path, char** imagePointer, std::unordered_map sheaders(eheader.e_shnum);
  unsigned int sh_idx = 0;
-  for (auto &sh : sheaders) {
+  for (auto& sh : sheaders) {
    file.read(reinterpret_cast(&sh), sizeof(sh));
    // find section header for strings to use for symbol table.
-    if (sh.sh_type==SHT_SYMTAB)
+    if (sh.sh_type == SHT_SYMTAB)
      sh_symtab = &sh;
-    else if (sh.sh_type==SHT_STRTAB && sh_idx!=eheader.e_shstrndx)
+    else if (sh.sh_type == SHT_STRTAB && sh_idx != eheader.e_shstrndx)
      sh_strtab = &sh;
    sh_idx++;
  };
@@ -232,9 +237,9 @@ Elf::Elf(std::string path, char** imagePointer, std::unordered_mapsh_offset);
-  unsigned num_symbols = sh_symtab->sh_size/sh_symtab->sh_entsize;
+  unsigned num_symbols = sh_symtab->sh_size / sh_symtab->sh_entsize;
  Elf32_Sym sym;
-  while(num_symbols--) {
+  while (num_symbols--) {
    file.read(reinterpret_cast(&sym), sizeof(sym));
    if (strtab[sym.st_name]) {
      std::string name(&strtab[sym.st_name]);
diff --git a/src/lib/ModelConfig.cc b/src/lib/ModelConfig.cc
index 342476347c..1d00ce4bab 100644
--- a/src/lib/ModelConfig.cc
+++ b/src/lib/ModelConfig.cc
@@ -67,10 +67,12 @@ void ModelConfig::validate() {
                            "Streaming-Vector-Length"};
  validISA = nodeChecker(
      configFile_[root][subFields[0]], subFields[0],
-      std::vector({"AArch64", "rv64", "rv32"}), ExpectedValue::String);
+      std::vector({"AArch64", "rv64", "rv32"}),
+      ExpectedValue::String);
  nodeChecker(configFile_[root][subFields[1]], subFields[1],
-              {"emulation", "inorderpipelined", "mcu", "outoforder"},
-              ExpectedValue::String);
+  nodeChecker(
+      configFile_[root][subFields[1]], subFields[1],
+      {"emulation", "inorderpipelined", "mcu", "outoforder"},
+      ExpectedValue::String);
  nodeChecker(configFile_[root][subFields[2]], subFields[2],
              std::make_pair(0.f, 10.f), ExpectedValue::Float);
  nodeChecker(configFile_[root][subFields[3]], subFields[3],
diff --git a/src/lib/arch/aarch64/Architecture.cc b/src/lib/arch/aarch64/Architecture.cc
index 5ad11c70d1..a61d9542ea 100644
--- a/src/lib/arch/aarch64/Architecture.cc
+++ b/src/lib/arch/aarch64/Architecture.cc
@@ -287,8 +287,8 @@ uint64_t Architecture::getVectorLength() const { return VL_; }
 uint64_t Architecture::getStreamingVectorLength() const { return SVL_; }
-int16_t Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile,
-                                                 const uint64_t iterations) const {
+int16_t Architecture::updateSystemTimerRegisters(
+    RegisterFileSet* regFile, const uint64_t iterations) const {
  // Update the Processor Cycle Counter to total cycles completed.
  regFile->set(PCCreg_, iterations);
  // Update Virtual Counter Timer at correct frequency.
@@ -329,9 +329,9 @@ void Architecture::setSVCRval(const uint64_t newVal) const {
  SVCRval_ = newVal;
 }
-void Architecture::updateInstrTrace(const std::shared_ptr& instruction,
-    simeng::RegisterFileSet* regFile, uint64_t tick) const {
-  }
+void Architecture::updateInstrTrace(
+    const std::shared_ptr& instruction,
+    simeng::RegisterFileSet* regFile, uint64_t tick) const {}
 }  // namespace aarch64
 }  // namespace arch
diff --git a/src/lib/arch/riscv/Architecture.cc b/src/lib/arch/riscv/Architecture.cc
index 84afcc0996..e9fcc7cd38 100644
--- a/src/lib/arch/riscv/Architecture.cc
+++ b/src/lib/arch/riscv/Architecture.cc
@@ -15,10 +15,9 @@ namespace riscv {
 std::unordered_map Architecture::decodeCache;
 std::forward_list Architecture::metadataCache;
-Architecture::Architecture(kernel::Linux& kernel, YAML::Node config, std::shared_ptr& dataMemory)
-:
-  linux_(kernel)
-{
+Architecture::Architecture(kernel::Linux& kernel, YAML::Node config,
+                           std::shared_ptr& dataMemory)
+    : linux_(kernel) {
  is32Bit_ = ARCH_64BIT;
  if (config["Core"]["ISA"].as() == "rv32") {
    is32Bit_ = ARCH_32BIT;
@@ -27,8 +26,9 @@ Architecture::Architecture(kernel::Linux& kernel, YAML::Node config, std::shared
  cs_mode csMode = CS_MODE_RISCV64;
  constantsPool constantsPool;
-  if(is32Bit_) {
-    csMode = CS_MODE_RISCV32GC; // TODO Note: currently using local (1-line)modified capstone
+  if (is32Bit_) {
+    csMode = CS_MODE_RISCV32GC;  // TODO Note: currently using a local
+                                 // (1-line) modified Capstone
    constants_.alignMask = constantsPool.alignMaskCompressed;
    constants_.regWidth = constantsPool.byteLength32;
    constants_.bytesLimit = constantsPool.bytesLimitCompressed;
@@ -63,11 +63,12 @@ Architecture::Architecture(kernel::Linux& kernel, YAML::Node config, std::shared
  // Memory Mapped System Register Blocks
-  // if elf file includes the label tohost then assume that this binary supports HTIF protocol (used by spike) and include an HTI block
+  // If the ELF file includes the label tohost then assume that this binary
+  // supports the HTIF protocol (used by Spike) and include an HTIF block
  uint64_t htifAddress;
-  if (linux_.lookupSymbolValue("tohost",htifAddress))
-  {
-    std::cout << "[SimEng] HTIF detected at: " << std::hex << htifAddress << std::endl;
+  if (linux_.lookupSymbolValue("tohost", htifAddress)) {
+    std::cout << "[SimEng] HTIF detected at: " << std::hex << htifAddress
+              << std::endl;
    htif = std::make_shared(*this);
    memoryMappedSystemRegisterBlocks[htifAddress] = htif.get();
  }
@@ -76,9 +77,10 @@ Architecture::Architecture(kernel::Linux& kernel, YAML::Node config, std::shared
  clint = std::make_shared(*this);
  memoryMappedSystemRegisterBlocks[Clint::CLINT_BASE] = clint.get();
-  if (!memoryMappedSystemRegisterBlocks.empty())
-  {
-    systemRegisterMemoryInterface = std::make_shared(dataMemory, memoryMappedSystemRegisterBlocks);
+  if (!memoryMappedSystemRegisterBlocks.empty()) {
+    systemRegisterMemoryInterface =
+        std::make_shared(
+            dataMemory, memoryMappedSystemRegisterBlocks);
    dataMemory = systemRegisterMemoryInterface;
  }
@@ -173,7 +175,8 @@ Architecture::Architecture(kernel::Linux& kernel, YAML::Node config, std::shared
      }
    }
  }
-  if (config["Core"]["Trace"].IsDefined() && config["Core"]["Trace"].as()) {
+  if (config["Core"]["Trace"].IsDefined() &&
+      config["Core"]["Trace"].as()) {
    traceFile_ = new std::ofstream();
    traceFile_->open("./trace.log");
    traceOn_ = true;
@@ -184,7 +187,7 @@ Architecture::~Architecture() {
  decodeCache.clear();
  metadataCache.clear();
  groupExecutionInfo_.clear();
-  if(traceOn_) {
+  if (traceOn_) {
    traceFile_->close();
  }
 }
@@ -192,7 +195,6 @@ Architecture::~Architecture() {
 uint8_t Architecture::predecode(const void* ptr, uint8_t bytesAvailable, uint64_t instructionAddress, MacroOp& output) const {
-
  // Check that instruction address is 4-byte aligned as required by RISC-V
  // 2-byte when Compressed ISA is supported
  if (instructionAddress & constants_.alignMask) {
@@ -305,9 +307,9 @@ int32_t Architecture::getSystemRegisterTag(uint16_t reg) const {
 /** Returns a System Register index from a system register tag. reverse lookup slow but only used in printing so will be fine */
 uint16_t Architecture::getSystemRegisterIdFromTag(int32_t tag) const {
-  for (auto it = systemRegisterMap_.begin();it != systemRegisterMap_.end();it++)
-    if (it->second == tag)
-      return it->first;
+  for (auto it = systemRegisterMap_.begin(); it != systemRegisterMap_.end();
+       it++)
+    if (it->second == tag) return it->first;
  assert(0 && "Tag not found in systemRegisterMap");
 }
@@ -318,11 +320,10 @@ ProcessStateChange Architecture::getInitialState() const {
  changes.modifiedRegisters.push_back({RegisterType::GENERAL, 2});
  uint64_t stackPointer;
  // TODO: check if this conditional expression is needed
-  if(is32Bit_) {
+  if (is32Bit_) {
    stackPointer = (uint32_t)linux_.getInitialStackPointer();
    changes.modifiedRegisterValues.push_back((uint32_t)stackPointer);
-  } else
-  {
+  } else {
    stackPointer = linux_.getInitialStackPointer();
    changes.modifiedRegisterValues.push_back(stackPointer);
  }
@@ -335,8 +336,10 @@ uint8_t Architecture::getMinInstructionSize() const { return 2; }
 std::vector Architecture::getConfigPhysicalRegisterStructure(YAML::Node config) const {
-  return {{constants_.regWidth, config["Register-Set"]["GeneralPurpose-Count"].as()},
-          {constants_.regWidth, config["Register-Set"]["FloatingPoint-Count"].as()},
+  return {{constants_.regWidth,
+           config["Register-Set"]["GeneralPurpose-Count"].as()},
+          {constants_.regWidth,
+           config["Register-Set"]["FloatingPoint-Count"].as()},
          {constants_.regWidth, getNumSystemRegisters()}};
 }
@@ -350,15 +353,13 @@ uint16_t Architecture::getNumSystemRegisters() const {
  return static_cast(systemRegisterMap_.size());
 }
-int16_t Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile,
-                                                 const uint64_t iterations) const {
+int16_t Architecture::updateSystemTimerRegisters(
+    RegisterFileSet* regFile, const uint64_t iterations) const {
  int16_t interruptId = -1;
-  if (htif)
-  {
+  if (htif) {
    interruptId = htif->updateSystemTimerRegisters(regFile, iterations);
-    if (interruptId>=0)
-      return interruptId;
+    if (interruptId >= 0) return interruptId;
  }
  if (clint)
@@ -367,23 +368,27 @@ int16_t Architecture::updateSystemTimerRegisters(RegisterFileSet* regFile,
  return interruptId;
 }
-void Architecture::updateInstrTrace(const std::shared_ptr& instruction,
-    RegisterFileSet* regFile, uint64_t tick) const {
-  if(traceOn_) {
+void Architecture::updateInstrTrace(
+    const std::shared_ptr& instruction,
+    RegisterFileSet* regFile, uint64_t tick) const {
+  if (traceOn_) {
    Instruction instr_ = *static_cast(instruction.get());
    auto& metadata = instr_.getMetadata();
    std::stringstream s;
    s << "0x" << std::hex << instr_.getInstructionAddress() << " ";
    if (tick < 100000000)
-      s << "t(" << std::setfill('0') << std::setw(8) << std::dec << (uint32_t)tick << ") ";
+      s << "t(" << std::setfill('0') << std::setw(8) << std::dec
+        << (uint32_t)tick << ") ";
    else
-      s << "t(" << std::setfill('0') << std::setw(16) << std::dec << (uint32_t)tick << ") ";
+      s << "t(" << std::setfill('0') << std::setw(16) << std::dec
+        << (uint32_t)tick << ") ";
    s << "(";
-    if(metadata.len == IL_16B) {
+    if (metadata.len == IL_16B) {
      s << "0000";
    }
-    for(int8_t i=metadata.lenBytes; i>0; i--) {
-      s << std::hex << std::setfill('0') << std::setw(2) << static_cast(metadata.encoding[i-1]);
+    for (int8_t i = metadata.lenBytes; i > 0; i--) {
+      s << std::hex << std::setfill('0') << std::setw(2)
+        << static_cast(metadata.encoding[i - 1]);
    }
    s << ") ";
    s << metadata.mnemonic << " " << metadata.operandStr;
@@ -391,21 +396,25 @@ void Architecture::updateInstrTrace(const std::shared_ptr&
    auto destinations = instr_.getDestinationRegisters();
    int8_t num_src = (int8_t)sources.size();
    int8_t num_dest = (int8_t)destinations.size();
-    if((num_src + num_dest) >0) {
+    if ((num_src + num_dest) > 0) {
      s << " ";
      if (num_dest > 0) {
        s << "(d: ";
-        for(int8_t i=0;iget(reg).get();
-          if(i < (num_dest-1)) {
+          s << std::hex << std::setfill('0') << std::setw(8)
+            << regFile->get(reg).get();
+          if (i < (num_dest - 1)) {
            s << " ";
          }
        }
@@ -413,17 +422,21 @@ void Architecture::updateInstrTrace(const std::shared_ptr&
      }
      if (num_src > 0) {
        s << "(s: ";
-        for(int8_t i=0;iget(reg).get();
-          if(i < (num_src-1)) {
+          s << std::hex << std::setfill('0') << std::setw(8)
+            << regFile->get(reg).get();
+          if (i < (num_src - 1)) {
            s << " ";
          }
        }
@@ -432,7 +445,8 @@ void Architecture::updateInstrTrace(const std::shared_ptr&
    }
    s << std::endl;
    *traceFile_ << s.str();
-    traceFile_->flush(); //Helps with debugging sometimes as all the state of previous committed instr is written to file.
+    traceFile_->flush();  // Helps with debugging sometimes as all the state of
+                          // previous committed instr is written to file.
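+    // Each trace line therefore reads "0x<pc> t(<tick>) (<raw encoding>)
+    // <mnemonic> <operands>" followed by the destination (d:) and source (s:)
+    // register values read from the register file at trace time.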
  }
 }
 archConstants Architecture::getConstants() const { return constants_; }
diff --git a/src/lib/arch/riscv/InstructionMetadata.cc b/src/lib/arch/riscv/InstructionMetadata.cc
index d293bc7fdb..b929836b5c 100644
--- a/src/lib/arch/riscv/InstructionMetadata.cc
+++ b/src/lib/arch/riscv/InstructionMetadata.cc
@@ -38,7 +38,8 @@ InstructionMetadata::InstructionMetadata(const uint8_t* invalidEncoding,
      opcode(Opcode::RISCV_INSTRUCTION_LIST_END),
      implicitSourceCount(0),
      implicitDestinationCount(0),
-      operandCount(0), len(IL_INVALID) {
+      operandCount(0),
+      len(IL_INVALID) {
  assert(bytes <= sizeof(encoding));
  std::memcpy(encoding, invalidEncoding, bytes);
  mnemonic[0] = '\0';
@@ -260,17 +261,17 @@ void InstructionMetadata::alterPseudoInstructions(const cs_insn& insn) {
    case Opcode::RISCV_CSRRWI:
    case Opcode::RISCV_CSRRSI:
    case Opcode::RISCV_CSRRCI: {
-      //Extract CSR info
+      // Extract CSR info
      csr = ((uint32_t)encoding[3] << 4) | ((uint32_t)encoding[2] >> 4);
-      //If there are less than 2 operands provided add necessary x0 operand
-      if(operandCount == 1) {
-        if((strcmp(mnemonic, "rdinstret") == 0) ||
-           (strcmp(mnemonic, "rdcycle") == 0) ||
-           (strcmp(mnemonic, "rdtime") == 0) ||
-           (strcmp(mnemonic, "csrr") == 0)) { //csrrs rd,csr,x0
+      // If fewer than 2 operands are provided, add the necessary x0 operand
+      if (operandCount == 1) {
+        if ((strcmp(mnemonic, "rdinstret") == 0) ||
+            (strcmp(mnemonic, "rdcycle") == 0) ||
+            (strcmp(mnemonic, "rdtime") == 0) ||
+            (strcmp(mnemonic, "csrr") == 0)) {  // csrrs rd,csr,x0
          operands[1].type = RISCV_OP_REG;
          operands[1].reg = 1;
-        } else { //csrrxx x0,csr,rs/imm
+        } else {  // csrrxx x0,csr,rs/imm
          operands[1] = operands[0];
          operands[0].type = RISCV_OP_REG;
          operands[0].reg = 1;
@@ -305,13 +306,17 @@ void InstructionMetadata::includeZeroRegisterPosZero() {
  operandCount = 3;
 }
-
 void InstructionMetadata::setLength(uint8_t size) {
  lenBytes = size;
-  switch(size) {
-    case 2: len = IL_16B; break;
-    case 4: len = IL_32B; break;
-    default: len = IL_INVALID;
+  switch (size) {
+    case 2:
+      len = IL_16B;
+      break;
+    case 4:
+      len = IL_32B;
+      break;
+    default:
+      len = IL_INVALID;
  }
 }
diff --git a/src/lib/arch/riscv/InstructionMetadata.hh b/src/lib/arch/riscv/InstructionMetadata.hh
index 4ce164a346..796afc96c2 100644
--- a/src/lib/arch/riscv/InstructionMetadata.hh
+++ b/src/lib/arch/riscv/InstructionMetadata.hh
@@ -14,11 +14,7 @@ namespace Opcode {
 #include "RISCVGenInstrInfo.inc"
 }  // namespace Opcode
-enum INSTR_LENGTH {
-  IL_16B,
-  IL_32B,
-  IL_INVALID
-};
+enum INSTR_LENGTH { IL_16B, IL_32B, IL_INVALID };
 /** A simplified RISC-V-only version of the Capstone instruction structure.
 */
 struct InstructionMetadata {
diff --git a/src/lib/kernel/Linux.cc b/src/lib/kernel/Linux.cc
index bc060bbae4..424395c0f8 100644
--- a/src/lib/kernel/Linux.cc
+++ b/src/lib/kernel/Linux.cc
@@ -23,14 +23,15 @@ namespace kernel {
 void Linux::createProcess(const LinuxProcess& process) {
  assert(process.isValid() && "Attempted to use an invalid process");
  assert(processStates_.size() == 0 && "Multiple processes not yet supported");
-  processStates_.push_back({.pid = 0,  // TODO: create unique PIDs
-                            .path = process.getPath(),
-                            .startBrk = process.getHeapStart(),
-                            .currentBrk = process.getHeapStart(),
-                            .initialStackPointer = process.getStackPointer(),
-                            .mmapRegion = process.getMmapStart(),
-                            .pageSize = process.getPageSize(),
-                            });
+  processStates_.push_back({
+      .pid = 0,  // TODO: create unique PIDs
+      .path = process.getPath(),
+      .startBrk = process.getHeapStart(),
+      .currentBrk = process.getHeapStart(),
+      .initialStackPointer = process.getStackPointer(),
+      .mmapRegion = process.getMmapStart(),
+      .pageSize = process.getPageSize(),
+  });
  processStates_.back().fileDescriptorTable.push_back(STDIN_FILENO);
  processStates_.back().fileDescriptorTable.push_back(STDOUT_FILENO);
  processStates_.back().fileDescriptorTable.push_back(STDERR_FILENO);
@@ -652,9 +653,8 @@ int64_t Linux::writev(int64_t fd, const void* iovdata, int iovcnt) {
 }
 /** Lookup symbol value from table in elf file. */
-bool Linux::lookupSymbolValue(const std::string symbol, uint64_t& value)
-{
-  processStates_[0].process->lookupSymbolValue(symbol,value);
+bool Linux::lookupSymbolValue(const std::string symbol, uint64_t& value) {
+  return processStates_[0].process->lookupSymbolValue(symbol, value);
 }
 }  // namespace kernel
diff --git a/src/lib/models/emulation/Core.cc b/src/lib/models/emulation/Core.cc
index d9268da25f..a779ef7521 100644
--- a/src/lib/models/emulation/Core.cc
+++ b/src/lib/models/emulation/Core.cc
@@ -150,15 +150,15 @@ void Core::tick() {
 }
 void Core::execute(std::shared_ptr& uop) {
-
-  if (interruptId_>=0)
+  if (interruptId_ >= 0)
    uop->raiseInterrupt(interruptId_);
  else
    uop->execute();
  if (uop->exceptionEncountered()) {
    instructionsExecuted_++;
-    isa_.updateInstrTrace(uop, &registerFileSet_, ticks_); // Handle ECALL into trace here
+    isa_.updateInstrTrace(uop, &registerFileSet_,
+                          ticks_);  // Handle ECALL into trace here
    handleException(uop);
    return;
  }
@@ -192,14 +192,19 @@ void Core::execute(std::shared_ptr& uop) {
  if (uop->isLastMicroOp()) {
    instructionsExecuted_++;
-    // TODO: This is architecture-specific. It's here for the reference and should(will) be refactored later
-    uint16_t sysreg_instrret = isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_INSTRRET);
-    uint16_t sysreg_cycle = isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_CYCLE);
+    // TODO: This is architecture-specific. It's here for reference and
+    // should (and will) be refactored later
+    uint16_t sysreg_instrret =
+        isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_INSTRRET);
+    uint16_t sysreg_cycle =
+        isa_.getSystemRegisterTag(arch::riscv::riscv_sysreg::SYSREG_CYCLE);
    // NOTE: 64-bit system registers are not implemented yet
-    //TODO: Maybe make use of byteLength and remove is32BitMode() function?
+    // TODO: Maybe make use of byteLength and remove is32BitMode() function?
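+    // In rv32 mode the INSTRET and CYCLE counters below are written as 4-byte
+    // register values, matching the 32-bit CSR width used by this patch.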
    if (isa_.is32BitMode()) {
-      registerFileSet_.set(Register{0x2, sysreg_instrret}, RegisterValue(instructionsExecuted_, 4));
-      registerFileSet_.set(Register{0x2, sysreg_cycle}, RegisterValue(ticks_, 4));
+      registerFileSet_.set(Register{0x2, sysreg_instrret},
+                           RegisterValue(instructionsExecuted_, 4));
+      registerFileSet_.set(Register{0x2, sysreg_cycle},
+                           RegisterValue(ticks_, 4));
    }
    isa_.updateInstrTrace(uop, &registerFileSet_, ticks_);
  }
diff --git a/src/lib/pipeline_hi/FetchUnit.cc b/src/lib/pipeline_hi/FetchUnit.cc
index 4de190efca..56f6526e7b 100644
--- a/src/lib/pipeline_hi/FetchUnit.cc
+++ b/src/lib/pipeline_hi/FetchUnit.cc
@@ -109,12 +109,14 @@ void FetchUnit::tick() {
  // Check we have enough data to begin decoding
  if (bufferedBytes_ == isa_.getMinInstructionSize()) {
-    //Check if those bytes points to a instruction with minimum size or more data is required. If more data is required return
-    // TODO: this is not generic solution, just trying to make it work
+    // Check if those bytes point to an instruction of the minimum size or
+    // whether more data is required. If more data is required, return.
+    // TODO: this is not a generic solution, just trying to make it work
    uint16_t rawBits;
    memcpy(&rawBits, buffer + bufferOffset, 2);
-    if((rawBits & 0x3) == 0x3) {
-      //std::cout << std::hex << "Only 2 bytes left in fetch buffer and not compresses instr type, current PC: 0x" << pc_ << std::endl;
+    if ((rawBits & 0x3) == 0x3) {
+      // std::cout << std::hex << "Only 2 bytes left in fetch buffer and not
+      // compressed instr type, current PC: 0x" << pc_ << std::endl;
      return;
    }
  }
@@ -151,7 +153,8 @@ void FetchUnit::tick() {
    //     {encoding, bytesRead, pc_, macroOp[0]->getBranchPrediction()});
    // if (pc_ == loopBoundaryAddress_) {
-    //   // loopBoundaryAddress_ has been fetched whilst filling the loop buffer.
+    //   // loopBoundaryAddress_ has been fetched whilst filling the loop
+    //   buffer.
    //   // Stop filling as loop body has been recorded and begin to supply
    //   // decode unit with instructions from the loop buffer
    //   loopBufferState_ = LoopBufferState::SUPPLYING;
@@ -177,8 +180,10 @@ void FetchUnit::tick() {
      // Predicted as taken; set PC to predicted target address
      pc_ = prediction.target;
    }
-//    std::cout << std::hex << "PC: 0x" << pc_ << ", PBL: 0x" << programByteLength_ << std::endl;
-    if (pc_ == 0 && (macroOp[0]->getBranchType() == BranchType::SubroutineCall)) {
+    // std::cout << std::hex << "PC: 0x" << pc_ << ", PBL: 0x" <<
+    // programByteLength_ << std::endl;
+    if (pc_ == 0 &&
+        (macroOp[0]->getBranchType() == BranchType::SubroutineCall)) {
      waitSCEval_ = true;
      break;
    }
diff --git a/src/lib/pipeline_hi/RegDepMap.cc b/src/lib/pipeline_hi/RegDepMap.cc
index 4ab004bfdb..45a5de7f60 100644
--- a/src/lib/pipeline_hi/RegDepMap.cc
+++ b/src/lib/pipeline_hi/RegDepMap.cc
@@ -6,7 +6,9 @@
 #ifdef RDMDEBUG
 #define DEBUG(x) std::cout << "Core: " << std::hex << x << std::endl;
 #else
-#define DEBUG(x) do { } while (false);
+#define DEBUG(x) \
+  do {           \
+  } while (false);
 #endif
 namespace simeng {
@@ -14,48 +16,50 @@ namespace pipeline_hi {
 const Register l_ZERO_REGISTER = {0, 0};
-RegDepMap::RegDepMap(const std::vector registerFileStructures,
-                     const RegisterFileSet& registerFileSet) :
-    registerFileStructures_(registerFileStructures),
-    registerFileSet_(registerFileSet) {
-  regMap_.resize(registerFileStructures_.size());//Just for Integer Register File for now
-  for (size_t type=0; type registerFileStructures,
+                     const RegisterFileSet& registerFileSet)
+    : registerFileStructures_(registerFileStructures),
+      registerFileSet_(registerFileSet) {
+  regMap_.resize(registerFileStructures_
+                     .size());  // Just for Integer Register File for now
+  for (size_t type = 0; type < registerFileStructures_.size(); type++) {
    regMap_[type].resize(registerFileStructures_.at(type).quantity);
  }
 }
-RegDepMap::~RegDepMap()
-{
+RegDepMap::~RegDepMap() {
  for (unsigned i = 0; i < regMap_.size(); i++) {
-    for (unsigned j = 0; j < regMap_[i].size(); j++)
-      regMap_[i][j].clear();
+    for (unsigned j = 0; j < regMap_[i].size(); j++) regMap_[i][j].clear();
    regMap_[i].clear();
  }
  regMap_.clear();
 }
-void RegDepMap::insert(InstrPtr instr)
-{
-  //TODO: IRF X0 is not a dependency!
+void RegDepMap::insert(InstrPtr instr) {
+  // TODO: IRF X0 is not a dependency!
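+  // Every destination register of the incoming instruction gains an entry in
+  // the dependency map; x0 is skipped because it is hard-wired to zero and
+  // can never carry a dependency.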
  auto& destinationRegisters = instr->getDestinationRegisters();
-  for(const auto& reg: destinationRegisters) {
-    if(reg != l_ZERO_REGISTER) { //Not X0
+  for (const auto& reg : destinationRegisters) {
+    if (reg != l_ZERO_REGISTER) {  // Not X0
      outstandingDep_++;
-      DEBUG("Adding Depencency: addr, 0x" << instr->getInstructionAddress() << std::dec << ", dest: " << reg << ", outstanding: " << outstandingDep_);
+      DEBUG("Adding Dependency: addr, 0x"
+            << instr->getInstructionAddress() << std::dec << ", dest: " << reg
+            << ", outstanding: " << outstandingDep_);
      regMap_[reg.type][reg.tag].push_back(instr);
    }
  }
 }
-void RegDepMap::remove(InstrPtr instr)
-{
+void RegDepMap::remove(InstrPtr instr) {
  auto& destinationRegisters = instr->getDestinationRegisters();
-  for(const auto& reg: destinationRegisters) {
+  for (const auto& reg : destinationRegisters) {
    auto it = regMap_[reg.type][reg.tag].begin();
    while (it != regMap_[reg.type][reg.tag].end()) {
-      if(*it == instr) {
+      if (*it == instr) {
        outstandingDep_--;
-        DEBUG("Removing Depencency: addr, 0x" << instr->getInstructionAddress() << std::dec << ", dest: " << reg << ", outstanding: " << outstandingDep_);
+        DEBUG("Removing Dependency: addr, 0x"
+              << instr->getInstructionAddress() << std::dec << ", dest: " << reg
+              << ", outstanding: " << outstandingDep_);
        it = regMap_[reg.type][reg.tag].erase(it);
        break;
      } else {
@@ -65,8 +69,7 @@ void RegDepMap::remove(InstrPtr instr)
  }
 }
-bool RegDepMap::canRead(InstrPtr instr)
-{
+bool RegDepMap::canRead(InstrPtr instr) {
  bool dependency = false;
  auto& sourceRegisters = instr->getOperandRegisters();
  for (uint16_t i = 0; i < sourceRegisters.size(); i++) {
@@ -74,13 +77,19 @@ bool RegDepMap::canRead(InstrPtr instr)
    if (!instr->isOperandReady(i)) {
      // The operand hasn't already been supplied
-      if (regMap_[srcReg.type][srcReg.tag].size() == 0) {//pick up value from register file
-        instr->supplyOperand(i, registerFileSet_.get(srcReg));
+      if (regMap_[srcReg.type][srcReg.tag].size() ==
+          0) {  // pick up value from register file
+        instr->supplyOperand(i, registerFileSet_.get(srcReg));
      } else if (regMap_[srcReg.type][srcReg.tag].back()->hasExecuted() &&
-                 !(regMap_[srcReg.type][srcReg.tag].back()->isMul() || regMap_[srcReg.type][srcReg.tag].back()->isDiv() ||
-                   (regMap_[srcReg.type][srcReg.tag].back()->isLoad() && !instr->isStoreData()))) {//pick up value from last executed instruction
-        const auto& destRegisters = regMap_[srcReg.type][srcReg.tag].back()->getDestinationRegisters();
-        const auto& destValues = regMap_[srcReg.type][srcReg.tag].back()->getResults();
+                 !(regMap_[srcReg.type][srcReg.tag].back()->isMul() ||
+                   regMap_[srcReg.type][srcReg.tag].back()->isDiv() ||
+                   (regMap_[srcReg.type][srcReg.tag].back()->isLoad() &&
+                    !instr->isStoreData()))) {  // pick up value from last
+                                                // executed instruction
+        const auto& destRegisters =
+            regMap_[srcReg.type][srcReg.tag].back()->getDestinationRegisters();
+        const auto& destValues =
+            regMap_[srcReg.type][srcReg.tag].back()->getResults();
        for (size_t j = 0; j < destRegisters.size(); j++) {
          const auto& destReg = destRegisters[j];
          if (destReg == srcReg) {
@@ -97,33 +106,30 @@ bool RegDepMap::canRead(InstrPtr instr)
  return !dependency;
 }
-bool RegDepMap::canWrite(InstrPtr instr)
-{
+bool RegDepMap::canWrite(InstrPtr instr) {
  bool dependency = false;
  auto& destRegisters = instr->getDestinationRegisters();
-  for(uint16_t i = 0; i < destRegisters.size(); i++) {
-    const auto& destReg = destRegisters[i];
-    if (regMap_[destReg.type][destReg.tag].size() > 0 &&
-        !regMap_[destReg.type][destReg.tag].back()->hasExecuted()) {
-      dependency = true;
-      break;
-    }
+  for (uint16_t i = 0; i < destRegisters.size(); i++) {
+    const auto& destReg = destRegisters[i];
+    if (regMap_[destReg.type][destReg.tag].size() > 0 &&
+        !regMap_[destReg.type][destReg.tag].back()->hasExecuted()) {
+      dependency = true;
+      break;
+    }
  }
  return !dependency || (instr->isLoad());
 }
-//Clean up the options logic to ensure all of them work well together
-bool RegDepMap::canForward(InstrPtr instr)
-{
-  return true;
-}
+// Clean up the options logic to ensure all of them work well together
+bool RegDepMap::canForward(InstrPtr instr) { return true; }
 void RegDepMap::purgeFlushed() {
  for (auto& registerType : regMap_) {
    for (auto& dependencyList : registerType) {
      auto it = dependencyList.begin();
      while (it != dependencyList.end()) {
-        DEBUG("Purge entry present at addr: 0x" << (*it)->getInstructionAddress());
+        DEBUG("Purge entry present at addr: 0x"
+              << (*it)->getInstructionAddress());
        if ((*it)->isFlushed()) {
          outstandingDep_--;
          it = dependencyList.erase(it);
@@ -135,9 +141,7 @@ void RegDepMap::purgeFlushed() {
  }
 }
-void RegDepMap::dump()
-{
-}
+void RegDepMap::dump() {}
 }  // namespace pipeline_hi
 }  // namespace simeng
diff --git a/src/lib/pipeline_hi/WritebackUnit.cc b/src/lib/pipeline_hi/WritebackUnit.cc
index b0dfd97161..dce0dd5e6a 100644
--- a/src/lib/pipeline_hi/WritebackUnit.cc
+++ b/src/lib/pipeline_hi/WritebackUnit.cc
@@ -55,8 +55,9 @@ uint64_t WritebackUnit::getInstructionsWrittenCount() const {
 std::vector> WritebackUnit::getInstsForTrace() {
  std::shared_ptr instr;
-  std::deque>::iterator it = committedInstsForTrace_.begin();
-  while(it != committedInstsForTrace_.end()) {
+  std::deque>::iterator it =
+      committedInstsForTrace_.begin();
+  while (it != committedInstsForTrace_.end()) {
    instr = *it;
    if (removeInstrOrderQ_(instr)) {
      committedInstsForTrace_.erase(it);
@@ -64,10 +65,10 @@ std::vector> WritebackUnit::getInstsForTrace() {
    }
    it++;
  }
-  return {}; //committedInstsForTrace_;
+  return {};  // committedInstsForTrace_;
 }
 void WritebackUnit::traceFinished() {
-  //committedInstsForTrace_.clear();
+  // committedInstsForTrace_.clear();
 }
 }  // namespace pipeline_hi
diff --git a/sst/SimEngMemInterface.cc b/sst/SimEngMemInterface.cc
index 678d985329..d01d7d216e 100644
--- a/sst/SimEngMemInterface.cc
+++ b/sst/SimEngMemInterface.cc
@@ -18,7 +18,8 @@ SimEngMemInterface::SimEngMemInterface(StandardMem* mem, uint64_t cl,
  this->debug_ = debug;
 };
-void SimEngMemInterface::sendProcessImageToSST(char* image, uint64_t size, uint64_t startAddr) {
+void SimEngMemInterface::sendProcessImageToSST(char* image, uint64_t size,
+                                               uint64_t startAddr) {
  std::vector data;
  data.reserve(size);
@@ -26,8 +27,12 @@ void SimEngMemInterface::sendProcessImageToSST(char* image, uint64_t size, uint6
    data.push_back((uint8_t)image[i]);
  }
-  StandardMem::Request* req = new StandardMem::Write(startAddr, data.size(), data);
-  std::cout << std::hex << "[SSTSimEng:SimEngMemInterface] Sending image section to SST Memory at address 0x" << startAddr << ", size 0x" << data.size() << std::endl;
+  StandardMem::Request* req =
+      new StandardMem::Write(startAddr, data.size(), data);
+  std::cout << std::hex
+            << "[SSTSimEng:SimEngMemInterface] Sending image section to SST "
+               "Memory at address 0x"
+            << startAddr << ", size 0x" << data.size() << std::endl;
  sstMem_->sendUntimedData(req);
  return;
 };
@@ -177,7 +182,8 @@ void SimEngMemInterface::requestRead(const MemoryAccessTarget& target,
  if (debug_) {
    std::cout << "[SSTSimEng:SSTDebug] MemRead"
              << "-read-request-" << requestId << "-cycle-" << tickCounter_
-              << "-split-" << requests.size() << "-addr-0x" << std::hex << addrStart << std::endl;
+              << "-split-" << requests.size() << "-addr-0x" << std::hex
+              << addrStart << std::endl;
  }
  for (StandardMem::Request* req : requests) {
    sstMem_->send(req);
@@ -195,8 +201,9 @@ void SimEngMemInterface::requestWrite(const MemoryAccessTarget& target,
  makeSSTRequests(aggrReq, addrStart, addrEnd, size);
  if (debug_) {
    std::cout << "[SSTSimEng:SSTDebug] MemWrite"
              << "-write-request-xx" << "-cycle-" << tickCounter_
-              << "-split-" << requests.size() << "-addr-0x" << std::hex << addrStart << std::endl;
+              << "-write-request-xx"
+              << "-cycle-" << tickCounter_ << "-split-" << requests.size()
+              << "-addr-0x" << std::hex << addrStart << std::endl;
  }
  for (StandardMem::Request* req : requests) {
    sstMem_->send(req);
diff --git a/sst/include/SimEngMemInterface.hh b/sst/include/SimEngMemInterface.hh
index 463d0dc9d5..5a55ec5e10 100644
--- a/sst/include/SimEngMemInterface.hh
+++ b/sst/include/SimEngMemInterface.hh
@@ -33,7 +33,8 @@ class SimEngMemInterface : public MemoryInterface {
                     bool debug);
  /** Send SimEng's processImage to SST memory backend during `init` lifecycle
   * phase of SST. */
-  void sendProcessImageToSST(char* image, uint64_t size, uint64_t startAddr=0);
+  void sendProcessImageToSST(char* image, uint64_t size,
+                             uint64_t startAddr = 0);
  /**
   * Construct an AggregatedReadRequest and use it to generate