From e77c2064e774efd22273641cfcab4bf9ee58902a Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Tue, 3 Sep 2024 17:27:01 +0100 Subject: [PATCH 01/16] Initial fix for Wmaybe-uninitialized --- src/include/simeng/RegisterValue.hh | 10 +++++----- src/include/simeng/arch/aarch64/helpers/neon.hh | 11 +++++++++-- src/lib/RegisterValue.cc | 4 ++-- src/lib/arch/aarch64/ExceptionHandler.cc | 2 +- src/lib/arch/riscv/ExceptionHandler.cc | 2 +- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 4faadb301d..6dcb3d7cfe 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -28,12 +28,12 @@ class RegisterValue { typename std::enable_if_t, T>* = nullptr> RegisterValue(T value, uint16_t bytes = sizeof(T)) : bytes(bytes) { if (isLocal()) { - T* view = reinterpret_cast(this->value); + T* view = reinterpret_cast(this->localValue); view[0] = value; if (bytes > sizeof(T)) { // Zero the remaining bytes not set by the provided value - std::fill(this->value + sizeof(T), this->value + bytes, + std::fill(this->localValue + sizeof(T), this->localValue + bytes, 0); } } else { @@ -57,7 +57,7 @@ class RegisterValue { assert(capacity >= bytes && "Capacity is less than requested bytes"); char* dest; if (isLocal()) { - dest = this->value; + dest = this->localValue; } else { dest = static_cast(pool.allocate(capacity)); std::memset(dest, 0, capacity); @@ -96,7 +96,7 @@ class RegisterValue { "Attempted to access a RegisterValue as a datatype larger than the " "data held"); if (isLocal()) { - return reinterpret_cast(value); + return reinterpret_cast(localValue); } else { return reinterpret_cast(ptr.get()); } @@ -128,7 +128,7 @@ class RegisterValue { /** The underlying local member value. Aligned to 8 bytes to prevent * potential alignment issue when casting. 
*/ - alignas(8) char value[MAX_LOCAL_BYTES]; + alignas(8) char localValue[MAX_LOCAL_BYTES] = {}; }; inline bool operator==(const RegisterValue& lhs, const RegisterValue& rhs) { diff --git a/src/include/simeng/arch/aarch64/helpers/neon.hh b/src/include/simeng/arch/aarch64/helpers/neon.hh index cc9aa03461..7ee2ac1c6c 100644 --- a/src/include/simeng/arch/aarch64/helpers/neon.hh +++ b/src/include/simeng/arch/aarch64/helpers/neon.hh @@ -795,14 +795,21 @@ RegisterValue vecSumElems_2ops(srcValContainer& sourceValues) { template RegisterValue vecXtn(srcValContainer& sourceValues, bool isXtn2) { const D* d; - if (isXtn2) d = sourceValues[0].getAsVector(); - const N* n = sourceValues[isXtn2 ? 1 : 0].getAsVector(); + const N* n; + if (isXtn2) { + d = sourceValues[0].getAsVector(); + n = sourceValues[1].getAsVector(); + } else { + d = {}; + n = sourceValues[0].getAsVector(); + } D out[16 / sizeof(D)] = {0}; int index = 0; for (int i = 0; i < I; i++) { if (isXtn2 & (i < (I / 2))) { + assert(isXtn2 && "isXtn2 is false so d is not initialised"); out[i] = d[i]; } else { out[i] = static_cast(n[index]); diff --git a/src/lib/RegisterValue.cc b/src/lib/RegisterValue.cc index ea27a2a5f3..adddeb375e 100644 --- a/src/lib/RegisterValue.cc +++ b/src/lib/RegisterValue.cc @@ -19,8 +19,8 @@ RegisterValue RegisterValue::zeroExtend(uint16_t fromBytes, auto extended = RegisterValue(0, toBytes); // Get the appropriate source/destination pointers and copy the data - const char* src = (isLocal() ? value : ptr.get()); - char* dest = (extended.isLocal() ? extended.value : extended.ptr.get()); + const char* src = (isLocal() ? localValue : ptr.get()); + char* dest = (extended.isLocal() ? 
extended.localValue : extended.ptr.get()); std::memcpy(dest, src, fromBytes); diff --git a/src/lib/arch/aarch64/ExceptionHandler.cc b/src/lib/arch/aarch64/ExceptionHandler.cc index ae98dddb1a..505f605ace 100644 --- a/src/lib/arch/aarch64/ExceptionHandler.cc +++ b/src/lib/arch/aarch64/ExceptionHandler.cc @@ -329,7 +329,7 @@ bool ExceptionHandler::init() { return readStringThen(filename, filenamePtr, kernel::Linux::LINUX_PATH_MAX, [=](auto length) { // Invoke the kernel - kernel::stat statOut; + kernel::stat statOut = {}; uint64_t retval = linux_.newfstatat( dfd, filename, statOut, flag); ProcessStateChange stateChange = { diff --git a/src/lib/arch/riscv/ExceptionHandler.cc b/src/lib/arch/riscv/ExceptionHandler.cc index 15a5518c64..423c7d1435 100644 --- a/src/lib/arch/riscv/ExceptionHandler.cc +++ b/src/lib/arch/riscv/ExceptionHandler.cc @@ -327,7 +327,7 @@ bool ExceptionHandler::init() { return readStringThen(filename, filenamePtr, kernel::Linux::LINUX_PATH_MAX, [=](auto length) { // Invoke the kernel - kernel::stat statOut; + kernel::stat statOut = {}; uint64_t retval = linux_.newfstatat( dfd, filename, statOut, flag); ProcessStateChange stateChange = { From 7292aaf9edcc6ad7957a1ef649f38f3fd4885257 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Mon, 23 Sep 2024 18:04:36 +0100 Subject: [PATCH 02/16] Force clangformat --- src/include/simeng/RegisterValue.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 6dcb3d7cfe..5590427af7 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -33,8 +33,8 @@ class RegisterValue { if (bytes > sizeof(T)) { // Zero the remaining bytes not set by the provided value - std::fill(this->localValue + sizeof(T), this->localValue + bytes, - 0); + std::fill(this->localValue + sizeof(T), + this->localValue + bytes, 0); } } else { void* data = pool.allocate(bytes); From 
7ec30b04ce38043cd185d027d5a37c718a882d39 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Mon, 23 Sep 2024 19:12:31 +0100 Subject: [PATCH 03/16] Hacky fix for failing test --- test/unit/RegisterValueTest.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/unit/RegisterValueTest.cc b/test/unit/RegisterValueTest.cc index c365ff8f7e..b80ccbe593 100644 --- a/test/unit/RegisterValueTest.cc +++ b/test/unit/RegisterValueTest.cc @@ -40,7 +40,10 @@ TEST(RegisterValueTest, Reinterpret) { TEST(RegisterValueTest, Vector) { uint64_t value = 0x0000000200000001; auto registerValue = simeng::RegisterValue(value, 8); - auto vector = registerValue.getAsVector(); + const uint32_t* vector = registerValue.getAsVector(); + // Print vector's address to prevent it from being optimised away causing the + // test to fail + std::cout << vector << std::endl; EXPECT_EQ(vector[0], 1); EXPECT_EQ(vector[1], 2); } From 9fe3e21d8dc73dab2ff5bb465341b8406b20aa75 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Thu, 26 Sep 2024 11:55:47 +0100 Subject: [PATCH 04/16] Add no strict aliasing flag --- CMakeLists.txt | 2 +- test/unit/RegisterValueTest.cc | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b8f4379b98..6e4b74e9a8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,7 +66,7 @@ set(CMAKE_MACOSX_RPATH 1) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # Create variable to enable additional compiler warnings for SimEng targets only -set(SIMENG_COMPILE_OPTIONS -Wall -pedantic -Werror) #-Wextra +set(SIMENG_COMPILE_OPTIONS -Wall -pedantic -Werror -fno-strict-aliasing) #-Wextra # Disable RTTI for all targets add_compile_options($<$:-fno-rtti>) diff --git a/test/unit/RegisterValueTest.cc b/test/unit/RegisterValueTest.cc index b80ccbe593..034d6d8766 100644 --- a/test/unit/RegisterValueTest.cc +++ b/test/unit/RegisterValueTest.cc @@ -41,9 +41,6 @@ TEST(RegisterValueTest, Vector) { uint64_t value = 0x0000000200000001; auto
registerValue = simeng::RegisterValue(value, 8); const uint32_t* vector = registerValue.getAsVector(); - // Print vector's address to prevent it from being optimised away causing the - // test to fail - std::cout << vector << std::endl; EXPECT_EQ(vector[0], 1); EXPECT_EQ(vector[1], 2); } From 6a7fa2fcce4b56caba0921813ac3257f215b816f Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Thu, 24 Oct 2024 13:39:49 +0100 Subject: [PATCH 05/16] [TEMP] Half updated with wrapper class --- CMakeLists.txt | 2 +- src/include/simeng/RegisterValue.hh | 77 +++-- .../simeng/arch/aarch64/helpers/bitmanip.hh | 2 +- .../simeng/arch/aarch64/helpers/sve.hh | 2 +- .../memory/FixedLatencyMemoryInterface.hh | 4 +- .../simeng/memory/FlatMemoryInterface.hh | 4 +- src/lib/RegisterValue.cc | 7 +- src/lib/arch/aarch64/ExceptionHandler.cc | 22 +- src/lib/arch/aarch64/Instruction_address.cc | 74 ++--- src/lib/arch/aarch64/Instruction_decode.cc | 2 +- src/lib/arch/aarch64/Instruction_execute.cc | 298 +++++++++--------- src/lib/arch/riscv/ExceptionHandler.cc | 22 +- src/lib/arch/riscv/Instruction_decode.cc | 4 +- src/lib/memory/FixedLatencyMemoryInterface.cc | 6 +- src/lib/memory/FlatMemoryInterface.cc | 6 +- src/lib/models/emulation/Core.cc | 2 +- src/lib/pipeline/FetchUnit.cc | 2 +- sst/SimEngMemInterface.cc | 2 +- test/regression/RegressionTest.hh | 2 +- .../aarch64/AArch64RegressionTest.hh | 8 +- test/unit/RegisterValueTest.cc | 2 +- 21 files changed, 286 insertions(+), 264 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6e4b74e9a8..b8f4379b98 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,7 +66,7 @@ set(CMAKE_MACOSX_RPATH 1) set(CMAKE_POSITION_INDEPENDENT_CODE ON) # Create variable to enable additional compiler warnings for SimEng targets only -set(SIMENG_COMPILE_OPTIONS -Wall -pedantic -Werror -fno-strict-aliasing) #-Wextra +set(SIMENG_COMPILE_OPTIONS -Wall -pedantic -Werror) #-Wextra # Disable RTTI for all targets add_compile_options($<$:-fno-rtti>) diff --git 
a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 5590427af7..058705fc2e 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -10,11 +10,30 @@ namespace simeng { +inline Pool pool = Pool(); + +template +struct safePointer { + // public: + // safePointer(const char* ptr) : ptr(ptr) {} + + T operator[](const int i) const { + T output; + memcpy(&output, ptr + (i * sizeof(T)), sizeof(T)); + return output; + } + + // private: + const uint8_t* ptr; +}; + /** Global memory pool used by RegisterValue class. */ extern Pool pool; -/** A class that holds an arbitrary region of immutable data, providing casting - * and data accessor functions. For values smaller than or equal to + +// TODO the data is NOT immutable as per AArch64_LD1_MXIPXX_V_D. We should change the class to enforce immutability or concede this functionality +/** A class that holds an arbitrary region of immutable data, providing + * casting and data accessor functions. For values smaller than or equal to * `MAX_LOCAL_BYTES`, this data is held in a local value, otherwise memory is * allocated and the data is stored there. 
*/ class RegisterValue { @@ -28,14 +47,16 @@ class RegisterValue { typename std::enable_if_t, T>* = nullptr> RegisterValue(T value, uint16_t bytes = sizeof(T)) : bytes(bytes) { if (isLocal()) { - T* view = reinterpret_cast(this->localValue); - view[0] = value; + // T* view = reinterpret_cast(this->localValue); + // view[0] = value; + + memcpy(this->localValue, &value, bytes); - if (bytes > sizeof(T)) { - // Zero the remaining bytes not set by the provided value - std::fill(this->localValue + sizeof(T), - this->localValue + bytes, 0); - } + // if (bytes > sizeof(T)) { + // // Zero the remaining bytes not set by the provided value + // std::fill(this->localValue + sizeof(T), + // this->localValue + bytes, 0); + // } } else { void* data = pool.allocate(bytes); std::memset(data, 0, bytes); @@ -43,8 +64,8 @@ class RegisterValue { T* view = reinterpret_cast(data); view[0] = value; - this->ptr = std::shared_ptr( - static_cast(data), + this->ptr = std::shared_ptr( + static_cast(data), [bytes](void* ptr) { pool.deallocate(ptr, bytes); }); } } @@ -52,16 +73,16 @@ class RegisterValue { /** Create a new RegisterValue of size `capacity`, copying `bytes` * from `ptr`. */ - RegisterValue(const char* ptr, uint16_t bytes, uint16_t capacity) + RegisterValue(const uint8_t* ptr, uint16_t bytes, uint16_t capacity) : bytes(capacity) { assert(capacity >= bytes && "Capacity is less than requested bytes"); - char* dest; + uint8_t* dest; if (isLocal()) { dest = this->localValue; } else { - dest = static_cast(pool.allocate(capacity)); + dest = static_cast(pool.allocate(capacity)); std::memset(dest, 0, capacity); - this->ptr = std::shared_ptr( + this->ptr = std::shared_ptr( dest, [capacity](void* ptr) { pool.deallocate(ptr, capacity); }); } assert(dest && "Attempted to dereference a NULL pointer"); @@ -69,36 +90,38 @@ class RegisterValue { } /** Create a new RegisterValue of size `bytes`, copying data from `ptr`. 
*/ - RegisterValue(const char* ptr, uint16_t bytes) + RegisterValue(const uint8_t* ptr, uint16_t bytes) : RegisterValue(ptr, bytes, bytes) {} /** Create a new RegisterValue by copying bytes from a fixed-size array. The - * resultant RegisterValue will have size `C` (defaulting to the no. of bytes - * in the array). + * resultant RegisterValue will have size `C` (defaulting to the no. of + * bytes in the array). */ template RegisterValue(T (&array)[N], size_t C = N * sizeof(T)) - : RegisterValue(reinterpret_cast(array), sizeof(T) * N, C) {} + : RegisterValue(reinterpret_cast(array), sizeof(T) * N, + C) {} /** Read the encapsulated raw memory as a specified datatype. */ template T get() const { - return *getAsVector(); + return getAsVector()[0]; } /** Retrieve a pointer to the encapsulated raw memory, reinterpreted as * the specified datatype. */ template - const T* getAsVector() const { + safePointer getAsVector() const { static_assert(alignof(T) <= 8 && "Alignment over 8 bytes not guaranteed"); assert(bytes > 0 && "Attempted to access an uninitialised RegisterValue"); assert(sizeof(T) <= bytes && "Attempted to access a RegisterValue as a datatype larger than the " "data held"); if (isLocal()) { - return reinterpret_cast(localValue); + return safePointer{this->localValue}; + // return reinterpret_cast(localValue); } else { - return reinterpret_cast(ptr.get()); + return safePointer{ptr.get()}; } } @@ -124,17 +147,17 @@ class RegisterValue { uint16_t bytes = 0; /** The underlying pointer each instance references. */ - std::shared_ptr ptr; + std::shared_ptr ptr; /** The underlying local member value. Aligned to 8 bytes to prevent * potential alignment issue when casting. 
*/ - alignas(8) char localValue[MAX_LOCAL_BYTES] = {}; + alignas(8) uint8_t localValue[MAX_LOCAL_BYTES] = {}; }; inline bool operator==(const RegisterValue& lhs, const RegisterValue& rhs) { if (lhs.size() == rhs.size()) { - auto lhV = lhs.getAsVector(); - auto rhV = rhs.getAsVector(); + auto lhV = lhs.getAsVector(); + auto rhV = rhs.getAsVector(); for (size_t i = 0; i < lhs.size(); i++) { if (lhV[i] != rhV[i]) return false; } diff --git a/src/include/simeng/arch/aarch64/helpers/bitmanip.hh b/src/include/simeng/arch/aarch64/helpers/bitmanip.hh index 0f5de798dc..3255df1d7b 100644 --- a/src/include/simeng/arch/aarch64/helpers/bitmanip.hh +++ b/src/include/simeng/arch/aarch64/helpers/bitmanip.hh @@ -71,7 +71,7 @@ std::array rev(srcValContainer& sourceValues) { auto bytes = sourceValues[0].getAsVector(); std::array reversed; // Copy `bytes` backwards onto `reversed` - std::copy(bytes, bytes + sizeof(T), std::rbegin(reversed)); + std::copy(bytes.ptr, bytes.ptr + sizeof(T), std::rbegin(reversed)); return reversed; } diff --git a/src/include/simeng/arch/aarch64/helpers/sve.hh b/src/include/simeng/arch/aarch64/helpers/sve.hh index 2c33ccfbe6..0711fafc90 100644 --- a/src/include/simeng/arch/aarch64/helpers/sve.hh +++ b/src/include/simeng/arch/aarch64/helpers/sve.hh @@ -1325,7 +1325,7 @@ std::array svePtrue( * Returns an array of 4 uint64_t elements. 
*/ std::array svePunpk(srcValContainer& sourceValues, const uint16_t VL_bits, bool isHi) { - const uint64_t* n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); const uint16_t partition_num = VL_bits / 8; std::array out = {0, 0, 0, 0}; diff --git a/src/include/simeng/memory/FixedLatencyMemoryInterface.hh b/src/include/simeng/memory/FixedLatencyMemoryInterface.hh index 44cbf7adcc..822b855ad7 100644 --- a/src/include/simeng/memory/FixedLatencyMemoryInterface.hh +++ b/src/include/simeng/memory/FixedLatencyMemoryInterface.hh @@ -41,7 +41,7 @@ struct FixedLatencyMemoryInterfaceRequest { /** A memory interface where all requests respond with a fixed latency. */ class FixedLatencyMemoryInterface : public MemoryInterface { public: - FixedLatencyMemoryInterface(char* memory, size_t size, uint16_t latency); + FixedLatencyMemoryInterface(uint8_t* memory, size_t size, uint16_t latency); /** Queue a read request from the supplied target location. * @@ -67,7 +67,7 @@ class FixedLatencyMemoryInterface : public MemoryInterface { private: /** The array representing the memory system to access. */ - char* memory_; + uint8_t* memory_; /** The size of accessible memory. */ size_t size_; /** A vector containing all completed read requests. */ diff --git a/src/include/simeng/memory/FlatMemoryInterface.hh b/src/include/simeng/memory/FlatMemoryInterface.hh index a1cb1ff8d4..1a44066e06 100644 --- a/src/include/simeng/memory/FlatMemoryInterface.hh +++ b/src/include/simeng/memory/FlatMemoryInterface.hh @@ -11,7 +11,7 @@ namespace memory { /** A memory interface to a flat memory system. */ class FlatMemoryInterface : public MemoryInterface { public: - FlatMemoryInterface(char* memory, size_t size); + FlatMemoryInterface(uint8_t* memory, size_t size); /** Request a read from the supplied target location. * @@ -37,7 +37,7 @@ class FlatMemoryInterface : public MemoryInterface { private: /** The array representing the flat memory system to access. 
*/ - char* memory_; + uint8_t* memory_; /** The size of accessible memory. */ size_t size_; /** A vector containing all completed read requests. */ diff --git a/src/lib/RegisterValue.cc b/src/lib/RegisterValue.cc index adddeb375e..38b068ad54 100644 --- a/src/lib/RegisterValue.cc +++ b/src/lib/RegisterValue.cc @@ -4,7 +4,7 @@ namespace simeng { -Pool pool = Pool(); +// Pool pool = Pool(); RegisterValue::RegisterValue() : bytes(0) {} @@ -19,8 +19,9 @@ RegisterValue RegisterValue::zeroExtend(uint16_t fromBytes, auto extended = RegisterValue(0, toBytes); // Get the appropriate source/destination pointers and copy the data - const char* src = (isLocal() ? localValue : ptr.get()); - char* dest = (extended.isLocal() ? extended.localValue : extended.ptr.get()); + const uint8_t* src = (isLocal() ? localValue : ptr.get()); + uint8_t* dest = + (extended.isLocal() ? extended.localValue : extended.ptr.get()); std::memcpy(dest, src, fromBytes); diff --git a/src/lib/arch/aarch64/ExceptionHandler.cc b/src/lib/arch/aarch64/ExceptionHandler.cc index 505f605ace..526a65138f 100644 --- a/src/lib/arch/aarch64/ExceptionHandler.cc +++ b/src/lib/arch/aarch64/ExceptionHandler.cc @@ -48,8 +48,8 @@ bool ExceptionHandler::init() { uint8_t outSize = static_cast(out.size()); stateChange = {ChangeType::REPLACEMENT, {R0}, {retval}}; stateChange.memoryAddresses.push_back({argp, outSize}); - stateChange.memoryAddressValues.push_back( - RegisterValue(reinterpret_cast(out.data()), outSize)); + stateChange.memoryAddressValues.push_back(RegisterValue( + reinterpret_cast(out.data()), outSize)); break; } case 46: { // ftruncate @@ -120,7 +120,7 @@ bool ExceptionHandler::init() { // Get pointer and size of the buffer uint64_t iDst = bufPtr; // Write data for this buffer in 128-byte chunks - auto iSrc = reinterpret_cast(dataBuffer_.data()); + auto iSrc = reinterpret_cast(dataBuffer_.data()); while (totalRead > 0) { uint8_t len = totalRead > 128 ? 
128 : static_cast(totalRead); @@ -160,7 +160,7 @@ bool ExceptionHandler::init() { uint64_t iLength = static_cast(totalRead); // Write data for this buffer in 128-byte chunks - auto iSrc = reinterpret_cast(dataBuffer_.data()); + auto iSrc = reinterpret_cast(dataBuffer_.data()); while (iLength > 0) { uint8_t len = iLength > 128 ? 128 : static_cast(iLength); stateChange.memoryAddresses.push_back({iDst, len}); @@ -239,7 +239,7 @@ bool ExceptionHandler::init() { bytesRemaining -= iLength; // Write data for this buffer in 128-byte chunks - auto iSrc = reinterpret_cast(buffers[i].data()); + auto iSrc = reinterpret_cast(buffers[i].data()); while (iLength > 0) { uint8_t len = iLength > 128 ? 128 : static_cast(iLength); stateChange.memoryAddresses.push_back({iDst, len}); @@ -613,7 +613,7 @@ bool ExceptionHandler::init() { uint64_t bufPtr = registerFileSet.get(R0).get(); size_t buflen = registerFileSet.get(R1).get(); - std::vector buf; + std::vector buf; for (size_t i = 0; i < buflen; i++) { buf.push_back((uint8_t)rand()); } @@ -798,8 +798,10 @@ void ExceptionHandler::readLinkAt(span path) { const auto bufAddress = registerFileSet.get(R2).get(); const auto bufSize = registerFileSet.get(R3).get(); - char buffer[kernel::Linux::LINUX_PATH_MAX]; - auto result = linux_.readlinkat(dirfd, path.data(), buffer, bufSize); + uint8_t buffer[kernel::Linux::LINUX_PATH_MAX]; + // TODO check reinterpret cast is safe here + auto result = linux_.readlinkat(dirfd, path.data(), + reinterpret_cast(buffer), bufSize); if (result < 0) { // TODO: Handle error case @@ -814,7 +816,7 @@ void ExceptionHandler::readLinkAt(span path) { ProcessStateChange stateChange = {ChangeType::REPLACEMENT, {R0}, {result}}; // Slice the returned path into <256-byte chunks for writing - const char* bufPtr = buffer; + const uint8_t* bufPtr = buffer; for (size_t i = 0; i < bytesCopied; i += 256) { uint8_t size = std::min(bytesCopied - i, 256ul); stateChange.memoryAddresses.push_back({bufAddress + i, size}); @@ -856,7 
+858,7 @@ bool ExceptionHandler::readBufferThen(uint64_t ptr, uint64_t length, // Append data to buffer assert(response->data && "unhandled failed read in exception handler"); uint8_t bytesRead = response->target.size; - const uint8_t* data = response->data.getAsVector(); + const uint8_t* data = response->data.getAsVector().ptr; dataBuffer_.insert(dataBuffer_.end(), data, data + bytesRead); memory_.clearCompletedReads(); diff --git a/src/lib/arch/aarch64/Instruction_address.cc b/src/lib/arch/aarch64/Instruction_address.cc index ec4f269a8f..da733f08db 100644 --- a/src/lib/arch/aarch64/Instruction_address.cc +++ b/src/lib/arch/aarch64/Instruction_address.cc @@ -18,7 +18,7 @@ void generateContiguousAddresses( void generatePredicatedContiguousAddressBlocks( uint64_t baseAddr, uint16_t numVecElems, uint8_t elemSize, uint8_t predSize, - const uint64_t* pred, + const safePointer pred, std::vector& addresses) { bool recordingBlock = false; uint64_t currAddr = 0; @@ -179,7 +179,7 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_LD1RD_IMM: { // ld1rd {zt.d}, pg/z, [xn, #imm] - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); for (int i = 0; i < 4; i++) { if (p[i] != 0) { setMemoryAddresses({{sourceValues_[1].get() + @@ -209,7 +209,7 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_LD1RW_IMM: { // ld1rw {zt.s}, pg/z, [xn, #imm] - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); for (int i = 0; i < 4; i++) { if (p[i] != 0) { setMemoryAddresses({{sourceValues_[1].get() + @@ -908,7 +908,7 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_ST1B: { // st1b {zt.b}, pg, [xn, xm] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 8; const uint64_t base = sourceValues_[2].get(); @@ -923,7 +923,7 @@ span 
Instruction::generateAddresses() { break; } case Opcode::AArch64_ST1B_IMM: { // st1b {zt.b}, pg, [xn{, #imm, mul vl}] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 8; const uint64_t base = sourceValues_[2].get(); @@ -940,11 +940,11 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_SST1B_D: { // st1b {zd.d}, pg, [xn, zm.d] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; const uint64_t base = sourceValues_[2].get(); - const uint64_t* offset = sourceValues_[3].getAsVector(); + const auto offset = sourceValues_[3].getAsVector(); std::vector addresses; @@ -959,11 +959,11 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_SST1D: { // st1d {zt.d}, pg, [xn, zm.d] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; const uint64_t base = sourceValues_[2].get(); - const uint64_t* offset = sourceValues_[3].getAsVector(); + const auto offset = sourceValues_[3].getAsVector(); std::vector addresses; addresses.reserve(partition_num); @@ -980,11 +980,11 @@ span Instruction::generateAddresses() { } case Opcode::AArch64_SST1D_SCALED: { // st1d {zt.d}, pg, [xn, // zm.d, lsl #3] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; const uint64_t base = sourceValues_[2].get(); - const uint64_t* offset = sourceValues_[3].getAsVector(); + const auto offset = sourceValues_[3].getAsVector(); std::vector addresses; addresses.reserve(partition_num); @@ -1000,7 +1000,7 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_ST1D: { // st1d {zt.d}, pg, [xn, xm, lsl #3] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = 
sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; const uint64_t base = sourceValues_[2].get(); @@ -1015,7 +1015,7 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_ST1D_IMM: { // st1d {zt.d}, pg, [xn{, #imm, mul vl}] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; const uint64_t base = sourceValues_[2].get(); @@ -1033,7 +1033,7 @@ span Instruction::generateAddresses() { } case Opcode::AArch64_ST2D_IMM: { // st2d {zt1.d, zt2.d}, pg, [{, // #imm, mul vl}] - const uint64_t* p = sourceValues_[2].getAsVector(); + const auto p = sourceValues_[2].getAsVector(); const uint16_t partition_num = VL_bits / 64; const uint64_t base = sourceValues_[3].get(); @@ -1081,7 +1081,7 @@ span Instruction::generateAddresses() { // [{, xm, lsl #3}] // SME const uint16_t partition_num = VL_bits / 64; - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint64_t n = sourceValues_[partition_num + 2].get(); uint64_t m = 0; @@ -1150,7 +1150,7 @@ span Instruction::generateAddresses() { // [{, xm, LSL #2}] // SME const uint16_t partition_num = VL_bits / 32; - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint64_t n = sourceValues_[partition_num + 2].get(); uint64_t m = 0; @@ -1166,7 +1166,7 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_ST1W: { // st1w {zt.s}, pg, [xn, xm, lsl #2] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 32; const uint64_t base = sourceValues_[2].get(); @@ -1181,7 +1181,7 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_ST1W_D: { // st1w {zt.d}, pg, [xn, xm, lsl #2] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t 
partition_num = VL_bits / 64; const uint64_t base = sourceValues_[2].get(); @@ -1197,7 +1197,7 @@ span Instruction::generateAddresses() { } case Opcode::AArch64_ST1W_IMM: { // st1w {zt.s}, pg, [xn{, #imm, mul // vl}] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 32; const uint64_t base = sourceValues_[2].get(); @@ -1214,10 +1214,10 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_SST1W_D_IMM: { // st1w {zt.d}, pg, [zn.d{, #imm}] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; - const uint64_t* n = sourceValues_[2].getAsVector(); + const auto n = sourceValues_[2].getAsVector(); const int64_t offset = static_cast(metadata_.operands[2].mem.disp); @@ -1235,10 +1235,10 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_SST1W_IMM: { // st1w {zt.s}, pg, [zn.s{, #imm}] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 32; - const uint32_t* n = sourceValues_[2].getAsVector(); + const auto n = sourceValues_[2].getAsVector(); const int64_t offset = static_cast( static_cast(metadata_.operands[2].mem.disp)); @@ -1256,11 +1256,11 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_GLD1D_REAL: { // ld1d {zt.d}, pg/z, [xn, zm.d] - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; const uint64_t base = sourceValues_[1].get(); - const uint64_t* offset = sourceValues_[2].getAsVector(); + const auto offset = sourceValues_[2].getAsVector(); std::vector addresses; addresses.reserve(partition_num); @@ -1277,11 +1277,11 @@ span Instruction::generateAddresses() { } case Opcode::AArch64_GLD1D_SCALED_REAL: { // ld1d {zt.d}, pg/z, [xn, // zm.d, 
LSL #3] - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; const uint64_t base = sourceValues_[1].get(); - const uint64_t* offset = sourceValues_[2].getAsVector(); + const auto offset = sourceValues_[2].getAsVector(); std::vector addresses; addresses.reserve(partition_num); @@ -1298,10 +1298,10 @@ span Instruction::generateAddresses() { } case Opcode::AArch64_GLD1D_IMM_REAL: { // ld1d {zd.d}, pg/z, [zn.d{, // #imm}] - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; - const uint64_t* n = sourceValues_[1].getAsVector(); + const auto n = sourceValues_[1].getAsVector(); const int64_t offset = static_cast(metadata_.operands[2].mem.disp); @@ -1320,10 +1320,10 @@ span Instruction::generateAddresses() { } case Opcode::AArch64_GLD1SW_D_IMM_REAL: { // ld1sw {zd.d}, pg/z, // [zn.d{, #imm}] - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; - const uint64_t* n = sourceValues_[1].getAsVector(); + const auto n = sourceValues_[1].getAsVector(); const int64_t offset = static_cast(metadata_.operands[2].mem.disp); @@ -1342,11 +1342,11 @@ span Instruction::generateAddresses() { } case Opcode::AArch64_GLD1W_D_SCALED_REAL: { // ld1w {zd.d}, pg/z, // [, zm.d, lsl #2] - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; const uint64_t n = sourceValues_[1].get(); - const uint64_t* m = sourceValues_[2].getAsVector(); + const auto m = sourceValues_[2].getAsVector(); std::vector addresses; addresses.reserve(partition_num); @@ -1363,11 +1363,11 @@ span Instruction::generateAddresses() { } case Opcode::AArch64_GLD1W_SXTW_REAL: { // ld1w {zd.s}, pg/z, // [, zm.s, sxtw] - const uint64_t* p = 
sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 32; const uint64_t n = sourceValues_[1].get(); - const uint32_t* m = sourceValues_[2].getAsVector(); + const auto m = sourceValues_[2].getAsVector(); std::vector addresses; addresses.reserve(partition_num); @@ -1383,10 +1383,10 @@ span Instruction::generateAddresses() { break; } case Opcode::AArch64_SST1D_IMM: { // st1d {zt.d}, pg, [zn.d{, #imm}] - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; - const uint64_t* n = sourceValues_[2].getAsVector(); + const auto n = sourceValues_[2].getAsVector(); const int64_t offset = static_cast(metadata_.operands[2].mem.disp); diff --git a/src/lib/arch/aarch64/Instruction_decode.cc b/src/lib/arch/aarch64/Instruction_decode.cc index 3535ce590f..62c86549ce 100644 --- a/src/lib/arch/aarch64/Instruction_decode.cc +++ b/src/lib/arch/aarch64/Instruction_decode.cc @@ -634,7 +634,7 @@ void Instruction::decode() { // applicable to SME instructions for (uint16_t i = 0; i < sourceRegisterCount_; i++) { if (sourceRegisters_[i] == RegisterType::ZERO_REGISTER) { - sourceValues_[i] = RegisterValue(0, 8); + sourceValues_[i] = RegisterValue(0ull, 8); sourceOperandsPending_--; } } diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc index 8f4bc38142..f4cc21c727 100644 --- a/src/lib/arch/aarch64/Instruction_execute.cc +++ b/src/lib/arch/aarch64/Instruction_execute.cc @@ -568,7 +568,7 @@ void Instruction::execute() { break; } case Opcode::AArch64_AND_ZI: { // and zdn, zdn, #imm - const uint64_t* dn = sourceValues_[0].getAsVector(); + const auto dn = sourceValues_[0].getAsVector(); const uint64_t imm = static_cast(metadata_.operands[2].imm); const uint16_t partition_num = VL_bits / 64; @@ -1223,7 +1223,7 @@ void Instruction::execute() { // can use uint64_t. 
const uint16_t index = 2 * static_cast(metadata_.operands[1].vector_index); - const uint64_t* n = sourceValues_[0].getAsVector(); + const auto n = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 128; uint64_t out[32] = {0}; @@ -1377,14 +1377,14 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 8; - const uint8_t* zd = sourceValues_[0].getAsVector(); - const uint64_t* pg = sourceValues_[1].getAsVector(); + const auto zd = sourceValues_[0].getAsVector(); + const auto pg = sourceValues_[1].getAsVector(); const uint32_t sliceNum = (sourceValues_[2 + rowCount].get() + static_cast( metadata_.operands[2].sme.slice_offset.imm)) % rowCount; - const uint8_t* zaRow = + const auto zaRow = sourceValues_[2 + sliceNum].getAsVector(); uint8_t out[256] = {0}; @@ -2299,17 +2299,16 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 64; - const uint64_t* pn = sourceValues_[rowCount].getAsVector(); - const uint64_t* pm = - sourceValues_[rowCount + 1].getAsVector(); - const double* zn = sourceValues_[rowCount + 2].getAsVector(); - const double* zm = sourceValues_[rowCount + 3].getAsVector(); + const auto pn = sourceValues_[rowCount].getAsVector(); + const auto pm = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); + const auto zm = sourceValues_[rowCount + 3].getAsVector(); // zn is row, zm is col for (int row = 0; row < rowCount; row++) { double outRow[32] = {0}; uint64_t shifted_active_row = 1ull << ((row % 8) * 8); - const double* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < rowCount; col++) { double zadaElem = zadaRow[col]; uint64_t shifted_active_col = 1ull << ((col % 8) * 8); @@ -2331,17 +2330,16 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 32; - const 
uint64_t* pn = sourceValues_[rowCount].getAsVector(); - const uint64_t* pm = - sourceValues_[rowCount + 1].getAsVector(); - const float* zn = sourceValues_[rowCount + 2].getAsVector(); - const float* zm = sourceValues_[rowCount + 3].getAsVector(); + const auto pn = sourceValues_[rowCount].getAsVector(); + const auto pm = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); + const auto zm = sourceValues_[rowCount + 3].getAsVector(); // zn is row, zm is col for (int row = 0; row < rowCount; row++) { float outRow[64] = {0}; uint64_t shifted_active_row = 1ull << ((row % 16) * 4); - const float* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < rowCount; col++) { float zadaElem = zadaRow[col]; uint64_t shifted_active_col = 1ull << ((col % 16) * 4); @@ -2780,7 +2778,7 @@ void Instruction::execute() { case Opcode::AArch64_GLD1D_IMM_REAL: { // ld1d {zd.d}, pg/z, [zn.d{, // #imm}] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; uint64_t out[32] = {0}; @@ -2801,7 +2799,7 @@ void Instruction::execute() { case Opcode::AArch64_GLD1D_SCALED_REAL: { // ld1d {zt.d}, pg/z, [xn, // zm.d, LSL #3] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; uint16_t index = 0; @@ -2821,7 +2819,7 @@ void Instruction::execute() { case Opcode::AArch64_GLD1SW_D_IMM_REAL: { // ld1sw {zd.d}, pg/z, [zn.d{, // #imm}] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; int64_t out[32] = {0}; @@ -2839,7 +2837,7 @@ void Instruction::execute() { case Opcode::AArch64_GLD1W_D_SCALED_REAL: { // ld1w {zd.d}, pg/z, // [, zm.d, lsl #2] // LOAD - const uint64_t* p = 
sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; uint64_t out[32] = {0}; @@ -2857,7 +2855,7 @@ void Instruction::execute() { case Opcode::AArch64_GLD1W_SXTW_REAL: { // ld1w {zd.s}, pg/z, // [, zm.s, sxtw] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 32; uint32_t out[64] = {0}; @@ -3388,12 +3386,12 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 64; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint16_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint64_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint64_t out[32] = {0}; for (int i = 0; i < partition_num; i++) { @@ -3421,7 +3419,7 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 16; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = @@ -3497,7 +3495,7 @@ void Instruction::execute() { const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint32_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint32_t out[64] = {0}; for (int i = 0; i < partition_num; i++) { @@ -3643,7 +3641,7 @@ void Instruction::execute() { const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint32_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); for (int i = 0; i < partition_num; i++) { const uint32_t* row = sourceValues_[i].getAsVector(); @@ -3659,10 +3657,10 @@ void Instruction::execute() { } case Opcode::AArch64_LD1B: { // ld1b 
{zt.b}, pg/z, [xn, xm] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 8; - const uint8_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint8_t out[256] = {0}; for (int i = 0; i < partition_num; i++) { @@ -3679,10 +3677,10 @@ void Instruction::execute() { case Opcode::AArch64_LD1B_IMM: { // ld1b {zt.b}, pg/z, [xn{, #imm, // mul vl}] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 8; - const uint8_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint8_t out[256] = {0}; for (int i = 0; i < partition_num; i++) { uint64_t shifted_active = 1ull << (i % 64); @@ -3697,10 +3695,10 @@ void Instruction::execute() { } case Opcode::AArch64_LD1D: { // ld1d {zt.d}, pg/z, [xn, xm, lsl #3] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; - const uint64_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint64_t out[32] = {0}; for (int i = 0; i < partition_num; i++) { @@ -3717,10 +3715,10 @@ void Instruction::execute() { case Opcode::AArch64_LD1D_IMM: { // ld1d {zt.d}, pg/z, [xn{, #imm, // mul vl}] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; - const uint64_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint64_t out[32] = {0}; for (int i = 0; i < partition_num; i++) { @@ -3736,10 +3734,10 @@ void Instruction::execute() { } case Opcode::AArch64_LD1H: { // ld1h {zt.h}, pg/z, [xn, xm, lsl #1] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); 
const uint16_t partition_num = VL_bits / 16; - const uint16_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint16_t out[128] = {0}; for (int i = 0; i < partition_num; i++) { @@ -3774,7 +3772,7 @@ void Instruction::execute() { uint16_t index = 0; // Check if any lanes are active, otherwise set all to 0 and break early bool active = false; - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); for (int i = 0; i < 4; i++) { if (p[i] != 0) { active = true; @@ -3796,10 +3794,10 @@ void Instruction::execute() { } case Opcode::AArch64_LD1RQ_D_IMM: { // ld1rqd {zd.d}, pg/z, [xn{, #imm}] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; uint64_t out[32] = {0}; - const uint64_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); // Get mini-vector (quadword) uint64_t mini[2] = {0}; @@ -3820,10 +3818,10 @@ void Instruction::execute() { } case Opcode::AArch64_LD1RQ_W: { // ld1rqw {zd.s}, pg/z, [xn, xm, lsl #2] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 32; uint32_t out[64] = {0}; - const uint32_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); // Get mini-vector (quadword) uint32_t mini[4] = {0}; @@ -3846,10 +3844,10 @@ void Instruction::execute() { } case Opcode::AArch64_LD1RQ_W_IMM: { // ld1rqw {zd.s}, pg/z, [xn{, #imm}] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 32; uint32_t out[64] = {0}; - const uint32_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); // Get mini-vector (quadword) uint32_t mini[4] = {0}; @@ -3877,7 +3875,7 @@ void 
Instruction::execute() { // Check if any lanes are active, otherwise set all to 0 and break early bool active = false; - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); for (int i = 0; i < 4; i++) { if (p[i] != 0) { active = true; @@ -4093,10 +4091,10 @@ void Instruction::execute() { } case Opcode::AArch64_LD1W: { // ld1w {zt.s}, pg/z, [xn, xm, lsl #2] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 32; - const uint32_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint32_t out[64] = {0}; for (int i = 0; i < partition_num; i++) { @@ -4113,10 +4111,10 @@ void Instruction::execute() { case Opcode::AArch64_LD1W_IMM: { // ld1w {zt.s}, pg/z, [xn{, #imm, // mul vl}] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 32; - const uint32_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint32_t out[64] = {0}; for (int i = 0; i < partition_num; i++) { @@ -4133,7 +4131,7 @@ void Instruction::execute() { case Opcode::AArch64_LD1i32: { // ld1 {vt.s}[index], [xn] // LOAD const int index = metadata_.operands[0].vector_index; - const uint32_t* vt = sourceValues_[0].getAsVector(); + const auto vt = sourceValues_[0].getAsVector(); uint32_t out[4]; for (int i = 0; i < 4; i++) { out[i] = (i == index) ? memoryData_[0].get() : vt[i]; @@ -4144,7 +4142,7 @@ void Instruction::execute() { case Opcode::AArch64_LD1i64: { // ld1 {vt.d}[index], [xn] // LOAD const int index = metadata_.operands[0].vector_index; - const uint64_t* vt = sourceValues_[0].getAsVector(); + const auto vt = sourceValues_[0].getAsVector(); uint64_t out[2]; for (int i = 0; i < 2; i++) { out[i] = (i == index) ? 
memoryData_[0].get() : vt[i]; @@ -4155,7 +4153,7 @@ void Instruction::execute() { case Opcode::AArch64_LD1i64_POST: { // ld1 {vt.d}[index], [xn], #8 // LOAD const int index = metadata_.operands[0].vector_index; - const uint64_t* vt = sourceValues_[0].getAsVector(); + const auto vt = sourceValues_[0].getAsVector(); uint64_t out[2]; for (int i = 0; i < 2; i++) { out[i] = (i == index) ? memoryData_[0].get() : vt[i]; @@ -4174,11 +4172,10 @@ void Instruction::execute() { case Opcode::AArch64_LD2D_IMM: { // ld2d {zt1.d, zt2.d}, pg/z, [{, // #imm, mul vl}] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; std::vector data = { - memoryData_[0].getAsVector(), - memoryData_[1].getAsVector()}; + auto memoryData_[1].getAsVector()}; uint64_t out[2][32] = {{0}, {0}}; for (int i = 0; i < partition_num; i++) { @@ -4197,8 +4194,8 @@ void Instruction::execute() { break; } case Opcode::AArch64_LD2Twov4s: { // ld2 {vt1.4s, vt2.4s} [xn] - const float* region1 = memoryData_[0].getAsVector(); - const float* region2 = memoryData_[1].getAsVector(); + const auto region1 = memoryData_[0].getAsVector(); + const auto region2 = memoryData_[1].getAsVector(); // LD2 multistruct uses de-interleaving float t1[4] = {region1[0], region1[2], region2[0], region2[2]}; @@ -4210,8 +4207,8 @@ void Instruction::execute() { case Opcode::AArch64_LD2Twov4s_POST: { // ld2 {vt1.4s, vt2.4s}, [xn], // // LOAD - const float* region1 = memoryData_[0].getAsVector(); - const float* region2 = memoryData_[1].getAsVector(); + const auto region1 = memoryData_[0].getAsVector(); + const auto region2 = memoryData_[1].getAsVector(); float t1[4] = {region1[0], region1[2], region2[0], region2[2]}; float t2[4] = {region1[1], region1[3], region2[1], region2[3]}; // #imm can only be 32 @@ -4226,12 +4223,11 @@ void Instruction::execute() { case Opcode::AArch64_LD3D_IMM: { // ld3d {zt1.d, zt2.d, zt3.d}, pg/z, // [xn|sp{, #imm, 
MUL VL}] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; std::vector data = { - memoryData_[0].getAsVector(), - memoryData_[1].getAsVector(), - memoryData_[2].getAsVector()}; + auto memoryData_[1].getAsVector(), + auto memoryData_[2].getAsVector()}; uint64_t out[3][32] = {{0}, {0}, {0}}; for (int i = 0; i < partition_num; i++) { @@ -4252,13 +4248,11 @@ void Instruction::execute() { case Opcode::AArch64_LD4D_IMM: { // ld4d {zt1.d, zt2.d, zt3.d, zt4.d}, // pg/z, [xn|sp{, #imm, MUL VL}] // LOAD - const uint64_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; std::vector data = { - memoryData_[0].getAsVector(), - memoryData_[1].getAsVector(), - memoryData_[2].getAsVector(), - memoryData_[3].getAsVector()}; + auto memoryData_[1].getAsVector(), + auto memoryData_[3].getAsVector()}; uint64_t out[4][32] = {{0}, {0}, {0}, {0}}; for (int i = 0; i < partition_num; i++) { @@ -4619,7 +4613,7 @@ void Instruction::execute() { // LOAD const uint64_t PL_bits = VL_bits / 8; const uint16_t partition_num = PL_bits / 8; - const uint8_t* memData = memoryData_[0].getAsVector(); + const auto memData = memoryData_[0].getAsVector(); uint64_t out[4] = {0}; for (int i = 0; i < partition_num; i++) { @@ -4635,7 +4629,7 @@ void Instruction::execute() { // LOAD const uint16_t partition_num = VL_bits / 8; uint8_t out[256] = {0}; - const uint8_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); for (int i = 0; i < partition_num; i++) { out[i] = data[i]; @@ -5029,8 +5023,8 @@ void Instruction::execute() { break; } case Opcode::AArch64_PTEST_PP: { // ptest pg, pn.b - const uint64_t* g = sourceValues_[0].getAsVector(); - const uint64_t* s = sourceValues_[1].getAsVector(); + const auto g = sourceValues_[0].getAsVector(); + const auto s = sourceValues_[1].getAsVector(); 
std::array masked_n = {(g[0] & s[0]), (g[1] & s[1]), (g[2] & s[2]), (g[3] & s[3])}; // Byte count = 1 as destination predicate is regarding single bytes. @@ -5529,8 +5523,8 @@ void Instruction::execute() { } case Opcode::AArch64_SST1B_D: { // st1b {zd.d}, pg, [xn, zm.d] // STORE - const uint64_t* d = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto d = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; uint16_t index = 0; @@ -5545,8 +5539,8 @@ void Instruction::execute() { } case Opcode::AArch64_SST1D: { // st1d {zt.d}, pg, [xn, zm.d] // STORE - const uint64_t* d = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto d = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; uint16_t index = 0; @@ -5561,8 +5555,8 @@ void Instruction::execute() { } case Opcode::AArch64_SST1D_IMM: { // st1d {zd.d}, pg, [zn.d{, #imm}] // STORE - const uint64_t* t = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; uint16_t index = 0; @@ -5578,8 +5572,8 @@ void Instruction::execute() { case Opcode::AArch64_SST1D_SCALED: { // st1d {zt.d}, pg, [xn, // zm.d, lsl #3] // STORE - const uint64_t* d = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto d = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; uint16_t index = 0; @@ -5620,13 +5614,13 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 64; const uint32_t ws = sourceValues_[partition_num].get(); const uint64_t* pg = - sourceValues_[partition_num + 1].getAsVector(); + auto + 
1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; const uint64_t* tileSlice = - sourceValues_[sliceNum].getAsVector(); + auto sourceValues_[sliceNum].getAsVector(); memoryData_ = sve_merge_store_data(tileSlice, pg, VL_bits); break; } @@ -5756,7 +5750,7 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 64; const uint32_t ws = sourceValues_[partition_num].get(); const uint64_t* pg = - sourceValues_[partition_num + 1].getAsVector(); + auto + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; @@ -5792,7 +5786,7 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 16; const uint32_t ws = sourceValues_[partition_num].get(); const uint64_t* pg = - sourceValues_[partition_num + 1].getAsVector(); + auto + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; @@ -5870,7 +5864,7 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 32; const uint32_t ws = sourceValues_[partition_num].get(); const uint64_t* pg = - sourceValues_[partition_num + 1].getAsVector(); + auto + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; @@ -5899,8 +5893,8 @@ void Instruction::execute() { } case Opcode::AArch64_SST1W_D_IMM: { // st1w {zt.d}, pg, [zn.d{, #imm}] // STORE - const uint64_t* t = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 64; uint16_t index = 0; @@ -5915,8 +5909,8 @@ void Instruction::execute() { } case Opcode::AArch64_SST1W_IMM: { // st1w {zt.s}, pg, [zn.s{, #imm}] // STORE - const uint32_t* t = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto t = 
sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); const uint16_t partition_num = VL_bits / 32; uint16_t index = 0; @@ -5931,32 +5925,32 @@ void Instruction::execute() { } case Opcode::AArch64_ST1B: { // st1b {zt.b}, pg, [xn, xm] // STORE - const uint8_t* d = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto d = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); memoryData_ = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1B_IMM: { // st1b {zt.b}, pg, [xn{, #imm, mul vl}] // STORE - const uint8_t* d = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto d = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); memoryData_ = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1D: { // st1d {zt.d}, pg, [xn, xm, lsl #3] // STORE - const uint64_t* d = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto d = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); memoryData_ = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1D_IMM: { // st1d {zt.d}, pg, [xn{, #imm, mul vl}] // STORE - const uint64_t* d = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto d = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); memoryData_ = sve_merge_store_data(d, p, VL_bits); break; @@ -5966,8 +5960,8 @@ void Instruction::execute() { // STORE for (int i = 0; i < 4; i++) { memoryData_[i] = - RegisterValue((char*)sourceValues_[i].getAsVector(), - 16 * sizeof(uint8_t)); + auto RegisterValue((char*)sourceValues_[i].getAsVector(), + 16 * sizeof(uint8_t)); } break; } @@ -5977,8 +5971,8 @@ void Instruction::execute() { // STORE for (int i = 0; i < 4; i++) { memoryData_[i] = - 
RegisterValue((char*)sourceValues_[i].getAsVector(), - 16 * sizeof(uint8_t)); + auto RegisterValue((char*)sourceValues_[i].getAsVector(), + 16 * sizeof(uint8_t)); } // if #imm post-index, value can only be 64 const uint64_t postIndex = @@ -5992,9 +5986,9 @@ void Instruction::execute() { // vt4.2d}, [xn|sp] // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = - RegisterValue((char*)sourceValues_[i].getAsVector(), - 2 * sizeof(uint64_t)); + memoryData_[i] = auto RegisterValue( + (char*)sourceValues_[i].getAsVector(), + 2 * sizeof(uint64_t)); } break; } @@ -6002,9 +5996,9 @@ void Instruction::execute() { // vt4.2d}, [xn|sp], <#imm|xm> // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = - RegisterValue((char*)sourceValues_[i].getAsVector(), - 2 * sizeof(uint64_t)); + memoryData_[i] = auto RegisterValue( + (char*)sourceValues_[i].getAsVector(), + 2 * sizeof(uint64_t)); } // if #imm post-index, value can only be 64 const uint64_t postIndex = @@ -6018,9 +6012,9 @@ void Instruction::execute() { // vt4.2s}, [xn|sp], <#imm|xm> // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = - RegisterValue((char*)sourceValues_[i].getAsVector(), - 2 * sizeof(uint32_t)); + memoryData_[i] = auto RegisterValue( + (char*)sourceValues_[i].getAsVector(), + 2 * sizeof(uint32_t)); } // if #imm post-index, value can only be 32 const uint64_t postIndex = @@ -6034,9 +6028,9 @@ void Instruction::execute() { // vt4.4s}, [xn|sp] // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = - RegisterValue((char*)sourceValues_[i].getAsVector(), - 4 * sizeof(uint32_t)); + memoryData_[i] = auto RegisterValue( + (char*)sourceValues_[i].getAsVector(), + 4 * sizeof(uint32_t)); } break; } @@ -6044,9 +6038,9 @@ void Instruction::execute() { // vt4.4s}, [xn|sp], <#imm|xm> // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = - RegisterValue((char*)sourceValues_[i].getAsVector(), - 4 * sizeof(uint32_t)); + memoryData_[i] = auto RegisterValue( + (char*)sourceValues_[i].getAsVector(), + 4 * 
sizeof(uint32_t)); } // if #imm post-index, value can only be 64 const uint64_t postIndex = @@ -6058,8 +6052,8 @@ void Instruction::execute() { } case Opcode::AArch64_ST1Twov16b: { // st1 {vt.16b, vt2.16b}, [xn|sp] // STORE - const uint8_t* t = sourceValues_[0].getAsVector(); - const uint8_t* t2 = sourceValues_[1].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); + const auto t2 = sourceValues_[1].getAsVector(); memoryData_[0] = RegisterValue((char*)t, 16 * sizeof(uint8_t)); memoryData_[1] = RegisterValue((char*)t2, 16 * sizeof(uint8_t)); break; @@ -6067,8 +6061,8 @@ void Instruction::execute() { case Opcode::AArch64_ST1Twov16b_POST: { // st1 {vt.16b, vt2.16b}, // [xn|sp], <#imm|xm> // STORE - const uint8_t* t = sourceValues_[0].getAsVector(); - const uint8_t* t2 = sourceValues_[1].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); + const auto t2 = sourceValues_[1].getAsVector(); memoryData_[0] = RegisterValue((char*)t, 16 * sizeof(uint8_t)); memoryData_[1] = RegisterValue((char*)t2, 16 * sizeof(uint8_t)); @@ -6082,8 +6076,8 @@ void Instruction::execute() { } case Opcode::AArch64_ST1Twov2d: { // st1 {vt.2d, vt2.2d}, [xn|sp] // STORE - const uint64_t* t = sourceValues_[0].getAsVector(); - const uint64_t* t2 = sourceValues_[1].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); + const auto t2 = sourceValues_[1].getAsVector(); memoryData_[0] = RegisterValue((char*)t, 2 * sizeof(uint64_t)); memoryData_[1] = RegisterValue((char*)t2, 2 * sizeof(uint64_t)); break; @@ -6091,8 +6085,8 @@ void Instruction::execute() { case Opcode::AArch64_ST1Twov2d_POST: { // st1 {vt.2d, vt2.2d}, // [xn|sp], <#imm|xm> // STORE - const uint64_t* t = sourceValues_[0].getAsVector(); - const uint64_t* t2 = sourceValues_[1].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); + const auto t2 = sourceValues_[1].getAsVector(); memoryData_[0] = RegisterValue((char*)t, 2 * sizeof(uint64_t)); memoryData_[1] = RegisterValue((char*)t2, 2 * 
sizeof(uint64_t)); @@ -6106,8 +6100,8 @@ void Instruction::execute() { } case Opcode::AArch64_ST1Twov4s: { // st1 {vt.4s, vt2.4s}, [xn|sp] // STORE - const uint32_t* t = sourceValues_[0].getAsVector(); - const uint32_t* t2 = sourceValues_[1].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); + const auto t2 = sourceValues_[1].getAsVector(); memoryData_[0] = RegisterValue((char*)t, 4 * sizeof(uint32_t)); memoryData_[1] = RegisterValue((char*)t2, 4 * sizeof(uint32_t)); break; @@ -6115,8 +6109,8 @@ void Instruction::execute() { case Opcode::AArch64_ST1Twov4s_POST: { // st1 {vt.4s, vt2.4s}, // [xn|sp], <#imm|xm> // STORE - const uint32_t* t = sourceValues_[0].getAsVector(); - const uint32_t* t2 = sourceValues_[1].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); + const auto t2 = sourceValues_[1].getAsVector(); memoryData_[0] = RegisterValue((char*)t, 4 * sizeof(uint32_t)); memoryData_[1] = RegisterValue((char*)t2, 4 * sizeof(uint32_t)); @@ -6130,37 +6124,37 @@ void Instruction::execute() { } case Opcode::AArch64_ST1W: { // st1w {zt.s}, pg, [xn, xm, lsl #2] // STORE - const uint32_t* d = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto d = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); memoryData_ = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1W_D: { // st1w {zt.d}, pg, [xn, xm, lsl #2] // STORE - const uint64_t* d = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto d = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[1].getAsVector(); memoryData_ = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1W_IMM: { // st1w {zt.s}, pg, [xn{, #imm, mul vl}] // STORE - const uint32_t* d = sourceValues_[0].getAsVector(); - const uint64_t* p = sourceValues_[1].getAsVector(); + const auto d = sourceValues_[0].getAsVector(); + const auto p = 
sourceValues_[1].getAsVector(); memoryData_ = sve_merge_store_data(d, p, VL_bits); break; } case Opcode::AArch64_ST1i16: { // st1 {vt.h}[index], [xn] // STORE - const uint16_t* t = sourceValues_[0].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); memoryData_[0] = t[metadata_.operands[0].vector_index]; break; } case Opcode::AArch64_ST1i16_POST: { // st1 {vt.h}[index], [xn], // STORE - const uint16_t* t = sourceValues_[0].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); memoryData_[0] = t[metadata_.operands[0].vector_index]; // if #imm post-index, value can only be 2 const uint64_t postIndex = @@ -6172,13 +6166,13 @@ void Instruction::execute() { } case Opcode::AArch64_ST1i32: { // st1 {vt.s}[index], [xn] // STORE - const uint32_t* t = sourceValues_[0].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); memoryData_[0] = t[metadata_.operands[0].vector_index]; break; } case Opcode::AArch64_ST1i32_POST: { // st1 {vt.s}[index], [xn], // STORE - const uint32_t* t = sourceValues_[0].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); memoryData_[0] = t[metadata_.operands[0].vector_index]; // if #imm post-index, value can only be 4 const uint64_t postIndex = @@ -6190,13 +6184,13 @@ void Instruction::execute() { } case Opcode::AArch64_ST1i64: { // st1 {vt.d}[index], [xn] // STORE - const uint64_t* t = sourceValues_[0].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); memoryData_[0] = t[metadata_.operands[0].vector_index]; break; } case Opcode::AArch64_ST1i64_POST: { // st1 {vt.d}[index], [xn], // STORE - const uint64_t* t = sourceValues_[0].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); memoryData_[0] = t[metadata_.operands[0].vector_index]; // if #imm post-index, value can only be 8 const uint64_t postIndex = @@ -6208,13 +6202,13 @@ void Instruction::execute() { } case Opcode::AArch64_ST1i8: { // st1 {vt.b}[index], [xn] // STORE - const uint8_t* t = sourceValues_[0].getAsVector(); + const 
auto t = sourceValues_[0].getAsVector(); memoryData_[0] = t[metadata_.operands[0].vector_index]; break; } case Opcode::AArch64_ST1i8_POST: { // st1 {vt.b}[index], [xn], // STORE - const uint8_t* t = sourceValues_[0].getAsVector(); + const auto t = sourceValues_[0].getAsVector(); memoryData_[0] = t[metadata_.operands[0].vector_index]; // if #imm post-index, value can only be 1 const uint64_t postIndex = @@ -6227,9 +6221,9 @@ void Instruction::execute() { case Opcode::AArch64_ST2D_IMM: { // st2d {zt1.d, zt2.d}, pg, [{, // #imm, mul vl}] // STORE - const uint64_t* d1 = sourceValues_[0].getAsVector(); - const uint64_t* d2 = sourceValues_[1].getAsVector(); - const uint64_t* p = sourceValues_[2].getAsVector(); + const auto d1 = sourceValues_[0].getAsVector(); + const auto d2 = sourceValues_[1].getAsVector(); + const auto p = sourceValues_[2].getAsVector(); std::vector memData; bool inActiveBlock = false; @@ -6263,8 +6257,8 @@ void Instruction::execute() { case Opcode::AArch64_ST2Twov4s_POST: { // st2 {vt1.4s, vt2.4s}, [xn], // // STORE - const float* t1 = sourceValues_[0].getAsVector(); - const float* t2 = sourceValues_[1].getAsVector(); + const auto t1 = sourceValues_[0].getAsVector(); + const auto t2 = sourceValues_[1].getAsVector(); std::vector m1 = {t1[0], t2[0], t1[1], t2[1]}; std::vector m2 = {t1[2], t2[2], t1[3], t2[3]}; memoryData_[0] = RegisterValue((char*)m1.data(), 4 * sizeof(float)); @@ -6472,7 +6466,7 @@ void Instruction::execute() { // STORE const uint64_t PL_bits = VL_bits / 8; const uint16_t partition_num = PL_bits / 8; - const uint8_t* p = sourceValues_[0].getAsVector(); + const auto p = sourceValues_[0].getAsVector(); memoryData_[0] = RegisterValue((char*)p, partition_num); break; } @@ -6494,7 +6488,7 @@ void Instruction::execute() { case Opcode::AArch64_STR_ZXI: { // str zt, [xn{, #imm, mul vl}] // STORE const uint16_t partition_num = VL_bits / 8; - const uint8_t* z = sourceValues_[0].getAsVector(); + const auto z = sourceValues_[0].getAsVector(); 
memoryData_[0] = RegisterValue((char*)z, partition_num); break; } @@ -7232,19 +7226,19 @@ void Instruction::execute() { } case Opcode::AArch64_UMOVvi32_idx0: // umov wd, vn.s[0] case Opcode::AArch64_UMOVvi32: { // umov wd, vn.s[index] - const uint32_t* vec = sourceValues_[0].getAsVector(); + const auto vec = sourceValues_[0].getAsVector(); results_[0] = {vec[metadata_.operands[1].vector_index], 8}; break; } case Opcode::AArch64_UMOVvi64_idx0: // umov xd, vn.d[0] case Opcode::AArch64_UMOVvi64: { // umov xd, vn.d[index] - const uint64_t* vec = sourceValues_[0].getAsVector(); + const auto vec = sourceValues_[0].getAsVector(); results_[0] = vec[metadata_.operands[1].vector_index]; break; } case Opcode::AArch64_UMOVvi8_idx0: // umov wd, vn.b[0] case Opcode::AArch64_UMOVvi8: { // umov wd, vn.b[index] - const uint8_t* vec = sourceValues_[0].getAsVector(); + const auto vec = sourceValues_[0].getAsVector(); results_[0] = {vec[metadata_.operands[1].vector_index], 8}; break; } diff --git a/src/lib/arch/riscv/ExceptionHandler.cc b/src/lib/arch/riscv/ExceptionHandler.cc index 423c7d1435..782ddf6cf2 100644 --- a/src/lib/arch/riscv/ExceptionHandler.cc +++ b/src/lib/arch/riscv/ExceptionHandler.cc @@ -46,8 +46,8 @@ bool ExceptionHandler::init() { uint8_t outSize = static_cast(out.size()); stateChange = {ChangeType::REPLACEMENT, {R0}, {retval}}; stateChange.memoryAddresses.push_back({argp, outSize}); - stateChange.memoryAddressValues.push_back( - RegisterValue(reinterpret_cast(out.data()), outSize)); + stateChange.memoryAddressValues.push_back(RegisterValue( + reinterpret_cast(out.data()), outSize)); break; } case 46: { // ftruncate @@ -118,7 +118,7 @@ bool ExceptionHandler::init() { // Get pointer and size of the buffer uint64_t iDst = bufPtr; // Write data for this buffer in 128-byte chunks - auto iSrc = reinterpret_cast(dataBuffer_.data()); + auto iSrc = reinterpret_cast(dataBuffer_.data()); while (totalRead > 0) { uint8_t len = totalRead > 128 ? 
128 : static_cast(totalRead); @@ -158,7 +158,7 @@ bool ExceptionHandler::init() { uint64_t iLength = static_cast(totalRead); // Write data for this buffer in 128-byte chunks - auto iSrc = reinterpret_cast(dataBuffer_.data()); + auto iSrc = reinterpret_cast(dataBuffer_.data()); while (iLength > 0) { uint8_t len = iLength > 128 ? 128 : static_cast(iLength); stateChange.memoryAddresses.push_back({iDst, len}); @@ -237,7 +237,7 @@ bool ExceptionHandler::init() { bytesRemaining -= iLength; // Write data for this buffer in 128-byte chunks - auto iSrc = reinterpret_cast(buffers[i].data()); + auto iSrc = reinterpret_cast(buffers[i].data()); while (iLength > 0) { uint8_t len = iLength > 128 ? 128 : static_cast(iLength); stateChange.memoryAddresses.push_back({iDst, len}); @@ -611,7 +611,7 @@ bool ExceptionHandler::init() { uint64_t bufPtr = registerFileSet.get(R0).get(); size_t buflen = registerFileSet.get(R1).get(); - std::vector buf; + std::vector buf; for (size_t i = 0; i < buflen; i++) { buf.push_back((uint8_t)rand()); } @@ -778,8 +778,10 @@ void ExceptionHandler::readLinkAt(span path) { const auto bufAddress = registerFileSet.get(R2).get(); const auto bufSize = registerFileSet.get(R3).get(); - char buffer[kernel::Linux::LINUX_PATH_MAX]; - auto result = linux_.readlinkat(dirfd, path.data(), buffer, bufSize); + uint8_t buffer[kernel::Linux::LINUX_PATH_MAX]; + // TODO check if reinterpret cast is dangerous here + auto result = linux_.readlinkat(dirfd, path.data(), + reinterpret_cast(buffer), bufSize); if (result < 0) { // TODO: Handle error case @@ -794,7 +796,7 @@ void ExceptionHandler::readLinkAt(span path) { ProcessStateChange stateChange = {ChangeType::REPLACEMENT, {R0}, {result}}; // Slice the returned path into <256-byte chunks for writing - const char* bufPtr = buffer; + const uint8_t* bufPtr = buffer; for (size_t i = 0; i < bytesCopied; i += 256) { uint8_t size = std::min(bytesCopied - i, 256ul); stateChange.memoryAddresses.push_back({bufAddress + i, size}); @@ 
-836,7 +838,7 @@ bool ExceptionHandler::readBufferThen(uint64_t ptr, uint64_t length, // Append data to buffer assert(response->data && "unhandled failed read in exception handler"); uint8_t bytesRead = response->target.size; - const uint8_t* data = response->data.getAsVector(); + const uint8_t* data = response->data.getAsVector().ptr; dataBuffer_.insert(dataBuffer_.end(), data, data + bytesRead); memory_.clearCompletedReads(); diff --git a/src/lib/arch/riscv/Instruction_decode.cc b/src/lib/arch/riscv/Instruction_decode.cc index e8145d4c11..2c879a2e40 100644 --- a/src/lib/arch/riscv/Instruction_decode.cc +++ b/src/lib/arch/riscv/Instruction_decode.cc @@ -152,7 +152,7 @@ void Instruction::decode() { if (sourceRegisters_[sourceRegisterCount_] == RegisterType::ZERO_REGISTER) { // Catch zero register references and pre-complete those operands - sourceValues_[sourceRegisterCount_] = RegisterValue(0, 8); + sourceValues_[sourceRegisterCount_] = RegisterValue(0ull, 8); } else { sourceOperandsPending_++; } @@ -193,7 +193,7 @@ void Instruction::decode() { if (sourceRegisters_[sourceRegisterCount_] == RegisterType::ZERO_REGISTER) { // Catch zero register references and pre-complete those operands - sourceValues_[sourceRegisterCount_] = RegisterValue(0, 8); + sourceValues_[sourceRegisterCount_] = RegisterValue(0ull, 8); } else { sourceOperandsPending_++; } diff --git a/src/lib/memory/FixedLatencyMemoryInterface.cc b/src/lib/memory/FixedLatencyMemoryInterface.cc index ee33ce7357..9d73940da9 100644 --- a/src/lib/memory/FixedLatencyMemoryInterface.cc +++ b/src/lib/memory/FixedLatencyMemoryInterface.cc @@ -6,7 +6,7 @@ namespace simeng { namespace memory { -FixedLatencyMemoryInterface::FixedLatencyMemoryInterface(char* memory, +FixedLatencyMemoryInterface::FixedLatencyMemoryInterface(uint8_t* memory, size_t size, uint16_t latency) : memory_(memory), size_(size), latency_(latency) {} @@ -35,7 +35,7 @@ void FixedLatencyMemoryInterface::tick() { auto ptr = memory_ + target.address; // 
Copy the data from the RegisterValue to memory - memcpy(ptr, request.data.getAsVector(), target.size); + memcpy(ptr, request.data.getAsVector().ptr, target.size); } else { // Read: read data into `completedReads` if (target.address + target.size > size_ || @@ -43,7 +43,7 @@ void FixedLatencyMemoryInterface::tick() { // Read outside of memory; return an invalid value to signal a fault completedReads_.push_back({target, RegisterValue(), request.requestId}); } else { - const char* ptr = memory_ + target.address; + const uint8_t* ptr = memory_ + target.address; // Copy the data at the requested memory address into a RegisterValue completedReads_.push_back( diff --git a/src/lib/memory/FlatMemoryInterface.cc b/src/lib/memory/FlatMemoryInterface.cc index 4d46db8781..b9524f54df 100644 --- a/src/lib/memory/FlatMemoryInterface.cc +++ b/src/lib/memory/FlatMemoryInterface.cc @@ -6,7 +6,7 @@ namespace simeng { namespace memory { -FlatMemoryInterface::FlatMemoryInterface(char* memory, size_t size) +FlatMemoryInterface::FlatMemoryInterface(uint8_t* memory, size_t size) : memory_(memory), size_(size) {} void FlatMemoryInterface::requestRead(const MemoryAccessTarget& target, @@ -17,7 +17,7 @@ void FlatMemoryInterface::requestRead(const MemoryAccessTarget& target, return; } - const char* ptr = memory_ + target.address; + const uint8_t* ptr = memory_ + target.address; // Copy the data at the requested memory address into a RegisterValue completedReads_.push_back( @@ -35,7 +35,7 @@ void FlatMemoryInterface::requestWrite(const MemoryAccessTarget& target, auto ptr = memory_ + target.address; // Copy the data from the RegisterValue to memory - memcpy(ptr, data.getAsVector(), target.size); + memcpy(ptr, data.getAsVector().ptr, target.size); } const span FlatMemoryInterface::getCompletedReads() const { diff --git a/src/lib/models/emulation/Core.cc b/src/lib/models/emulation/Core.cc index bf0129b5ee..a2a27663c7 100644 --- a/src/lib/models/emulation/Core.cc +++ 
b/src/lib/models/emulation/Core.cc @@ -54,7 +54,7 @@ void Core::tick() { // complete reads const auto& instructionBytes = instructionMemory_.getCompletedReads()[0].data; // Predecode fetched data - auto bytesRead = isa_.predecode(instructionBytes.getAsVector(), + auto bytesRead = isa_.predecode(instructionBytes.getAsVector().ptr, FETCH_SIZE, pc_, macroOp_); // Clear the fetched data instructionMemory_.clearCompletedReads(); diff --git a/src/lib/pipeline/FetchUnit.cc b/src/lib/pipeline/FetchUnit.cc index ea59f1a071..8294d6f5e2 100644 --- a/src/lib/pipeline/FetchUnit.cc +++ b/src/lib/pipeline/FetchUnit.cc @@ -113,7 +113,7 @@ void FetchUnit::tick() { // TODO: Handle memory faults assert(fetched[fetchIndex].data && "Memory read failed"); const uint8_t* fetchData = - fetched[fetchIndex].data.getAsVector(); + fetched[fetchIndex].data.getAsVector().ptr; // Copy fetched data to fetch buffer after existing data std::memcpy(fetchBuffer_ + bufferedBytes_, fetchData + bufferOffset, diff --git a/sst/SimEngMemInterface.cc b/sst/SimEngMemInterface.cc index fd3caa0b7d..99ecc58b47 100644 --- a/sst/SimEngMemInterface.cc +++ b/sst/SimEngMemInterface.cc @@ -102,7 +102,7 @@ std::vector SimEngMemInterface::splitAggregatedRequest( // Fill the payload vector currReqSize number of bytes starting // and inclusive of the dataIndex. - const char* data = aggrReq->data.getAsVector(); + const uint8_t* data = aggrReq->data.getAsVector().ptr; memcpy((void*)&payload[0], &(data[dataIndex]), currReqSize); StandardMem::Request* writeReq = new StandardMem::Write(addrStart, currReqSize, payload); diff --git a/test/regression/RegressionTest.hh b/test/regression/RegressionTest.hh index 661584cd43..9dfc94ad30 100644 --- a/test/regression/RegressionTest.hh +++ b/test/regression/RegressionTest.hh @@ -104,7 +104,7 @@ class RegressionTest /** Get a pointer to the value of an architectural vector register. 
*/ template - const T* getVectorRegister(simeng::Register reg) const { + simeng::safePointer getVectorRegister(simeng::Register reg) const { return core_->getArchitecturalRegisterFileSet().get(reg).getAsVector(); } diff --git a/test/regression/aarch64/AArch64RegressionTest.hh b/test/regression/aarch64/AArch64RegressionTest.hh index 32d975b09d..013be94968 100644 --- a/test/regression/aarch64/AArch64RegressionTest.hh +++ b/test/regression/aarch64/AArch64RegressionTest.hh @@ -256,7 +256,7 @@ class AArch64RegressionTest : public RegressionTest { template void checkNeonRegister(uint8_t tag, const std::array& values) const { - const T* data = RegressionTest::getVectorRegister( + const auto data = RegressionTest::getVectorRegister( {simeng::arch::aarch64::RegisterType::VECTOR, tag}); for (unsigned i = 0; i < (256 / sizeof(T)); i++) { EXPECT_NEAR(data[i], values[i], 0.0005) @@ -272,7 +272,7 @@ class AArch64RegressionTest : public RegressionTest { template void checkPredicateRegister( uint8_t tag, const std::array& values) const { - const T* data = RegressionTest::getVectorRegister( + const auto data = RegressionTest::getVectorRegister( {simeng::arch::aarch64::RegisterType::PREDICATE, tag}); for (unsigned i = 0; i < (32 / sizeof(T)); i++) { EXPECT_NEAR(data[i], values[i], 0.0005) @@ -312,7 +312,7 @@ class AArch64RegressionTest : public RegressionTest { } uint16_t reg_tag = base + (index * tileTypeCount); - const T* data = getMatrixRegisterRow(reg_tag); + const auto data = getMatrixRegisterRow(reg_tag); for (unsigned i = 0; i < (256 / sizeof(T)); i++) { EXPECT_NEAR(data[i], values[i], 0.0005) << "Mismatch for element " << i << "."; @@ -383,7 +383,7 @@ class AArch64RegressionTest : public RegressionTest { /** Get a pointer to the value of an architectural matrix register row. 
*/ template - const T* getMatrixRegisterRow(uint16_t tag) const { + const simeng::safePointer getMatrixRegisterRow(uint16_t tag) const { return RegressionTest::getVectorRegister( {simeng::arch::aarch64::RegisterType::MATRIX, tag}); } diff --git a/test/unit/RegisterValueTest.cc b/test/unit/RegisterValueTest.cc index 034d6d8766..e382b63d22 100644 --- a/test/unit/RegisterValueTest.cc +++ b/test/unit/RegisterValueTest.cc @@ -40,7 +40,7 @@ TEST(RegisterValueTest, Reinterpret) { TEST(RegisterValueTest, Vector) { uint64_t value = 0x0000000200000001; auto registerValue = simeng::RegisterValue(value, 8); - const uint32_t* vector = registerValue.getAsVector(); + const auto vector = registerValue.getAsVector(); EXPECT_EQ(vector[0], 1); EXPECT_EQ(vector[1], 2); } From 71e32dd4f8afcf79c42d86b6bb59998477c013b2 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Mon, 12 May 2025 17:20:28 +0100 Subject: [PATCH 06/16] Temp commit for rebase --- .../simeng/arch/aarch64/helpers/neon.hh | 18 +++++++++--------- src/lib/arch/aarch64/Instruction_execute.cc | 16 ++++++++-------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/include/simeng/arch/aarch64/helpers/neon.hh b/src/include/simeng/arch/aarch64/helpers/neon.hh index 7ee2ac1c6c..2411c3a8af 100644 --- a/src/include/simeng/arch/aarch64/helpers/neon.hh +++ b/src/include/simeng/arch/aarch64/helpers/neon.hh @@ -87,9 +87,9 @@ RegisterValue vecBicShift_imm( * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecBitwiseInsert(srcValContainer& sourceValues, bool isBif) { - const uint64_t* d = sourceValues[0].getAsVector(); - const uint64_t* n = sourceValues[1].getAsVector(); - const uint64_t* m = sourceValues[2].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); uint64_t out[2] = {0}; for (int i = 0; i < (I / 8); i++) { out[i] = @@ -105,9 +105,9 @@ RegisterValue vecBitwiseInsert(srcValContainer& sourceValues, bool isBif) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecBsl(srcValContainer& sourceValues) { - const uint64_t* d = sourceValues[0].getAsVector(); - const uint64_t* n = sourceValues[1].getAsVector(); - const uint64_t* m = sourceValues[2].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); uint64_t out[2] = {0}; for (int i = 0; i < (I / 8); i++) { out[i] = (d[i] & n[i]) | (~d[i] & m[i]); @@ -143,7 +143,7 @@ RegisterValue vecCompare(srcValContainer& sourceValues, bool cmpToZero, * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecCountPerByte(srcValContainer& sourceValues) { - const uint8_t* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { for (size_t j = 0; j < (sizeof(T) * 8); j++) { @@ -833,7 +833,7 @@ RegisterValue vecTbl( assert(I == 8 || I == 16); // Vm contains the indices to fetch from table - const uint8_t* Vm = + const safePointer Vm = sourceValues[metadata.operandCount - 2] .getAsVector(); // final operand is vecMovi_imm @@ -845,7 +845,7 @@ RegisterValue vecTbl( const uint16_t tableSize = 16 * n_table_regs; std::vector table(tableSize, 0); for (uint8_t i = 0; i < n_table_regs; i++) { - const uint8_t* currentVector = sourceValues[i].getAsVector(); + const safePointer currentVector = sourceValues[i].getAsVector(); for (uint8_t j = 0; j < 16; j++) { table[16 * i + j] = currentVector[j]; } diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc index f4cc21c727..d4c3a33093 100644 --- a/src/lib/arch/aarch64/Instruction_execute.cc +++ b/src/lib/arch/aarch64/Instruction_execute.cc @@ -5996,8 +5996,8 @@ void Instruction::execute() { // vt4.2d}, [xn|sp], <#imm|xm> // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = auto RegisterValue( - (char*)sourceValues_[i].getAsVector(), + memoryData_[i] = RegisterValue( + sourceValues_[i].getAsVector().ptr, 2 * sizeof(uint64_t)); } // if #imm post-index, value can only be 64 @@ -6012,8 +6012,8 @@ void Instruction::execute() { // vt4.2s}, [xn|sp], <#imm|xm> // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = auto RegisterValue( - (char*)sourceValues_[i].getAsVector(), + memoryData_[i] = RegisterValue( + sourceValues_[i].getAsVector().ptr, 2 * sizeof(uint32_t)); } // if #imm post-index, value can only be 32 @@ -6028,8 +6028,8 @@ void Instruction::execute() { // vt4.4s}, [xn|sp] // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = auto RegisterValue( - 
(char*)sourceValues_[i].getAsVector(), + memoryData_[i] = RegisterValue( + sourceValues_[i].getAsVector().ptr, 4 * sizeof(uint32_t)); } break; @@ -6038,8 +6038,8 @@ void Instruction::execute() { // vt4.4s}, [xn|sp], <#imm|xm> // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = auto RegisterValue( - (char*)sourceValues_[i].getAsVector(), + memoryData_[i] = RegisterValue( + sourceValues_[i].getAsVector().ptr, 4 * sizeof(uint32_t)); } // if #imm post-index, value can only be 64 From 39b576ee36a419a633f18848c62e151a4253ea00 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Tue, 13 May 2025 12:11:27 +0100 Subject: [PATCH 07/16] Save progress --- src/include/simeng/CoreInstance.hh | 4 +- src/include/simeng/Elf.hh | 2 +- .../simeng/arch/aarch64/helpers/neon.hh | 149 +++-- .../simeng/arch/aarch64/helpers/sve.hh | 305 +++++---- src/include/simeng/kernel/LinuxProcess.hh | 6 +- src/lib/CoreInstance.cc | 2 +- src/lib/Elf.cc | 7 +- src/lib/arch/aarch64/Instruction_address.cc | 6 +- src/lib/arch/aarch64/Instruction_execute.cc | 584 +++++++++--------- src/lib/kernel/Linux.cc | 3 + src/lib/kernel/LinuxProcess.cc | 22 +- test/regression/RegressionTest.cc | 2 +- test/regression/RegressionTest.hh | 2 +- test/regression/aarch64/Syscall.cc | 8 +- test/unit/FixedLatencyMemoryInterfaceTest.cc | 4 +- test/unit/FlatMemoryInterfaceTest.cc | 5 +- 16 files changed, 535 insertions(+), 576 deletions(-) diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh index 2cc739f3f9..b2c75ca4b7 100644 --- a/src/include/simeng/CoreInstance.hh +++ b/src/include/simeng/CoreInstance.hh @@ -60,7 +60,7 @@ class CoreInstance { std::shared_ptr getInstructionMemory() const; /** Getter for a shared pointer to the created process image. */ - std::shared_ptr getProcessImage() const; + std::shared_ptr getProcessImage() const; /** Getter for the size of the created process image. 
*/ uint64_t getProcessImageSize() const; @@ -114,7 +114,7 @@ class CoreInstance { uint64_t processMemorySize_; /** The process memory space. */ - std::shared_ptr processMemory_; + std::shared_ptr processMemory_; /** Whether or not the dataMemory_ must be set manually. */ bool setDataMemory_ = false; diff --git a/src/include/simeng/Elf.hh b/src/include/simeng/Elf.hh index b02e56e9e7..7a7101160e 100644 --- a/src/include/simeng/Elf.hh +++ b/src/include/simeng/Elf.hh @@ -49,7 +49,7 @@ struct Elf64_Phdr { /** A processed Executable and Linkable Format (ELF) file. */ class Elf { public: - Elf(std::string path, char** imagePointer); + Elf(std::string path, uint8_t** imagePointer); ~Elf(); /** Returns the process image size */ diff --git a/src/include/simeng/arch/aarch64/helpers/neon.hh b/src/include/simeng/arch/aarch64/helpers/neon.hh index 2411c3a8af..60d4f98f95 100644 --- a/src/include/simeng/arch/aarch64/helpers/neon.hh +++ b/src/include/simeng/arch/aarch64/helpers/neon.hh @@ -13,8 +13,8 @@ namespace aarch64 { * Returns correctly formatted Register Value. */ template RegisterValue vecAdd_3ops(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = static_cast(n[i] + m[i]); @@ -29,8 +29,8 @@ RegisterValue vecAdd_3ops(srcValContainer& sourceValues) { * Returns correctly formatted Register Value. 
*/ template RegisterValue vecAddp_3ops(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; uint8_t offset = I / 2; for (int i = 0; i < I; i++) { @@ -50,8 +50,8 @@ RegisterValue vecAddp_3ops(srcValContainer& sourceValues) { * Returns correctly formatted Register Value. */ template RegisterValue vecBic_3ops(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = n[i] & ~m[i]; @@ -69,7 +69,7 @@ template RegisterValue vecBicShift_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = sourceValues[0].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); T imm = ~shiftValue(static_cast(metadata.operands[1].imm), metadata.operands[1].shift.type, metadata.operands[1].shift.value); @@ -124,9 +124,8 @@ RegisterValue vecBsl(srcValContainer& sourceValues) { template RegisterValue vecCompare(srcValContainer& sourceValues, bool cmpToZero, std::function func) { - const T* n = sourceValues[0].getAsVector(); - const T* m; - if (!cmpToZero) m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = !cmpToZero ? sourceValues[1].getAsVector() : safePointer(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = func(n[i], cmpToZero ? 
static_cast(0) : m[i]) @@ -182,8 +181,8 @@ template RegisterValue vecExtVecs_index( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); const uint64_t index = static_cast(metadata.operands[3].imm); T out[16 / sizeof(T)] = {0}; @@ -204,8 +203,8 @@ RegisterValue vecExtVecs_index( * Returns correctly formatted RegisterValue. */ template RegisterValue vecFabd(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = std::fabs(n[i] - m[i]); @@ -220,7 +219,7 @@ RegisterValue vecFabd(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecFabs_2ops(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = std::fabs(n[i]); @@ -239,9 +238,8 @@ RegisterValue vecFabs_2ops(srcValContainer& sourceValues) { template RegisterValue vecFCompare(srcValContainer& sourceValues, bool cmpToZero, std::function func) { - const T* n = sourceValues[0].getAsVector(); - const T* m; - if (!cmpToZero) m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = !cmpToZero ? sourceValues[1].getAsVector() : safePointer(); C out[16 / sizeof(C)] = {0}; for (int i = 0; i < I; i++) { out[i] = func(n[i], cmpToZero ? static_cast(0) : m[i]) @@ -259,7 +257,7 @@ RegisterValue vecFCompare(srcValContainer& sourceValues, bool cmpToZero, * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecFcvtl(srcValContainer& sourceValues, bool isFcvtl2) { - const N* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); D out[16 / sizeof(D)] = {0}; for (int i = (isFcvtl2 ? I : 0); i < (isFcvtl2 ? (I * 2) : I); i++) { out[isFcvtl2 ? (i - I) : i] = static_cast(n[i]); @@ -275,7 +273,7 @@ RegisterValue vecFcvtl(srcValContainer& sourceValues, bool isFcvtl2) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecFcvtn(srcValContainer& sourceValues, bool isFcvtn2) { - const N* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); D out[16 / sizeof(D)] = {0}; for (int i = (isFcvtn2 ? (I / 2) : 0); i < I; i++) { out[i] = static_cast(n[isFcvtn2 ? (i - (I / 2)) : i]); @@ -291,7 +289,7 @@ RegisterValue vecFcvtn(srcValContainer& sourceValues, bool isFcvtn2) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecFcvtzs(srcValContainer& sourceValues) { - const N* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); D out[16 / sizeof(D)] = {0}; // TODO: Handle NaNs, denorms, and saturation for (int i = 0; i < I; i++) { @@ -308,9 +306,9 @@ RegisterValue vecFcvtzs(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecFmla_3vecs(srcValContainer& sourceValues) { - const T* d = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = d[i] + n[i] * m[i]; @@ -326,8 +324,8 @@ RegisterValue vecFmla_3vecs(srcValContainer& sourceValues) { template std::enable_if_t, RegisterValue> vecFDiv( srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { if (m[i] == 0) @@ -348,8 +346,8 @@ template RegisterValue vecFmlaIndexed_3vecs( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); int index = metadata.operands[2].vector_index; const T m = sourceValues[2].getAsVector()[index]; T out[16 / sizeof(T)] = {0}; @@ -367,9 +365,9 @@ RegisterValue vecFmlaIndexed_3vecs( * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecFmls_3vecs(srcValContainer& sourceValues) { - const T* d = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = d[i] - (n[i] * m[i]); @@ -387,8 +385,8 @@ template RegisterValue vecFmlsIndexed_3vecs( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); int index = metadata.operands[2].vector_index; const T m = sourceValues[2].getAsVector()[index]; T out[16 / sizeof(T)] = {0}; @@ -409,7 +407,7 @@ RegisterValue vecFmulIndexed_vecs( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { int index = metadata.operands[2].vector_index; - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); const T m = sourceValues[1].getAsVector()[index]; T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { @@ -425,7 +423,7 @@ RegisterValue vecFmulIndexed_vecs( * Returns correctly formatted RegisterValue. */ template RegisterValue vecFneg_2ops(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = -n[i]; @@ -440,7 +438,7 @@ RegisterValue vecFneg_2ops(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecFsqrt_2ops(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = ::sqrt(n[i]); @@ -455,7 +453,7 @@ RegisterValue vecFsqrt_2ops(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecFrsqrte_2ops(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = 1.0f / sqrtf(n[i]); @@ -471,8 +469,8 @@ RegisterValue vecFrsqrte_2ops(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecFrsqrts_3ops(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = (3.0f - n[i] * m[i]) / 2.0f; @@ -490,8 +488,8 @@ template RegisterValue vecIns_2Index( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { @@ -512,7 +510,7 @@ template RegisterValue vecInsIndex_gpr( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* d = sourceValues[0].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); const T n = sourceValues[1].get(); T out[16 / sizeof(T)] = {0}; @@ -532,7 +530,7 @@ RegisterValue vecInsIndex_gpr( template RegisterValue vecLogicOp_2vecs(srcValContainer& sourceValues, 
std::function func) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = func(n[i]); @@ -549,8 +547,8 @@ RegisterValue vecLogicOp_2vecs(srcValContainer& sourceValues, template RegisterValue vecLogicOp_3vecs(srcValContainer& sourceValues, std::function func) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = func(n[i], m[i]); @@ -565,13 +563,13 @@ RegisterValue vecLogicOp_3vecs(srcValContainer& sourceValues, * Returns correctly formatted RegisterValue. */ template RegisterValue vecUMaxP(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); // Concatenate the vectors T temp[2 * I]; - memcpy(temp, n, sizeof(T) * I); - memcpy(temp + (sizeof(T) * I), m, sizeof(T) * I); + memcpy(temp, n.ptr, sizeof(T) * I); + memcpy(temp + (sizeof(T) * I), m.ptr, sizeof(T) * I); // Compare each adjacent pair of elements T out[I]; for (int i = 0; i < I; i++) { @@ -587,13 +585,13 @@ RegisterValue vecUMaxP(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecUMinP(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); // Concatenate the vectors T temp[2 * I]; - memcpy(temp, n, sizeof(T) * I); - memcpy(temp + (sizeof(T) * I), m, sizeof(T) * I); + memcpy(temp, n.ptr, sizeof(T) * I); + memcpy(temp + (sizeof(T) * I), m.ptr, sizeof(T) * I); T out[I]; for (int i = 0; i < I; i++) { @@ -609,7 +607,7 @@ RegisterValue vecUMinP(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecMaxnmp_2ops(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); bool isFP = std::is_floating_point::value; T out = n[0]; @@ -626,7 +624,7 @@ RegisterValue vecMaxnmp_2ops(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecMinv_2ops(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); bool isFP = std::is_floating_point::value; T out = n[0]; @@ -680,7 +678,7 @@ RegisterValue vecMoviShift_imm( template RegisterValue vecScvtf_2vecs(srcValContainer& sourceValues, std::function func) { - const N* n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); D out[16 / sizeof(D)] = {0}; for (int i = 0; i < I; i++) { out[i] = static_cast(n[i]); @@ -697,7 +695,7 @@ template RegisterValue vecShlShift_vecImm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); int64_t shift = metadata.operands[2].imm; T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { @@ -717,7 +715,7 @@ template RegisterValue vecShllShift_vecImm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, bool isShll2) { - const N* n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); uint64_t shift = metadata.operands[2].imm; D out[16 / sizeof(D)] = {0}; int index = isShll2 ? 
I : 0; @@ -741,7 +739,7 @@ RegisterValue vecShrnShift_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, bool shrn2 = false) { - const Ta* n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); uint64_t shift = metadata.operands[2].imm; @@ -762,7 +760,7 @@ template RegisterValue vecSshrShift_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); uint64_t shift = metadata.operands[2].imm; T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { @@ -778,7 +776,7 @@ RegisterValue vecSshrShift_imm( * Returns correctly formatted RegisterValue. */ template RegisterValue vecSumElems_2ops(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); T out = 0; for (int i = 0; i < I; i++) { out += n[i]; @@ -794,15 +792,8 @@ RegisterValue vecSumElems_2ops(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecXtn(srcValContainer& sourceValues, bool isXtn2) { - const D* d; - const N* n; - if (isXtn2) { - d = sourceValues[0].getAsVector(); - n = sourceValues[1].getAsVector(); - } else { - d = {}; - n = sourceValues[0].getAsVector(); - } + const auto d = isXtn2 ? sourceValues[0].getAsVector() : safePointer(); + const auto n = isXtn2 ? sourceValues[1].getAsVector() : sourceValues[0].getAsVector(); D out[16 / sizeof(D)] = {0}; int index = 0; @@ -879,7 +870,7 @@ RegisterValue vecTbl( * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecRev(srcValContainer& sourceValues) { - const T* source = sourceValues[0].getAsVector(); + const safePointer source = sourceValues[0].getAsVector(); int element_size = (sizeof(T) * 8); int datasize = I * element_size; int container_size = V; @@ -907,8 +898,8 @@ RegisterValue vecRev(srcValContainer& sourceValues) { * Returns formatted Register Value. */ template RegisterValue vecTrn1(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I / 2; i++) { @@ -926,8 +917,8 @@ RegisterValue vecTrn1(srcValContainer& sourceValues) { * Returns formatted Register Value. */ template RegisterValue vecTrn2(srcValContainer& sourceValues) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I / 2; i++) { @@ -945,8 +936,8 @@ RegisterValue vecTrn2(srcValContainer& sourceValues) { * Returns formatted Register Value. */ template RegisterValue vecUzp(srcValContainer& sourceValues, bool isUzp1) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I / 2; i++) { @@ -966,8 +957,8 @@ RegisterValue vecUzp(srcValContainer& sourceValues, bool isUzp1) { * Returns formatted Register Value. 
*/ template RegisterValue vecZip(srcValContainer& sourceValues, bool isZip2) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; int index = isZip2 ? (I / 2) : 0; diff --git a/src/include/simeng/arch/aarch64/helpers/sve.hh b/src/include/simeng/arch/aarch64/helpers/sve.hh index 0711fafc90..8ed710343e 100644 --- a/src/include/simeng/arch/aarch64/helpers/sve.hh +++ b/src/include/simeng/arch/aarch64/helpers/sve.hh @@ -15,8 +15,8 @@ namespace aarch64 { template RegisterValue sveAdd_3ops(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -34,7 +34,7 @@ RegisterValue sveAdd_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); const T imm = static_cast(metadata.operands[2].imm); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -55,8 +55,8 @@ RegisterValue sveAddPredicated_const( const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { bool isFP = std::is_floating_point::value; - const uint64_t* p = sourceValues[0].getAsVector(); - const T* d = sourceValues[1].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer d = sourceValues[1].getAsVector(); const auto con = isFP ? 
metadata.operands[3].fp : metadata.operands[3].imm; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -78,9 +78,9 @@ RegisterValue sveAddPredicated_const( template RegisterValue sveAddPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* d = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer d = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -100,8 +100,8 @@ RegisterValue sveAddPredicated_vecs(srcValContainer& sourceValues, template RegisterValue sveAddvPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); uint64_t out = 0; @@ -123,8 +123,8 @@ RegisterValue sveAdr_packedOffsets( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -145,14 +145,10 @@ std::tuple, uint8_t> sveCmpPredicated_toPred( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits, bool cmpToImm, std::function func) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); - const T* m; - T imm; - if (cmpToImm) - imm = static_cast(metadata.operands[3].imm); - else - 
m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); + const safePointer m = !cmpToImm ? sourceValues[2].getAsVector() : safePointer(); + T imm = cmpToImm ? static_cast(metadata.operands[3].imm) : 0; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); std::array out = {0, 0, 0, 0}; @@ -189,8 +185,8 @@ uint64_t sveCnt_gpr(const simeng::arch::aarch64::InstructionMetadata& metadata, * Returns single value of type uint64_t. */ template uint64_t sveCntp(srcValContainer& sourceValues, const uint16_t VL_bits) { - const uint64_t* pg = sourceValues[0].getAsVector(); - const uint64_t* pn = sourceValues[1].getAsVector(); + const auto pg = sourceValues[0].getAsVector(); + const auto pn = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); uint64_t count = 0; @@ -213,10 +209,9 @@ std::array sveComparePredicated_vecsToPred( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits, bool cmpToZero, std::function func) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); - const T* m; - if (!cmpToZero) m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); + const safePointer m = !cmpToZero ? 
sourceValues[2].getAsVector() : safePointer(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); std::array out = {0}; @@ -240,7 +235,7 @@ RegisterValue sveCpy_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const uint64_t* p = sourceValues[0].getAsVector(); + const auto p = sourceValues[0].getAsVector(); const int16_t imm = metadata.operands[2].imm; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -308,7 +303,7 @@ RegisterValue sveDup_vecIndexed( const uint16_t VL_bits) { const uint16_t index = static_cast(metadata.operands[1].vector_index); - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -329,9 +324,9 @@ RegisterValue sveDup_vecIndexed( template RegisterValue sveFabsPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -354,9 +349,9 @@ RegisterValue sveFabsPredicated(srcValContainer& sourceValues, template RegisterValue sveFaddaPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const uint64_t* p = sourceValues[0].getAsVector(); + const auto p = sourceValues[0].getAsVector(); const T n = sourceValues[1].get(); - const T* m = sourceValues[2].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -380,9 +375,9 @@ RegisterValue sveFcaddPredicated( srcValContainer& sourceValues, const 
simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* dn = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer dn = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); const uint32_t imm = metadata.operands[4].imm; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -425,10 +420,10 @@ RegisterValue sveFcmlaPredicated( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const T* da = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); - const T* m = sourceValues[3].getAsVector(); + const safePointer da = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); + const safePointer m = sourceValues[3].getAsVector(); const uint32_t imm = metadata.operands[4].imm; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -475,8 +470,8 @@ RegisterValue sveFcpy_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const T* dn = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); + const safePointer dn = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); const T imm = metadata.operands[2].fp; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -502,9 +497,9 @@ RegisterValue sveFcpy_imm( template RegisterValue sveFcvtPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const D* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const N* n = sourceValues[2].getAsVector(); + const auto d = sourceValues[0].getAsVector(); + const auto p = 
sourceValues[1].getAsVector(); + const auto n = sourceValues[2].getAsVector(); // Stores size of largest type out of D and N int lts = std::max(sizeof(D), sizeof(N)); @@ -548,9 +543,9 @@ RegisterValue sveFcvtzsPredicated(srcValContainer& sourceValues, static_assert((std::is_same() || std::is_same()) && "D is not a valid type which should be int32_t or int64_t"); - const D* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const N* n = sourceValues[2].getAsVector(); + const auto d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const auto n = sourceValues[2].getAsVector(); // Stores size of largest type out of D and N int lts = std::max(sizeof(D), sizeof(N)); @@ -605,9 +600,9 @@ RegisterValue sveFcvtzsPredicated(srcValContainer& sourceValues, template std::enable_if_t, RegisterValue> sveFDivPredicated( srcValContainer& sourceValues, const uint16_t VL_bits) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* dn = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer dn = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -633,10 +628,10 @@ std::enable_if_t, RegisterValue> sveFDivPredicated( template RegisterValue sveFmadPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); - const T* m = sourceValues[3].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); + const safePointer m = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] 
= {0}; @@ -657,10 +652,10 @@ RegisterValue sveFmadPredicated_vecs(srcValContainer& sourceValues, template RegisterValue sveFmlsPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); - const T* m = sourceValues[3].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); + const safePointer m = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -681,10 +676,10 @@ RegisterValue sveFmlsPredicated_vecs(srcValContainer& sourceValues, template RegisterValue sveFmsbPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); - const T* m = sourceValues[3].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); + const safePointer m = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -704,8 +699,8 @@ RegisterValue sveFmsbPredicated_vecs(srcValContainer& sourceValues, template RegisterValue sveFmul_3ops(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -721,9 +716,9 @@ RegisterValue sveFmul_3ops(srcValContainer& sourceValues, template RegisterValue sveFnegPredicated(srcValContainer& sourceValues, 
const uint16_t VL_bits) { - const T* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -745,10 +740,10 @@ RegisterValue sveFnegPredicated(srcValContainer& sourceValues, template RegisterValue sveFnmlsPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); - const T* m = sourceValues[3].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); + const safePointer m = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -770,10 +765,10 @@ RegisterValue sveFnmlsPredicated(srcValContainer& sourceValues, template RegisterValue sveFnmsbPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); - const T* a = sourceValues[3].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); + const safePointer a = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -795,9 +790,9 @@ RegisterValue sveFnmsbPredicated(srcValContainer& sourceValues, template std::enable_if_t, RegisterValue> sveFrintnPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* d = 
sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -833,9 +828,9 @@ sveFrintnPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { template RegisterValue sveFsqrtPredicated_2vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -876,7 +871,7 @@ RegisterValue sveInc_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); const uint8_t imm = static_cast(metadata.operands[2].imm); @@ -897,7 +892,7 @@ RegisterValue sveInc_imm( template uint64_t sveIncp_gpr(srcValContainer& sourceValues, const uint16_t VL_bits) { const uint64_t dn = sourceValues[0].get(); - const uint64_t* p = sourceValues[1].getAsVector(); + const auto p = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); uint64_t count = 0; @@ -944,9 +939,9 @@ template std::array sveLogicOp_preds( srcValContainer& sourceValues, const uint16_t VL_bits, std::function func) { - const uint64_t* p = sourceValues[0].getAsVector(); - const uint64_t* n = sourceValues[1].getAsVector(); - const uint64_t* m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const 
auto n = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); std::array out = {0}; @@ -970,9 +965,9 @@ template RegisterValue sveLogicOpPredicated_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits, std::function func) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* dn = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer dn = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -994,8 +989,8 @@ template RegisterValue sveLogicOpUnPredicated_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits, std::function func) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1013,7 +1008,7 @@ RegisterValue sveLsl_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); const T imm = static_cast(metadata.operands[2].imm); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -1034,7 +1029,7 @@ RegisterValue sveMax_vecImm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); T imm = static_cast(metadata.operands[2].imm); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -1053,9 +1048,9 @@ RegisterValue sveMax_vecImm( template RegisterValue 
sveMaxPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1077,10 +1072,10 @@ RegisterValue sveMaxPredicated_vecs(srcValContainer& sourceValues, template RegisterValue sveMlaPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); - const T* m = sourceValues[3].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); + const safePointer m = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1103,9 +1098,9 @@ RegisterValue sveMlaIndexed_vecs( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const T* d = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); const size_t index = static_cast(metadata.operands[2].vector_index); const uint16_t elemsPer128 = 128 / (sizeof(T) * 8); @@ -1130,8 +1125,8 @@ template RegisterValue sveMovprfxPredicated_destToZero(srcValContainer& sourceValues, const uint16_t VL_bits) { // TODO: Adopt hint logic of the MOVPRFX instruction - const uint64_t* p = sourceValues[0].getAsVector(); - const 
T* n = sourceValues[1].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1155,9 +1150,9 @@ template RegisterValue sveMovprfxPredicated_destUnchanged(srcValContainer& sourceValues, const uint16_t VL_bits) { // TODO: Adopt hint logic of the MOVPRFX instruction - const T* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1183,14 +1178,14 @@ RegisterValue sveMulPredicated( const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits, bool useImm) { bool isFP = std::is_floating_point::value; - const uint64_t* p = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); - const T* m; - T imm; - if (useImm) - imm = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; - else - m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); + const safePointer m = !useImm ? sourceValues[2].getAsVector() : safePointer() ; + T imm = useImm ? (isFP ? metadata.operands[3].fp : metadata.operands[3].imm) : T(); + // if (useImm) + // imm = isFP ? 
metadata.operands[3].fp : metadata.operands[3].imm; + // else + // m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1215,9 +1210,9 @@ RegisterValue sveMulPredicated( template RegisterValue sveMulhPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1253,8 +1248,8 @@ RegisterValue sveMulhPredicated(srcValContainer& sourceValues, template RegisterValue sveOrr_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1274,8 +1269,8 @@ std::array svePsel( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const uint64_t* pn = sourceValues[0].getAsVector(); - const uint64_t* pm = sourceValues[1].getAsVector(); + const auto pn = sourceValues[0].getAsVector(); + const auto pm = sourceValues[1].getAsVector(); const uint32_t wa = sourceValues[2].get(); const uint32_t imm = static_cast(metadata.operands[2].pred.imm_index); @@ -1346,7 +1341,7 @@ std::array svePunpk(srcValContainer& sourceValues, template std::array sveRev_predicates(srcValContainer& sourceValues, const uint16_t VL_bits) { - const uint64_t* n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); const uint16_t partition_num = VL_bits / 
(sizeof(T) * 8); std::array out = {0, 0, 0, 0}; @@ -1371,7 +1366,7 @@ std::array sveRev_predicates(srcValContainer& sourceValues, template RegisterValue sveRev_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1391,9 +1386,9 @@ RegisterValue sveRev_vecs(srcValContainer& sourceValues, template RegisterValue sveSel_zpzz(srcValContainer& sourceValues, const uint16_t VL_bits) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1412,8 +1407,8 @@ RegisterValue sveSel_zpzz(srcValContainer& sourceValues, * Returns correctly formatted RegisterValue. 
*/ template RegisterValue sveSminv(srcValContainer& sourceValues, const uint16_t VL_bits) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* n = sourceValues[1].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer n = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out = std::numeric_limits::max(); @@ -1432,8 +1427,8 @@ RegisterValue sveSminv(srcValContainer& sourceValues, const uint16_t VL_bits) { template RegisterValue sveSub_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1451,9 +1446,9 @@ RegisterValue sveSub_3vecs(srcValContainer& sourceValues, template RegisterValue sveSubrPredicated_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const uint64_t* p = sourceValues[0].getAsVector(); - const T* dn = sourceValues[1].getAsVector(); - const T* m = sourceValues[2].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer dn = sourceValues[1].getAsVector(); + const safePointer m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1479,8 +1474,8 @@ RegisterValue sveSubPredicated_imm( const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { bool isFP = std::is_floating_point::value; - const uint64_t* p = sourceValues[0].getAsVector(); - const T* dn = sourceValues[1].getAsVector(); + const auto p = sourceValues[0].getAsVector(); + const safePointer dn = sourceValues[1].getAsVector(); const auto imm = isFP ? 
metadata.operands[3].fp : metadata.operands[3].imm; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -1506,9 +1501,9 @@ RegisterValue sveSubPredicated_imm( template RegisterValue sveSxtPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* d = sourceValues[0].getAsVector(); - const uint64_t* p = sourceValues[1].getAsVector(); - const T* n = sourceValues[2].getAsVector(); + const safePointer d = sourceValues[0].getAsVector(); + const auto p = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1531,8 +1526,8 @@ RegisterValue sveSxtPredicated(srcValContainer& sourceValues, template RegisterValue sveTrn1_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1550,8 +1545,8 @@ RegisterValue sveTrn1_3vecs(srcValContainer& sourceValues, template RegisterValue sveTrn2_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1572,7 +1567,7 @@ RegisterValue sveTrn2_3vecs(srcValContainer& sourceValues, template RegisterValue sveUnpk_vecs(srcValContainer& sourceValues, const uint16_t VL_bits, bool isHi) { - const N* n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(D) * 8); D out[256 / sizeof(D)] = {0}; @@ -1616,8 +1611,8 @@ uint64_t 
sveUqdec(srcValContainer& sourceValues, template RegisterValue sveUzp_vecs(srcValContainer& sourceValues, const uint16_t VL_bits, bool isUzp1) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1671,8 +1666,8 @@ std::tuple, uint8_t> sveWhilelo( template std::array sveZip_preds(srcValContainer& sourceValues, const uint16_t VL_bits, bool isZip2) { - const uint64_t* n = sourceValues[0].getAsVector(); - const uint64_t* m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); std::array out = {0, 0, 0, 0}; @@ -1707,8 +1702,8 @@ std::array sveZip_preds(srcValContainer& sourceValues, template RegisterValue sveZip_vecs(srcValContainer& sourceValues, const uint16_t VL_bits, bool isZip2) { - const T* n = sourceValues[0].getAsVector(); - const T* m = sourceValues[1].getAsVector(); + const safePointer n = sourceValues[0].getAsVector(); + const safePointer m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1733,7 +1728,7 @@ RegisterValue sveZip_vecs(srcValContainer& sourceValues, const uint16_t VL_bits, * C represents the size of the memory elements (e.g. for st1w, C = uint32_t). * Return a vector of RegisterValues. 
*/ template -std::vector sve_merge_store_data(const T* d, const uint64_t* p, +std::vector sve_merge_store_data(const safePointer d, const safePointer p, uint16_t vl_bits) { std::vector outputData; @@ -1754,13 +1749,13 @@ std::vector sve_merge_store_data(const T* d, const uint64_t* p, mdSize++; } else if (mdSize) { outputData.push_back( - RegisterValue((char*)mData.data(), mdSize * sizeof(C))); + RegisterValue(reinterpret_cast(mData.data()), mdSize * sizeof(C))); mdSize = 0; } } if (mdSize) { outputData.push_back( - RegisterValue((char*)mData.data(), mdSize * sizeof(C))); + RegisterValue(reinterpret_cast(mData.data()), mdSize * sizeof(C))); } return outputData; } diff --git a/src/include/simeng/kernel/LinuxProcess.hh b/src/include/simeng/kernel/LinuxProcess.hh index 9d3fcf1c25..21ef03840b 100644 --- a/src/include/simeng/kernel/LinuxProcess.hh +++ b/src/include/simeng/kernel/LinuxProcess.hh @@ -87,7 +87,7 @@ class LinuxProcess { uint64_t getPageSize() const; /** Get a shared_ptr to process image. */ - std::shared_ptr getProcessImage() const; + std::shared_ptr getProcessImage() const; /** Get the size of the process image. */ uint64_t getProcessImageSize() const; @@ -112,7 +112,7 @@ class LinuxProcess { const uint64_t HEAP_SIZE; /** Create and populate the initial process stack. */ - void createStack(char** processImage); + void createStack(uint8_t** processImage); /** The entry point of the process. */ uint64_t entryPoint_ = 0; @@ -148,7 +148,7 @@ class LinuxProcess { bool isValid_ = false; /** Shared pointer to processImage. 
*/ - std::shared_ptr processImage_; + std::shared_ptr processImage_; }; } // namespace kernel diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc index 46f8638286..f1aa2f6fc8 100644 --- a/src/lib/CoreInstance.cc +++ b/src/lib/CoreInstance.cc @@ -337,7 +337,7 @@ std::shared_ptr CoreInstance::getInstructionMemory() return instructionMemory_; } -std::shared_ptr CoreInstance::getProcessImage() const { +std::shared_ptr CoreInstance::getProcessImage() const { return processMemory_; } diff --git a/src/lib/Elf.cc b/src/lib/Elf.cc index 28b138558b..f6bac7178a 100644 --- a/src/lib/Elf.cc +++ b/src/lib/Elf.cc @@ -14,7 +14,7 @@ namespace simeng { * https://man7.org/linux/man-pages/man5/elf.5.html */ -Elf::Elf(std::string path, char** imagePointer) { +Elf::Elf(std::string path, uint8_t** imagePointer) { std::ifstream file(path, std::ios::binary); if (!file.is_open()) { @@ -165,7 +165,7 @@ Elf::Elf(std::string path, char** imagePointer) { } } - *imagePointer = (char*)malloc(processImageSize_ * sizeof(char)); + *imagePointer = (uint8_t*)malloc(processImageSize_ * sizeof(char)); /** * The ELF Program header has a member called `p_type`, which represents * the kind of data or memory segments described by the program header. 
@@ -180,7 +180,8 @@ Elf::Elf(std::string path, char** imagePointer) { file.seekg(header.p_offset); // Read `p_filesz` bytes from `file` into the appropriate place in process // memory - file.read(*imagePointer + header.p_vaddr, header.p_filesz); + file.read(reinterpret_cast(*imagePointer + header.p_vaddr), + header.p_filesz); } } diff --git a/src/lib/arch/aarch64/Instruction_address.cc b/src/lib/arch/aarch64/Instruction_address.cc index da733f08db..a81921ddf1 100644 --- a/src/lib/arch/aarch64/Instruction_address.cc +++ b/src/lib/arch/aarch64/Instruction_address.cc @@ -1058,7 +1058,7 @@ span Instruction::generateAddresses() { // [{, xm}] // SME const uint16_t partition_num = VL_bits / 8; - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint64_t n = sourceValues_[partition_num + 2].get(); uint64_t m = 0; @@ -1104,7 +1104,7 @@ span Instruction::generateAddresses() { // [{, xm, lsl #1}] // SME const uint16_t partition_num = VL_bits / 16; - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint64_t n = sourceValues_[partition_num + 2].get(); uint64_t m = 0; @@ -1127,7 +1127,7 @@ span Instruction::generateAddresses() { // [{, xm, lsl #4}] // SME const uint16_t partition_num = VL_bits / 128; - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint64_t n = sourceValues_[partition_num + 2].get(); uint64_t m = 0; diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc index d4c3a33093..39733a4758 100644 --- a/src/lib/arch/aarch64/Instruction_execute.cc +++ b/src/lib/arch/aarch64/Instruction_execute.cc @@ -115,16 +115,14 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 64; - const uint64_t* pn = sourceValues_[rowCount].getAsVector(); - const uint64_t* pm = - sourceValues_[rowCount + 1].getAsVector(); - const uint64_t* zn = - 
sourceValues_[rowCount + 2].getAsVector(); + const auto pn = sourceValues_[rowCount].getAsVector(); + const auto pm = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); for (uint16_t row = 0; row < rowCount; row++) { - const uint64_t* zaRow = sourceValues_[row].getAsVector(); + const auto zaRow = sourceValues_[row].getAsVector(); uint64_t out[32] = {0}; - std::memcpy(out, zaRow, rowCount * sizeof(uint64_t)); + std::memcpy(out, zaRow.ptr, rowCount * sizeof(uint64_t)); // Slice element is active IFF all of the following conditions hold: // - Element in 1st source pred corresponding to horizontal // slice is TRUE @@ -149,16 +147,14 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 32; - const uint64_t* pn = sourceValues_[rowCount].getAsVector(); - const uint64_t* pm = - sourceValues_[rowCount + 1].getAsVector(); - const uint32_t* zn = - sourceValues_[rowCount + 2].getAsVector(); + const auto pn = sourceValues_[rowCount].getAsVector(); + const auto pm = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); for (uint16_t row = 0; row < rowCount; row++) { - const uint32_t* zaRow = sourceValues_[row].getAsVector(); + const auto zaRow = sourceValues_[row].getAsVector(); uint32_t out[64] = {0}; - std::memcpy(out, zaRow, rowCount * sizeof(uint32_t)); + std::memcpy(out, zaRow.ptr, rowCount * sizeof(uint32_t)); // Slice element is active IFF all of the following conditions hold: // - Element in 1st source pred corresponding to horizontal // slice is TRUE @@ -183,16 +179,14 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 64; - const uint64_t* pn = sourceValues_[rowCount].getAsVector(); - const uint64_t* pm = - sourceValues_[rowCount + 1].getAsVector(); - const uint64_t* zn = - sourceValues_[rowCount + 2].getAsVector(); + const auto pn = 
sourceValues_[rowCount].getAsVector(); + const auto pm = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); for (uint16_t row = 0; row < rowCount; row++) { - const uint64_t* zaRow = sourceValues_[row].getAsVector(); + const auto zaRow = sourceValues_[row].getAsVector(); uint64_t out[32] = {0}; - std::memcpy(out, zaRow, rowCount * sizeof(uint64_t)); + std::memcpy(out, zaRow.ptr, rowCount * sizeof(uint64_t)); // Slice element is active IFF all of the following conditions hold: // - Corresponding element in 1st source pred is TRUE // - Element in 2nd source pred corresponding to vertical @@ -220,16 +214,14 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 32; - const uint64_t* pn = sourceValues_[rowCount].getAsVector(); - const uint64_t* pm = - sourceValues_[rowCount + 1].getAsVector(); - const uint32_t* zn = - sourceValues_[rowCount + 2].getAsVector(); + const auto pn = sourceValues_[rowCount].getAsVector(); + const auto pm = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); for (uint16_t row = 0; row < rowCount; row++) { - const uint32_t* zaRow = sourceValues_[row].getAsVector(); + const auto zaRow = sourceValues_[row].getAsVector(); uint32_t out[64] = {0}; - std::memcpy(out, zaRow, rowCount * sizeof(uint32_t)); + std::memcpy(out, zaRow.ptr, rowCount * sizeof(uint32_t)); // Slice element is active IFF all of the following conditions hold: // - Corresponding element in 1st source pred is TRUE // - Element in 2nd source pred corresponding to vertical @@ -1384,8 +1376,7 @@ void Instruction::execute() { static_cast( metadata_.operands[2].sme.slice_offset.imm)) % rowCount; - const auto zaRow = - sourceValues_[2 + sliceNum].getAsVector(); + const auto zaRow = sourceValues_[2 + sliceNum].getAsVector(); uint8_t out[256] = {0}; for (int elem = 0; elem < rowCount; elem++) { @@ -1406,15 +1397,14 @@ void 
Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 64; - const uint64_t* zd = sourceValues_[0].getAsVector(); - const uint64_t* pg = sourceValues_[1].getAsVector(); + const auto zd = sourceValues_[0].getAsVector(); + const auto pg = sourceValues_[1].getAsVector(); const uint32_t sliceNum = (sourceValues_[2 + rowCount].get() + static_cast( metadata_.operands[2].sme.slice_offset.imm)) % rowCount; - const uint64_t* zaRow = - sourceValues_[2 + sliceNum].getAsVector(); + const auto zaRow = sourceValues_[2 + sliceNum].getAsVector(); uint64_t out[32] = {0}; for (int elem = 0; elem < rowCount; elem++) { @@ -1435,15 +1425,14 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 16; - const uint16_t* zd = sourceValues_[0].getAsVector(); - const uint64_t* pg = sourceValues_[1].getAsVector(); + const auto zd = sourceValues_[0].getAsVector(); + const auto pg = sourceValues_[1].getAsVector(); const uint32_t sliceNum = (sourceValues_[2 + rowCount].get() + static_cast( metadata_.operands[2].sme.slice_offset.imm)) % rowCount; - const uint16_t* zaRow = - sourceValues_[2 + sliceNum].getAsVector(); + const auto zaRow = sourceValues_[2 + sliceNum].getAsVector(); uint16_t out[128] = {0}; for (int elem = 0; elem < rowCount; elem++) { @@ -1464,13 +1453,12 @@ void Instruction::execute() { const uint16_t rowCount = VL_bits / 128; // Use uint64_t as no 128-bit - const uint64_t* zd = sourceValues_[0].getAsVector(); - const uint64_t* pg = sourceValues_[1].getAsVector(); + const auto zd = sourceValues_[0].getAsVector(); + const auto pg = sourceValues_[1].getAsVector(); const uint32_t sliceNum = sourceValues_[2 + rowCount].get() % rowCount; // Use uint64_t as no 128-bit - const uint64_t* zaRow = - sourceValues_[2 + sliceNum].getAsVector(); + const auto zaRow = sourceValues_[2 + sliceNum].getAsVector(); // Use uint64_t as no 128-bit uint64_t out[32] = {0}; @@ -1498,15 +1486,14 @@ void 
Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 32; - const uint32_t* zd = sourceValues_[0].getAsVector(); - const uint64_t* pg = sourceValues_[1].getAsVector(); + const auto zd = sourceValues_[0].getAsVector(); + const auto pg = sourceValues_[1].getAsVector(); const uint32_t sliceNum = (sourceValues_[2 + rowCount].get() + static_cast( metadata_.operands[2].sme.slice_offset.imm)) % rowCount; - const uint32_t* zaRow = - sourceValues_[2 + sliceNum].getAsVector(); + const auto zaRow = sourceValues_[2 + sliceNum].getAsVector(); uint32_t out[64] = {0}; for (int elem = 0; elem < rowCount; elem++) { @@ -1527,8 +1514,8 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 8; - const uint8_t* zd = sourceValues_[0].getAsVector(); - const uint64_t* pg = sourceValues_[1].getAsVector(); + const auto zd = sourceValues_[0].getAsVector(); + const auto pg = sourceValues_[1].getAsVector(); const uint32_t sliceNum = (sourceValues_[2 + rowCount].get() + static_cast( @@ -1555,8 +1542,8 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 64; - const uint64_t* zd = sourceValues_[0].getAsVector(); - const uint64_t* pg = sourceValues_[1].getAsVector(); + const auto zd = sourceValues_[0].getAsVector(); + const auto pg = sourceValues_[1].getAsVector(); const uint32_t sliceNum = (sourceValues_[2 + rowCount].get() + static_cast( @@ -1583,8 +1570,8 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 16; - const uint16_t* zd = sourceValues_[0].getAsVector(); - const uint64_t* pg = sourceValues_[1].getAsVector(); + const auto zd = sourceValues_[0].getAsVector(); + const auto pg = sourceValues_[1].getAsVector(); const uint32_t sliceNum = (sourceValues_[2 + rowCount].get() + static_cast( @@ -1611,8 +1598,8 @@ void Instruction::execute() { const uint16_t rowCount = VL_bits / 128; // Use 
uint64_t as no 128-bit - const uint64_t* zd = sourceValues_[0].getAsVector(); - const uint64_t* pg = sourceValues_[1].getAsVector(); + const auto zd = sourceValues_[0].getAsVector(); + const auto pg = sourceValues_[1].getAsVector(); const uint32_t sliceNum = sourceValues_[2 + rowCount].get() % rowCount; @@ -1623,8 +1610,7 @@ void Instruction::execute() { uint64_t shifted_active = 1ull << ((elem % 4) * 16); if (pg[elem / 4] & shifted_active) { // Need to move two consecutive 64-bit elements - const uint64_t* zaRow = - sourceValues_[2 + elem].getAsVector(); + const auto zaRow = sourceValues_[2 + elem].getAsVector(); out[2 * elem] = zaRow[2 * sliceNum]; out[2 * elem + 1] = zaRow[2 * sliceNum + 1]; } else { @@ -1644,8 +1630,8 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 32; - const uint32_t* zd = sourceValues_[0].getAsVector(); - const uint64_t* pg = sourceValues_[1].getAsVector(); + const auto zd = sourceValues_[0].getAsVector(); + const auto pg = sourceValues_[1].getAsVector(); const uint32_t sliceNum = (sourceValues_[2 + rowCount].get() + static_cast( @@ -2361,17 +2347,16 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 64; - const uint64_t* pn = sourceValues_[rowCount].getAsVector(); - const uint64_t* pm = - sourceValues_[rowCount + 1].getAsVector(); - const double* zn = sourceValues_[rowCount + 2].getAsVector(); - const double* zm = sourceValues_[rowCount + 3].getAsVector(); + const auto pn = sourceValues_[rowCount].getAsVector(); + const auto pm = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); + const auto zm = sourceValues_[rowCount + 3].getAsVector(); // zn is row, zm is col for (int row = 0; row < rowCount; row++) { double outRow[32] = {0}; uint64_t shifted_active_row = 1ull << ((row % 8) * 8); - const double* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = 
sourceValues_[row].getAsVector(); for (int col = 0; col < rowCount; col++) { double zadaElem = zadaRow[col]; uint64_t shifted_active_col = 1ull << ((col % 8) * 8); @@ -2393,17 +2378,16 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t rowCount = VL_bits / 32; - const uint64_t* pn = sourceValues_[rowCount].getAsVector(); - const uint64_t* pm = - sourceValues_[rowCount + 1].getAsVector(); - const float* zn = sourceValues_[rowCount + 2].getAsVector(); - const float* zm = sourceValues_[rowCount + 3].getAsVector(); + const auto pn = sourceValues_[rowCount].getAsVector(); + const auto pm = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); + const auto zm = sourceValues_[rowCount + 3].getAsVector(); // zn is row, zm is col for (int row = 0; row < rowCount; row++) { float outRow[64] = {0}; uint64_t shifted_active_row = 1ull << ((row % 16) * 4); - const float* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < rowCount; col++) { float zadaElem = zadaRow[col]; uint64_t shifted_active_col = 1ull << ((col % 16) * 4); @@ -3012,10 +2996,9 @@ void Instruction::execute() { static_cast( metadata_.operands[0].sme.slice_offset.imm)) % rowCount; - const uint8_t* zaRow = sourceValues_[sliceNum].getAsVector(); - const uint64_t* pg = - sourceValues_[rowCount + 1].getAsVector(); - const uint8_t* zn = sourceValues_[rowCount + 2].getAsVector(); + const auto zaRow = sourceValues_[sliceNum].getAsVector(); + const auto pg = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); uint8_t out[256] = {0}; for (uint16_t elem = 0; elem < rowCount; elem++) { @@ -3045,11 +3028,9 @@ void Instruction::execute() { static_cast( metadata_.operands[0].sme.slice_offset.imm)) % rowCount; - const uint64_t* zaRow = sourceValues_[sliceNum].getAsVector(); - const uint64_t* pg = - sourceValues_[rowCount + 
1].getAsVector(); - const uint64_t* zn = - sourceValues_[rowCount + 2].getAsVector(); + const auto zaRow = sourceValues_[sliceNum].getAsVector(); + const auto pg = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); uint64_t out[32] = {0}; for (uint16_t elem = 0; elem < rowCount; elem++) { @@ -3079,11 +3060,9 @@ void Instruction::execute() { static_cast( metadata_.operands[0].sme.slice_offset.imm)) % rowCount; - const uint16_t* zaRow = sourceValues_[sliceNum].getAsVector(); - const uint64_t* pg = - sourceValues_[rowCount + 1].getAsVector(); - const uint16_t* zn = - sourceValues_[rowCount + 2].getAsVector(); + const auto zaRow = sourceValues_[sliceNum].getAsVector(); + const auto pg = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); uint16_t out[128] = {0}; for (uint16_t elem = 0; elem < rowCount; elem++) { @@ -3110,13 +3089,11 @@ void Instruction::execute() { const uint32_t sliceNum = sourceValues_[rowCount].get() % rowCount; // Use uint64_t in place of 128-bit - const uint64_t* zaRow = sourceValues_[sliceNum].getAsVector(); + const auto zaRow = sourceValues_[sliceNum].getAsVector(); - const uint64_t* pg = - sourceValues_[rowCount + 1].getAsVector(); + const auto pg = sourceValues_[rowCount + 1].getAsVector(); // Use uint64_t in place of 128-bit - const uint64_t* zn = - sourceValues_[rowCount + 2].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); // Use uint64_t in place of 128-bit uint64_t out[32] = {0}; @@ -3153,11 +3130,9 @@ void Instruction::execute() { static_cast( metadata_.operands[0].sme.slice_offset.imm)) % rowCount; - const uint32_t* zaRow = sourceValues_[sliceNum].getAsVector(); - const uint64_t* pg = - sourceValues_[rowCount + 1].getAsVector(); - const uint32_t* zn = - sourceValues_[rowCount + 2].getAsVector(); + const auto zaRow = sourceValues_[sliceNum].getAsVector(); + const auto pg = sourceValues_[rowCount + 
1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); uint32_t out[64] = {0}; for (uint16_t elem = 0; elem < rowCount; elem++) { @@ -3187,14 +3162,13 @@ void Instruction::execute() { static_cast( metadata_.operands[0].sme.slice_offset.imm)) % rowCount; - const uint64_t* pg = - sourceValues_[rowCount + 1].getAsVector(); - const uint8_t* zn = sourceValues_[rowCount + 2].getAsVector(); + const auto pg = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); for (uint16_t i = 0; i < rowCount; i++) { - const uint8_t* row = sourceValues_[i].getAsVector(); + const auto row = sourceValues_[i].getAsVector(); uint8_t out[256] = {0}; - memcpy(out, row, rowCount * sizeof(uint8_t)); + memcpy(out, row.ptr, rowCount * sizeof(uint8_t)); uint64_t shifted_active = 1ull << (i % 64); if (pg[i / 64] & shifted_active) out[sliceNum] = zn[i]; results_[i] = {out, 256}; @@ -3214,15 +3188,13 @@ void Instruction::execute() { static_cast( metadata_.operands[0].sme.slice_offset.imm)) % rowCount; - const uint64_t* pg = - sourceValues_[rowCount + 1].getAsVector(); - const uint64_t* zn = - sourceValues_[rowCount + 2].getAsVector(); + const auto pg = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); for (uint16_t i = 0; i < rowCount; i++) { - const uint64_t* row = sourceValues_[i].getAsVector(); + const auto row = sourceValues_[i].getAsVector(); uint64_t out[32] = {0}; - memcpy(out, row, rowCount * sizeof(uint64_t)); + memcpy(out, row.ptr, rowCount * sizeof(uint64_t)); uint64_t shifted_active = 1ull << ((i % 8) * 8); if (pg[i / 8] & shifted_active) out[sliceNum] = zn[i]; results_[i] = {out, 256}; @@ -3242,15 +3214,13 @@ void Instruction::execute() { static_cast( metadata_.operands[0].sme.slice_offset.imm)) % rowCount; - const uint64_t* pg = - sourceValues_[rowCount + 1].getAsVector(); - const uint16_t* zn = - sourceValues_[rowCount + 2].getAsVector(); + const auto pg = 
sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); for (uint16_t i = 0; i < rowCount; i++) { - const uint16_t* row = sourceValues_[i].getAsVector(); + const auto row = sourceValues_[i].getAsVector(); uint16_t out[128] = {0}; - memcpy(out, row, rowCount * sizeof(uint16_t)); + memcpy(out, row.ptr, rowCount * sizeof(uint16_t)); uint64_t shifted_active = 1ull << ((i % 32) * 2); if (pg[i / 32] & shifted_active) out[sliceNum] = zn[i]; results_[i] = {out, 256}; @@ -3266,18 +3236,16 @@ void Instruction::execute() { const uint16_t rowCount = VL_bits / 128; const uint32_t sliceNum = sourceValues_[rowCount].get() % rowCount; - const uint64_t* pg = - sourceValues_[rowCount + 1].getAsVector(); + const auto pg = sourceValues_[rowCount + 1].getAsVector(); // Use uint64_t in place of 128-bit - const uint64_t* zn = - sourceValues_[rowCount + 2].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); for (uint16_t i = 0; i < rowCount; i++) { // Use uint64_t in place of 128-bit - const uint64_t* row = sourceValues_[i].getAsVector(); + const auto row = sourceValues_[i].getAsVector(); uint64_t out[32] = {0}; // *2 in memcpy as need 128-bit elements but using uint64_t - memcpy(out, row, rowCount * sizeof(uint64_t) * 2); + memcpy(out, row.ptr, rowCount * sizeof(uint64_t) * 2); // For 128-bit there are 16-bit for each active element uint64_t shifted_active = 1ull << ((i % 4) * 16); if (pg[i / 4] & shifted_active) { @@ -3302,15 +3270,13 @@ void Instruction::execute() { static_cast( metadata_.operands[0].sme.slice_offset.imm)) % rowCount; - const uint64_t* pg = - sourceValues_[rowCount + 1].getAsVector(); - const uint32_t* zn = - sourceValues_[rowCount + 2].getAsVector(); + const auto pg = sourceValues_[rowCount + 1].getAsVector(); + const auto zn = sourceValues_[rowCount + 2].getAsVector(); for (uint16_t i = 0; i < rowCount; i++) { - const uint32_t* row = sourceValues_[i].getAsVector(); + const auto row = 
sourceValues_[i].getAsVector(); uint32_t out[64] = {0}; - memcpy(out, row, rowCount * sizeof(uint32_t)); + memcpy(out, row.ptr, rowCount * sizeof(uint32_t)); uint64_t shifted_active = 1ull << ((i % 16) * 4); if (pg[i / 16] & shifted_active) out[sliceNum] = zn[i]; results_[i] = {out, 256}; @@ -3353,12 +3319,12 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 8; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint16_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint8_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint8_t out[256] = {0}; for (int i = 0; i < partition_num; i++) { @@ -3424,7 +3390,7 @@ void Instruction::execute() { const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint16_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint16_t out[128] = {0}; for (int i = 0; i < partition_num; i++) { @@ -3452,12 +3418,12 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 128; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = ws % partition_num; // Use uint64_t as no 128-bit type - const uint64_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); // Use uint64_t as no 128-bit type uint64_t out[32] = {0}; @@ -3490,7 +3456,7 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 32; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = @@ -3523,17 +3489,17 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 8; const uint32_t ws = 
sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint8_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); for (int i = 0; i < partition_num; i++) { - const uint8_t* row = sourceValues_[i].getAsVector(); + const auto row = sourceValues_[i].getAsVector(); uint8_t out[256] = {0}; - memcpy(out, row, partition_num * sizeof(uint8_t)); + memcpy(out, row.ptr, partition_num * sizeof(uint8_t)); uint64_t shifted_active = 1ull << (i % 64); if (pg[i / 64] & shifted_active) { out[sliceNum] = data[i]; @@ -3550,17 +3516,17 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 64; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint64_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); for (int i = 0; i < partition_num; i++) { - const uint64_t* row = sourceValues_[i].getAsVector(); + const auto row = sourceValues_[i].getAsVector(); uint64_t out[32] = {0}; - memcpy(out, row, partition_num * sizeof(uint64_t)); + memcpy(out, row.ptr, partition_num * sizeof(uint64_t)); uint64_t shifted_active = 1ull << ((i % 8) * 8); if (pg[i / 8] & shifted_active) { out[sliceNum] = data[i]; @@ -3577,17 +3543,17 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 16; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint16_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); for (int i = 
0; i < partition_num; i++) { - const uint16_t* row = sourceValues_[i].getAsVector(); + const auto row = sourceValues_[i].getAsVector(); uint16_t out[128] = {0}; - memcpy(out, row, partition_num * sizeof(uint16_t)); + memcpy(out, row.ptr, partition_num * sizeof(uint16_t)); uint64_t shifted_active = 1ull << ((i % 32) * 2); if (pg[i / 32] & shifted_active) { out[sliceNum] = data[i]; @@ -3604,19 +3570,19 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 128; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = ws % partition_num; // Using uint64_t as no 128-bit data type - const uint64_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); for (int i = 0; i < partition_num; i++) { // Using uint64_t as no 128-bit data type - const uint64_t* row = sourceValues_[i].getAsVector(); + const auto row = sourceValues_[i].getAsVector(); uint64_t out[32] = {0}; // *2 in memcpy as need 128-bit but using uint64_t - memcpy(out, row, partition_num * sizeof(uint64_t) * 2); + memcpy(out, row.ptr, partition_num * sizeof(uint64_t) * 2); // For 128-bit there are 16-bit for each active element uint64_t shifted_active = 1ull << ((i % 4) * 16); if (pg[i / 4] & shifted_active) { @@ -3636,7 +3602,7 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 32; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = @@ -3644,9 +3610,9 @@ void Instruction::execute() { const auto data = memoryData_[0].getAsVector(); for (int i = 0; i < partition_num; i++) { - const uint32_t* row = sourceValues_[i].getAsVector(); + const auto row = sourceValues_[i].getAsVector(); uint32_t out[64] = {0}; - memcpy(out, row, partition_num * sizeof(uint32_t)); + memcpy(out, row.ptr, partition_num * sizeof(uint32_t)); 
uint64_t shifted_active = 1ull << ((i % 16) * 4); if (pg[i / 16] & shifted_active) { out[sliceNum] = data[i]; @@ -4174,8 +4140,9 @@ void Instruction::execute() { // LOAD const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; - std::vector data = { - auto memoryData_[1].getAsVector()}; + std::vector> data = { + memoryData_[0].getAsVector(), + memoryData_[1].getAsVector()}; uint64_t out[2][32] = {{0}, {0}}; for (int i = 0; i < partition_num; i++) { @@ -4225,9 +4192,10 @@ void Instruction::execute() { // LOAD const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; - std::vector data = { - auto memoryData_[1].getAsVector(), - auto memoryData_[2].getAsVector()}; + std::vector> data = { + memoryData_[0].getAsVector(), + memoryData_[1].getAsVector(), + memoryData_[2].getAsVector()}; uint64_t out[3][32] = {{0}, {0}, {0}}; for (int i = 0; i < partition_num; i++) { @@ -4250,9 +4218,11 @@ void Instruction::execute() { // LOAD const auto p = sourceValues_[0].getAsVector(); const uint16_t partition_num = VL_bits / 64; - std::vector data = { - auto memoryData_[1].getAsVector(), - auto memoryData_[3].getAsVector()}; + std::vector> data = { + memoryData_[0].getAsVector(), + memoryData_[1].getAsVector(), + memoryData_[2].getAsVector(), + memoryData_[3].getAsVector()}; uint64_t out[4][32] = {{0}, {0}, {0}, {0}}; for (int i = 0; i < partition_num; i++) { @@ -4649,7 +4619,7 @@ void Instruction::execute() { wn + static_cast(metadata_.operands[0].sme.slice_offset.imm); - const uint8_t* data = memoryData_[0].getAsVector(); + const auto data = memoryData_[0].getAsVector(); uint8_t out[256] = {0}; for (uint16_t i = 0; i < rowCount; i++) { out[i] = data[i]; @@ -5333,17 +5303,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 64; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const 
int16_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const int16_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLd x 4 sub matrix // zm is a 4 x SVLd sub matrix // Resulting SVLd x SVLd matrix has results widened to 64-bit for (int row = 0; row < tileDim; row++) { int64_t outRow[32] = {0}; - const int64_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int64_t sum = zadaRow[col]; @@ -5371,17 +5341,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 32; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const int8_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const int8_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLs x 4 sub matrix // zm is a 4 x SVLs sub matrix // Resulting SVLs x SVLs matrix has results widened to 32-bit for (int row = 0; row < tileDim; row++) { int32_t outRow[64] = {0}; - const int32_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int32_t sum = zadaRow[col]; @@ -5409,17 +5379,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 64; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - 
const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const int16_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const int16_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLd x 4 sub matrix // zm is a 4 x SVLd sub matrix // Resulting SVLd x SVLd matrix has results widened to 64-bit for (int row = 0; row < tileDim; row++) { int64_t outRow[32] = {0}; - const int64_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int64_t sum = zadaRow[col]; @@ -5447,17 +5417,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 32; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const int8_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const int8_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLs x 4 sub matrix // zm is a 4 x SVLs sub matrix // Resulting SVLs x SVLs matrix has results widened to 32-bit for (int row = 0; row < tileDim; row++) { int32_t outRow[64] = {0}; - const int32_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int32_t sum = zadaRow[col]; @@ -5594,14 +5564,13 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 8; const uint32_t ws = 
sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint8_t* tileSlice = - sourceValues_[sliceNum].getAsVector(); + const auto tileSlice = sourceValues_[sliceNum].getAsVector(); memoryData_ = sve_merge_store_data(tileSlice, pg, VL_bits); break; } @@ -5613,14 +5582,14 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 64; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = - auto + 1].getAsVector(); + const auto pg = + sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint64_t* tileSlice = - auto sourceValues_[sliceNum].getAsVector(); + const auto tileSlice = + sourceValues_[sliceNum].getAsVector(); memoryData_ = sve_merge_store_data(tileSlice, pg, VL_bits); break; } @@ -5632,14 +5601,13 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 16; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint16_t* tileSlice = - sourceValues_[sliceNum].getAsVector(); + const auto tileSlice = sourceValues_[sliceNum].getAsVector(); memoryData_ = sve_merge_store_data(tileSlice, pg, VL_bits); break; } @@ -5651,14 +5619,13 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 128; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = ws % partition_num; // Using uint64_t as no 128-bit type - const uint64_t* tileSlice = - sourceValues_[sliceNum].getAsVector(); + const auto tileSlice = 
sourceValues_[sliceNum].getAsVector(); // Need to combine active adjacent elements into RegisterValues and // place into each memoryData_ index. @@ -5674,14 +5641,14 @@ void Instruction::execute() { } else if (memData.size() > 0) { // Predicate false, save current data memoryData_[index] = RegisterValue( - (char*)memData.data(), memData.size() * sizeof(uint64_t)); + reinterpret_cast(memData.data()), memData.size() * sizeof(uint64_t)); index++; memData.clear(); } } // Check if final data needs putting into memoryData_ if (memData.size() > 0) { - memoryData_[index] = RegisterValue((char*)memData.data(), + memoryData_[index] = RegisterValue(reinterpret_cast(memData.data()), memData.size() * sizeof(uint64_t)); } break; @@ -5694,14 +5661,13 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 32; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; - const uint32_t* tileSlice = - sourceValues_[sliceNum].getAsVector(); + const auto tileSlice = sourceValues_[sliceNum].getAsVector(); memoryData_ = sve_merge_store_data(tileSlice, pg, VL_bits); break; } @@ -5713,7 +5679,7 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 8; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = @@ -5729,7 +5695,7 @@ void Instruction::execute() { sourceValues_[x].getAsVector()[sliceNum]); } else if (memData.size() > 0) { memoryData_[index] = - RegisterValue((char*)memData.data(), memData.size()); + RegisterValue(reinterpret_cast(memData.data()), memData.size()); index++; memData.clear(); } @@ -5737,7 +5703,7 @@ void Instruction::execute() { if (memData.size() > 0) { memoryData_[index] = - RegisterValue((char*)memData.data(), memData.size()); + 
RegisterValue(reinterpret_cast(memData.data()), memData.size()); } break; } @@ -5749,8 +5715,8 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 64; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = - auto + 1].getAsVector(); + const auto pg = + sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; @@ -5765,7 +5731,7 @@ void Instruction::execute() { sourceValues_[x].getAsVector()[sliceNum]); } else if (memData.size() > 0) { memoryData_[index] = - RegisterValue((char*)memData.data(), memData.size() * 8); + RegisterValue(reinterpret_cast(memData.data()), memData.size() * 8); index++; memData.clear(); } @@ -5773,7 +5739,7 @@ void Instruction::execute() { if (memData.size() > 0) { memoryData_[index] = - RegisterValue((char*)memData.data(), memData.size() * 8); + RegisterValue(reinterpret_cast(memData.data()), memData.size() * 8); } break; } @@ -5785,8 +5751,8 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 16; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = - auto + 1].getAsVector(); + const auto pg = + sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; @@ -5801,7 +5767,7 @@ void Instruction::execute() { sourceValues_[x].getAsVector()[sliceNum]); } else if (memData.size() > 0) { memoryData_[index] = - RegisterValue((char*)memData.data(), memData.size() * 2); + RegisterValue(reinterpret_cast(memData.data()), memData.size() * 2); index++; memData.clear(); } @@ -5809,7 +5775,7 @@ void Instruction::execute() { if (memData.size() > 0) { memoryData_[index] = - RegisterValue((char*)memData.data(), memData.size() * 2); + RegisterValue(reinterpret_cast(memData.data()), memData.size() * 2); } break; } @@ -5821,7 +5787,7 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 128; 
const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = + const auto pg = sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = ws % partition_num; @@ -5842,7 +5808,7 @@ void Instruction::execute() { } else if (memData.size() > 0) { // Predicate false, save current data memoryData_[index] = RegisterValue( - (char*)memData.data(), memData.size() * sizeof(uint64_t)); + reinterpret_cast(memData.data()), memData.size() * sizeof(uint64_t)); index++; memData.clear(); } @@ -5850,7 +5816,7 @@ void Instruction::execute() { // Check if final data needs putting into memoryData_ if (memData.size() > 0) { - memoryData_[index] = RegisterValue((char*)memData.data(), + memoryData_[index] = RegisterValue(reinterpret_cast(memData.data()), memData.size() * sizeof(uint64_t)); } break; @@ -5863,8 +5829,8 @@ void Instruction::execute() { const uint16_t partition_num = VL_bits / 32; const uint32_t ws = sourceValues_[partition_num].get(); - const uint64_t* pg = - auto + 1].getAsVector(); + const auto pg = + sourceValues_[partition_num + 1].getAsVector(); const uint32_t sliceNum = (ws + metadata_.operands[0].sme.slice_offset.imm) % partition_num; @@ -5879,7 +5845,7 @@ void Instruction::execute() { sourceValues_[x].getAsVector()[sliceNum]); } else if (memData.size() > 0) { memoryData_[index] = - RegisterValue((char*)memData.data(), memData.size() * 4); + RegisterValue(reinterpret_cast(memData.data()), memData.size() * 4); index++; memData.clear(); } @@ -5887,7 +5853,7 @@ void Instruction::execute() { if (memData.size() > 0) { memoryData_[index] = - RegisterValue((char*)memData.data(), memData.size() * 4); + RegisterValue(reinterpret_cast(memData.data()), memData.size() * 4); } break; } @@ -5960,7 +5926,7 @@ void Instruction::execute() { // STORE for (int i = 0; i < 4; i++) { memoryData_[i] = - auto RegisterValue((char*)sourceValues_[i].getAsVector(), + RegisterValue(sourceValues_[i].getAsVector().ptr, 16 * sizeof(uint8_t)); } break; @@ -5971,7 
+5937,7 @@ void Instruction::execute() { // STORE for (int i = 0; i < 4; i++) { memoryData_[i] = - auto RegisterValue((char*)sourceValues_[i].getAsVector(), + RegisterValue(sourceValues_[i].getAsVector().ptr, 16 * sizeof(uint8_t)); } // if #imm post-index, value can only be 64 @@ -5986,8 +5952,8 @@ void Instruction::execute() { // vt4.2d}, [xn|sp] // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = auto RegisterValue( - (char*)sourceValues_[i].getAsVector(), + memoryData_[i] = RegisterValue( + sourceValues_[i].getAsVector().ptr, 2 * sizeof(uint64_t)); } break; @@ -5996,9 +5962,9 @@ void Instruction::execute() { // vt4.2d}, [xn|sp], <#imm|xm> // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = RegisterValue( - sourceValues_[i].getAsVector().ptr, - 2 * sizeof(uint64_t)); + memoryData_[i] = + RegisterValue(sourceValues_[i].getAsVector().ptr, + 2 * sizeof(uint64_t)); } // if #imm post-index, value can only be 64 const uint64_t postIndex = @@ -6012,9 +5978,9 @@ void Instruction::execute() { // vt4.2s}, [xn|sp], <#imm|xm> // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = RegisterValue( - sourceValues_[i].getAsVector().ptr, - 2 * sizeof(uint32_t)); + memoryData_[i] = + RegisterValue(sourceValues_[i].getAsVector().ptr, + 2 * sizeof(uint32_t)); } // if #imm post-index, value can only be 32 const uint64_t postIndex = @@ -6028,9 +5994,9 @@ void Instruction::execute() { // vt4.4s}, [xn|sp] // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = RegisterValue( - sourceValues_[i].getAsVector().ptr, - 4 * sizeof(uint32_t)); + memoryData_[i] = + RegisterValue(sourceValues_[i].getAsVector().ptr, + 4 * sizeof(uint32_t)); } break; } @@ -6038,9 +6004,9 @@ void Instruction::execute() { // vt4.4s}, [xn|sp], <#imm|xm> // STORE for (int i = 0; i < 4; i++) { - memoryData_[i] = RegisterValue( - sourceValues_[i].getAsVector().ptr, - 4 * sizeof(uint32_t)); + memoryData_[i] = + RegisterValue(sourceValues_[i].getAsVector().ptr, + 4 * sizeof(uint32_t)); } // if #imm 
post-index, value can only be 64 const uint64_t postIndex = @@ -6054,8 +6020,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue((char*)t, 16 * sizeof(uint8_t)); - memoryData_[1] = RegisterValue((char*)t2, 16 * sizeof(uint8_t)); + memoryData_[0] = RegisterValue(t.ptr, 16 * sizeof(uint8_t)); + memoryData_[1] = RegisterValue(t2.ptr, 16 * sizeof(uint8_t)); break; } case Opcode::AArch64_ST1Twov16b_POST: { // st1 {vt.16b, vt2.16b}, @@ -6063,8 +6029,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue((char*)t, 16 * sizeof(uint8_t)); - memoryData_[1] = RegisterValue((char*)t2, 16 * sizeof(uint8_t)); + memoryData_[0] = RegisterValue(t.ptr, 16 * sizeof(uint8_t)); + memoryData_[1] = RegisterValue(t2.ptr, 16 * sizeof(uint8_t)); // if #imm post-index, value can only be 32 const uint64_t postIndex = @@ -6078,8 +6044,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue((char*)t, 2 * sizeof(uint64_t)); - memoryData_[1] = RegisterValue((char*)t2, 2 * sizeof(uint64_t)); + memoryData_[0] = RegisterValue(t.ptr, 2 * sizeof(uint64_t)); + memoryData_[1] = RegisterValue(t2.ptr, 2 * sizeof(uint64_t)); break; } case Opcode::AArch64_ST1Twov2d_POST: { // st1 {vt.2d, vt2.2d}, @@ -6087,8 +6053,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue((char*)t, 2 * sizeof(uint64_t)); - memoryData_[1] = RegisterValue((char*)t2, 2 * sizeof(uint64_t)); + memoryData_[0] = RegisterValue(t.ptr, 2 * sizeof(uint64_t)); + memoryData_[1] = RegisterValue(t2.ptr, 2 * sizeof(uint64_t)); // if #imm post-index, value can only be 32 const 
uint64_t postIndex = @@ -6102,8 +6068,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue((char*)t, 4 * sizeof(uint32_t)); - memoryData_[1] = RegisterValue((char*)t2, 4 * sizeof(uint32_t)); + memoryData_[0] = RegisterValue(t.ptr, 4 * sizeof(uint32_t)); + memoryData_[1] = RegisterValue(t2.ptr, 4 * sizeof(uint32_t)); break; } case Opcode::AArch64_ST1Twov4s_POST: { // st1 {vt.4s, vt2.4s}, @@ -6111,8 +6077,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue((char*)t, 4 * sizeof(uint32_t)); - memoryData_[1] = RegisterValue((char*)t2, 4 * sizeof(uint32_t)); + memoryData_[0] = RegisterValue(t.ptr, 4 * sizeof(uint32_t)); + memoryData_[1] = RegisterValue(t2.ptr, 4 * sizeof(uint32_t)); // if #imm post-index, value can only be 32 const uint64_t postIndex = @@ -6243,13 +6209,13 @@ void Instruction::execute() { } else if (inActiveBlock) { inActiveBlock = false; memoryData_[index] = RegisterValue( - (char*)memData.data(), sizeof(uint64_t) * memData.size()); + reinterpret_cast(memData.data()), sizeof(uint64_t) * memData.size()); index++; } } // Add final block if needed if (inActiveBlock) - memoryData_[index] = RegisterValue((char*)memData.data(), + memoryData_[index] = RegisterValue(reinterpret_cast(memData.data()), sizeof(uint64_t) * memData.size()); break; @@ -6261,8 +6227,8 @@ void Instruction::execute() { const auto t2 = sourceValues_[1].getAsVector(); std::vector m1 = {t1[0], t2[0], t1[1], t2[1]}; std::vector m2 = {t1[2], t2[2], t1[3], t2[3]}; - memoryData_[0] = RegisterValue((char*)m1.data(), 4 * sizeof(float)); - memoryData_[1] = RegisterValue((char*)m2.data(), 4 * sizeof(float)); + memoryData_[0] = RegisterValue(reinterpret_cast(m1.data()), 4 * sizeof(float)); + memoryData_[1] = RegisterValue(reinterpret_cast(m2.data()), 4 * 
sizeof(float)); // if #imm post-index, value can only be 32 const uint64_t postIndex = (metadata_.operands[3].type == AARCH64_OP_REG) @@ -6467,7 +6433,7 @@ void Instruction::execute() { const uint64_t PL_bits = VL_bits / 8; const uint16_t partition_num = PL_bits / 8; const auto p = sourceValues_[0].getAsVector(); - memoryData_[0] = RegisterValue((char*)p, partition_num); + memoryData_[0] = RegisterValue(p.ptr, partition_num); break; } case Opcode::AArch64_STR_ZA: { // str za[wv, #imm], [xn|sp{, #imm, mul @@ -6480,16 +6446,16 @@ void Instruction::execute() { const uint32_t wv = sourceValues_[zaRowCount].get(); const uint32_t imm = metadata_.operands[0].sme.slice_offset.imm; - const uint8_t* zaRow = + const auto zaRow = sourceValues_[(wv + imm) % zaRowCount].getAsVector(); - memoryData_[0] = RegisterValue((char*)zaRow, zaRowCount); + memoryData_[0] = RegisterValue(zaRow.ptr, zaRowCount); break; } case Opcode::AArch64_STR_ZXI: { // str zt, [xn{, #imm, mul vl}] // STORE const uint16_t partition_num = VL_bits / 8; const auto z = sourceValues_[0].getAsVector(); - memoryData_[0] = RegisterValue((char*)z, partition_num); + memoryData_[0] = RegisterValue(z.ptr, partition_num); break; } case Opcode::AArch64_STURBBi: { // sturb wd, [xn, #imm] @@ -6676,17 +6642,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 64; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const int16_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const uint16_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLd x 4 sub matrix // zm is a 4 x SVLd sub matrix // Resulting SVLd x SVLd matrix has results widened to 64-bit 
for (int row = 0; row < tileDim; row++) { int64_t outRow[32] = {0}; - const int64_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int64_t sum = zadaRow[col]; @@ -6714,17 +6680,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 32; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const int8_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const uint8_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLs x 4 sub matrix // zm is a 4 x SVLs sub matrix // Resulting SVLs x SVLs matrix has results widened to 32-bit for (int row = 0; row < tileDim; row++) { int32_t outRow[64] = {0}; - const int32_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int32_t sum = zadaRow[col]; @@ -6752,17 +6718,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 64; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const int16_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const uint16_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLd x 4 sub matrix // zm is a 4 x SVLd 
sub matrix // Resulting SVLd x SVLd matrix has results widened to 64-bit for (int row = 0; row < tileDim; row++) { int64_t outRow[32] = {0}; - const int64_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int64_t sum = zadaRow[col]; @@ -6790,17 +6756,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 32; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const int8_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const uint8_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLs x 4 sub matrix // zm is a 4 x SVLs sub matrix // Resulting SVLs x SVLs matrix has results widened to 32-bit for (int row = 0; row < tileDim; row++) { int32_t outRow[64] = {0}; - const int32_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int32_t sum = zadaRow[col]; @@ -7080,17 +7046,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 64; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const uint16_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const uint16_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = 
sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLd x 4 sub matrix // zm is a 4 x SVLd sub matrix // Resulting SVLd x SVLd matrix has results widened to 64-bit for (int row = 0; row < tileDim; row++) { uint64_t outRow[32] = {0}; - const uint64_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element uint64_t sum = zadaRow[col]; @@ -7118,17 +7084,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 32; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const uint8_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const uint8_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLs x 4 sub matrix // zm is a 4 x SVLs sub matrix // Resulting SVLs x SVLs matrix has results widened to 32-bit for (int row = 0; row < tileDim; row++) { uint32_t outRow[64] = {0}; - const uint32_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element uint32_t sum = zadaRow[col]; @@ -7156,17 +7122,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 64; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const uint16_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const uint16_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 
1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLd x 4 sub matrix // zm is a 4 x SVLd sub matrix // Resulting SVLd x SVLd matrix has results widened to 64-bit for (int row = 0; row < tileDim; row++) { uint64_t outRow[32] = {0}; - const uint64_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element uint64_t sum = zadaRow[col]; @@ -7194,17 +7160,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 32; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const uint8_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const uint8_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLs x 4 sub matrix // zm is a 4 x SVLs sub matrix // Resulting SVLs x SVLs matrix has results widened to 32-bit for (int row = 0; row < tileDim; row++) { uint32_t outRow[64] = {0}; - const uint32_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element uint32_t sum = zadaRow[col]; @@ -7299,17 +7265,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 64; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const uint16_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const int16_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = 
sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLd x 4 sub matrix // zm is a 4 x SVLd sub matrix // Resulting SVLd x SVLd matrix has results widened to 64-bit for (int row = 0; row < tileDim; row++) { int64_t outRow[32] = {0}; - const int64_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int64_t sum = zadaRow[col]; @@ -7337,17 +7303,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 32; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const uint8_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const int8_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLs x 4 sub matrix // zm is a 4 x SVLs sub matrix // Resulting SVLs x SVLs matrix has results widened to 32-bit for (int row = 0; row < tileDim; row++) { int32_t outRow[64] = {0}; - const int32_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int32_t sum = zadaRow[col]; @@ -7375,17 +7341,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 64; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const uint16_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const 
int16_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLd x 4 sub matrix // zm is a 4 x SVLd sub matrix // Resulting SVLd x SVLd matrix has results widened to 64-bit for (int row = 0; row < tileDim; row++) { int64_t outRow[32] = {0}; - const int64_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int64_t sum = zadaRow[col]; @@ -7413,17 +7379,17 @@ void Instruction::execute() { if (!ZAenabled) return ZAdisabled(); const uint16_t tileDim = VL_bits / 32; - const uint64_t* pn = sourceValues_[tileDim].getAsVector(); - const uint64_t* pm = sourceValues_[tileDim + 1].getAsVector(); - const uint8_t* zn = sourceValues_[tileDim + 2].getAsVector(); - const int8_t* zm = sourceValues_[tileDim + 3].getAsVector(); + const auto pn = sourceValues_[tileDim].getAsVector(); + const auto pm = sourceValues_[tileDim + 1].getAsVector(); + const auto zn = sourceValues_[tileDim + 2].getAsVector(); + const auto zm = sourceValues_[tileDim + 3].getAsVector(); // zn is a SVLs x 4 sub matrix // zm is a 4 x SVLs sub matrix // Resulting SVLs x SVLs matrix has results widened to 32-bit for (int row = 0; row < tileDim; row++) { int32_t outRow[64] = {0}; - const int32_t* zadaRow = sourceValues_[row].getAsVector(); + const auto zadaRow = sourceValues_[row].getAsVector(); for (int col = 0; col < tileDim; col++) { // Get corresponding output element int32_t sum = zadaRow[col]; diff --git a/src/lib/kernel/Linux.cc b/src/lib/kernel/Linux.cc index 780867cdec..a841022422 100644 --- a/src/lib/kernel/Linux.cc +++ b/src/lib/kernel/Linux.cc @@ -190,6 +190,9 @@ int64_t Linux::newfstatat(int64_t dfd, const std::string& filename, stat& 
out, struct ::stat statbuf; int64_t retval = ::fstatat(hostDfd, new_pathname.c_str(), &statbuf, flag); + std::cout << "retval = " << retval << " errno = " << errno << std::endl; + + // Copy results to output struct out.dev = statbuf.st_dev; out.ino = statbuf.st_ino; diff --git a/src/lib/kernel/LinuxProcess.cc b/src/lib/kernel/LinuxProcess.cc index 795a6e5a8a..ee36ec6fa6 100644 --- a/src/lib/kernel/LinuxProcess.cc +++ b/src/lib/kernel/LinuxProcess.cc @@ -23,7 +23,7 @@ LinuxProcess::LinuxProcess(const std::vector& commandLine, commandLine_(commandLine) { // Parse ELF file assert(commandLine.size() > 0); - char* unwrappedProcImgPtr; + uint8_t* unwrappedProcImgPtr; Elf elf(commandLine[0], &unwrappedProcImgPtr); if (!elf.isValid()) { return; @@ -47,7 +47,8 @@ LinuxProcess::LinuxProcess(const std::vector& commandLine, // Calculate process image size, including heap + stack size_ = heapStart_ + HEAP_SIZE + STACK_SIZE; - char* temp = (char*)realloc(unwrappedProcImgPtr, size_ * sizeof(char)); + uint8_t* temp = + (uint8_t*)realloc(unwrappedProcImgPtr, size_ * sizeof(uint8_t)); if (temp == NULL) { free(unwrappedProcImgPtr); std::cerr << "[SimEng:LinuxProcess] ProcessImage cannot be constructed " @@ -59,7 +60,7 @@ LinuxProcess::LinuxProcess(const std::vector& commandLine, unwrappedProcImgPtr = temp; createStack(&unwrappedProcImgPtr); - processImage_ = std::shared_ptr(unwrappedProcImgPtr, free); + processImage_ = std::shared_ptr(unwrappedProcImgPtr, free); } LinuxProcess::LinuxProcess(span instructions, @@ -81,11 +82,11 @@ LinuxProcess::LinuxProcess(span instructions, alignToBoundary(heapStart_ + (HEAP_SIZE + STACK_SIZE) / 2, pageSize_); size_ = heapStart_ + HEAP_SIZE + STACK_SIZE; - char* unwrappedProcImgPtr = (char*)calloc(size_, sizeof(char)); + uint8_t* unwrappedProcImgPtr = (uint8_t*)calloc(size_, sizeof(uint8_t)); std::copy(instructions.begin(), instructions.end(), unwrappedProcImgPtr); createStack(&unwrappedProcImgPtr); - processImage_ = 
std::shared_ptr(unwrappedProcImgPtr, free); + processImage_ = std::shared_ptr(unwrappedProcImgPtr, free); } LinuxProcess::~LinuxProcess() {} @@ -102,8 +103,8 @@ std::string LinuxProcess::getPath() const { return commandLine_[0]; } bool LinuxProcess::isValid() const { return isValid_; } -std::shared_ptr LinuxProcess::getProcessImage() const { - return std::shared_ptr(processImage_); +std::shared_ptr LinuxProcess::getProcessImage() const { + return std::shared_ptr(processImage_); } uint64_t LinuxProcess::getProcessImageSize() const { return size_; } @@ -112,7 +113,7 @@ uint64_t LinuxProcess::getEntryPoint() const { return entryPoint_; } uint64_t LinuxProcess::getInitialStackPointer() const { return stackPointer_; } -void LinuxProcess::createStack(char** processImage) { +void LinuxProcess::createStack(uint8_t** processImage) { // Decrement the stack pointer and populate with initial stack state // (https://www.win.tue.nl/~aeb/linux/hh/stack-layout.html) // The argv and env strings are added to the top of the stack first and the @@ -128,7 +129,7 @@ void LinuxProcess::createStack(char** processImage) { // Program arguments (argc, argv[]) initialStackFrame.push_back(commandLine_.size()); // argc for (size_t i = 0; i < commandLine_.size(); i++) { - char* argvi = commandLine_[i].data(); + uint8_t* argvi = reinterpret_cast(commandLine_[i].data()); for (size_t j = 0; j < commandLine_[i].size(); j++) { stringBytes.push_back(argvi[j]); } @@ -193,7 +194,8 @@ void LinuxProcess::createStack(char** processImage) { stackPointer_ -= stackOffset; // Copy initial stack frame to process memory - char* stackFrameBytes = reinterpret_cast(initialStackFrame.data()); + uint8_t* stackFrameBytes = + reinterpret_cast(initialStackFrame.data()); std::copy(stackFrameBytes, stackFrameBytes + stackFrameSize, (*processImage) + stackPointer_); } diff --git a/test/regression/RegressionTest.cc b/test/regression/RegressionTest.cc index 4d2de4e440..3ed570fab4 100644 --- 
a/test/regression/RegressionTest.cc +++ b/test/regression/RegressionTest.cc @@ -56,7 +56,7 @@ void RegressionTest::createArchitecture(const char* source, const char* triple, // This instance of procImgPtr pointer needs to be shared because // getMemoryValue in RegressionTest.hh uses reference to the class // member processMemory_ - std::shared_ptr procImgPtr = process_->getProcessImage(); + std::shared_ptr procImgPtr = process_->getProcessImage(); processMemory_ = procImgPtr.get(); // Populate the heap with initial data (specified by the test being run) diff --git a/test/regression/RegressionTest.hh b/test/regression/RegressionTest.hh index 9dfc94ad30..c16bf2cf8d 100644 --- a/test/regression/RegressionTest.hh +++ b/test/regression/RegressionTest.hh @@ -124,7 +124,7 @@ class RegressionTest std::unique_ptr process_; /** The process memory. */ - char* processMemory_ = nullptr; + uint8_t* processMemory_ = nullptr; /** The output written to stdout during the test. */ std::string stdout_; diff --git a/test/regression/aarch64/Syscall.cc b/test/regression/aarch64/Syscall.cc index 0866c278e2..c46e761241 100644 --- a/test/regression/aarch64/Syscall.cc +++ b/test/regression/aarch64/Syscall.cc @@ -446,14 +446,14 @@ TEST_P(Syscall, file_read) { // Check result of readv operations const char refReadv[] = "ABCD\0UV\0EFGH\0\0\0\0MNOPQRST"; - char* dataReadv = processMemory_ + process_->getHeapStart(); + uint8_t* dataReadv = processMemory_ + process_->getHeapStart(); for (size_t i = 0; i < strlen(refReadv); i++) { EXPECT_EQ(dataReadv[i], refReadv[i]) << "at index i=" << i << '\n'; } // Check result of read operation const char refRead[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - char* dataRead = processMemory_ + process_->getInitialStackPointer() - 64; + uint8_t* dataRead = processMemory_ + process_->getInitialStackPointer() - 64; for (size_t i = 0; i < strlen(refRead); i++) { EXPECT_EQ(dataRead[i], refRead[i]) << "at index i=" << i << '\n'; } @@ -619,7 +619,7 @@ TEST_P(Syscall, readlinkat) 
{ )"); EXPECT_EQ(getGeneralRegister(0), reference.size()); - char* data = processMemory_ + process_->getHeapStart() + 15; + uint8_t* data = processMemory_ + process_->getHeapStart() + 15; for (size_t i = 0; i < reference.size(); i++) { EXPECT_EQ(data[i], reference.c_str()[i]) << "at index i=" << i << '\n'; } @@ -1107,7 +1107,7 @@ TEST_P(Syscall, uname) { EXPECT_EQ(getGeneralRegister(21), 0); // Check utsname struct in memory - char* data = processMemory_ + process_->getHeapStart(); + uint8_t* data = processMemory_ + process_->getHeapStart(); const char sysname[] = "Linux"; for (size_t i = 0; i < strlen(sysname); i++) EXPECT_EQ(data[i], sysname[i]); diff --git a/test/unit/FixedLatencyMemoryInterfaceTest.cc b/test/unit/FixedLatencyMemoryInterfaceTest.cc index e2cc28c2fd..ba2a47ecf5 100644 --- a/test/unit/FixedLatencyMemoryInterfaceTest.cc +++ b/test/unit/FixedLatencyMemoryInterfaceTest.cc @@ -11,8 +11,8 @@ class FixedLatencyMemoryInterfaceTest protected: static constexpr uint16_t memorySize = 4; - std::array memoryData = {(char)0xFE, (char)0xCA, (char)0xBA, - (char)0xAB}; + std::array memoryData = {static_cast(0xFE), static_cast(0xCA), static_cast(0xBA), + static_cast(0xAB)}; simeng::RegisterValue value = {0xDEADBEEF, 4}; simeng::RegisterValue value_oversized = {0xDEADBEEFDEADBEEF, 8}; diff --git a/test/unit/FlatMemoryInterfaceTest.cc b/test/unit/FlatMemoryInterfaceTest.cc index e04b895179..81c1cd8721 100644 --- a/test/unit/FlatMemoryInterfaceTest.cc +++ b/test/unit/FlatMemoryInterfaceTest.cc @@ -9,8 +9,9 @@ class FlatMemoryInterfaceTest : public testing::Test { protected: static constexpr uint16_t memorySize = 4; - std::array memoryData = {(char)0xFE, (char)0xCA, (char)0xBA, - (char)0xAB}; + std::array memoryData = { + static_cast(0xFE), static_cast(0xCA), static_cast(0xBA), + static_cast(0xAB)}; simeng::RegisterValue value = {0xDEADBEEF, 4}; simeng::RegisterValue value_oversized = {0xDEADBEEFDEADBEEF, 8}; From 7ac1fc3c78c8d6b455aee5bdd5b21bd3cb772b98 Mon Sep 17 
00:00:00 2001 From: dANW34V3R Date: Tue, 13 May 2025 12:53:53 +0100 Subject: [PATCH 08/16] Fix incorrect register in newfstatat test --- src/lib/kernel/Linux.cc | 3 --- test/regression/aarch64/Syscall.cc | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/lib/kernel/Linux.cc b/src/lib/kernel/Linux.cc index a841022422..780867cdec 100644 --- a/src/lib/kernel/Linux.cc +++ b/src/lib/kernel/Linux.cc @@ -190,9 +190,6 @@ int64_t Linux::newfstatat(int64_t dfd, const std::string& filename, stat& out, struct ::stat statbuf; int64_t retval = ::fstatat(hostDfd, new_pathname.c_str(), &statbuf, flag); - std::cout << "retval = " << retval << " errno = " << errno << std::endl; - - // Copy results to output struct out.dev = statbuf.st_dev; out.ino = statbuf.st_ino; diff --git a/test/regression/aarch64/Syscall.cc b/test/regression/aarch64/Syscall.cc index c46e761241..79fb3cca7f 100644 --- a/test/regression/aarch64/Syscall.cc +++ b/test/regression/aarch64/Syscall.cc @@ -771,7 +771,7 @@ TEST_P(Syscall, newfstatat) { ::fstatat(AT_FDCWD, filepath, &statbufRef, 0); // Check fstatat returned 0 - EXPECT_EQ(getGeneralRegister(27), 0); + EXPECT_EQ(getGeneralRegister(21), 0); // Check fstatat buf matches reference EXPECT_EQ(getMemoryValue(process_->getHeapStart()), From 73837127725631b88e8b2a302e354ec6ed25f96e Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Tue, 13 May 2025 16:43:02 +0100 Subject: [PATCH 09/16] Fix various issues --- src/include/simeng/RegisterFileSet.hh | 1 + src/include/simeng/RegisterValue.hh | 8 +++++++- src/lib/RegisterFileSet.cc | 1 + test/regression/RegressionTest.cc | 2 +- test/regression/riscv/Syscall.cc | 8 ++++---- test/unit/ArchitecturalRegisterFileSetTest.cc | 15 ++++++++------ test/unit/ElfTest.cc | 2 +- test/unit/RegisterFileSetTest.cc | 18 ++++++++++------- test/unit/RegisterValueTest.cc | 2 +- test/unit/aarch64/ArchitectureTest.cc | 12 ++++++++--- test/unit/aarch64/ExceptionHandlerTest.cc | 10 +++++----- test/unit/aarch64/InstructionTest.cc 
| 10 +++++----- test/unit/pipeline/DispatchIssueUnitTest.cc | 4 ++-- .../pipeline/MappedRegisterFileSetTest.cc | 4 ++-- test/unit/riscv/ArchitectureTest.cc | 3 ++- test/unit/riscv/ExceptionHandlerTest.cc | 8 ++++---- test/unit/riscv/InstructionTest.cc | 20 +++++++++---------- 17 files changed, 75 insertions(+), 53 deletions(-) diff --git a/src/include/simeng/RegisterFileSet.hh b/src/include/simeng/RegisterFileSet.hh index 89f768d11a..1232c761c3 100644 --- a/src/include/simeng/RegisterFileSet.hh +++ b/src/include/simeng/RegisterFileSet.hh @@ -24,6 +24,7 @@ struct RegisterFileStructure { class RegisterFileSet { public: /** Constructs a set of register files, defined by `registerFileStructures`. + * Initialisation can't be assumed to be 0. */ RegisterFileSet(std::vector registerFileStructures); diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 058705fc2e..53e2907c6b 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "simeng/Pool.hh" @@ -50,7 +51,12 @@ class RegisterValue { // T* view = reinterpret_cast(this->localValue); // view[0] = value; - memcpy(this->localValue, &value, bytes); + size_t numBytesToCopy = bytes; + if (bytes > sizeof(T)) { + numBytesToCopy = sizeof(T); + } + + memcpy(this->localValue, &value, numBytesToCopy); // if (bytes > sizeof(T)) { // // Zero the remaining bytes not set by the provided value diff --git a/src/lib/RegisterFileSet.cc b/src/lib/RegisterFileSet.cc index a195af1bca..3546da72d1 100644 --- a/src/lib/RegisterFileSet.cc +++ b/src/lib/RegisterFileSet.cc @@ -9,6 +9,7 @@ RegisterFileSet::RegisterFileSet( : registerFiles(registerFileStructures.size()) { for (size_t type = 0; type < registerFileStructures.size(); type++) { const auto& structure = registerFileStructures[type]; + // Initialisation won't always be 0 registerFiles[type] = std::vector( structure.quantity, RegisterValue(0, 
structure.bytes)); } diff --git a/test/regression/RegressionTest.cc b/test/regression/RegressionTest.cc index 3ed570fab4..8974d4d185 100644 --- a/test/regression/RegressionTest.cc +++ b/test/regression/RegressionTest.cc @@ -274,7 +274,7 @@ void RegressionTest::assemble(const char* source, const char* triple, options)); ASSERT_NE(asmParser, nullptr) << "Failed to create LLVM target asm parser"; asmParser->setTargetParser(*targetAsmParser); - + // Run asm parser to generate assembled object code ASSERT_FALSE(asmParser->Run(false)); diff --git a/test/regression/riscv/Syscall.cc b/test/regression/riscv/Syscall.cc index c40fc5754a..34e8aac490 100644 --- a/test/regression/riscv/Syscall.cc +++ b/test/regression/riscv/Syscall.cc @@ -452,14 +452,14 @@ TEST_P(Syscall, file_read) { // Check result of readv operations const char refReadv[] = "ABCD\0UV\0EFGH\0\0\0\0MNOPQRST"; - char* dataReadv = processMemory_ + process_->getHeapStart(); + uint8_t* dataReadv = processMemory_ + process_->getHeapStart(); for (size_t i = 0; i < strlen(refReadv); i++) { EXPECT_EQ(dataReadv[i], refReadv[i]) << "at index i=" << i << '\n'; } // Check result of read operation const char refRead[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; - char* dataRead = processMemory_ + process_->getInitialStackPointer() - 64; + uint8_t* dataRead = processMemory_ + process_->getInitialStackPointer() - 64; for (size_t i = 0; i < strlen(refRead); i++) { EXPECT_EQ(dataRead[i], refRead[i]) << "at index i=" << i << '\n'; } @@ -630,7 +630,7 @@ TEST_P(Syscall, readlinkat) { )"); EXPECT_EQ(getGeneralRegister(10), reference.size()); - char* data = processMemory_ + process_->getHeapStart() + 15; + uint8_t* data = processMemory_ + process_->getHeapStart() + 15; for (size_t i = 0; i < reference.size(); i++) { EXPECT_EQ(data[i], reference.c_str()[i]) << "at index i=" << i << '\n'; } @@ -1134,7 +1134,7 @@ TEST_P(Syscall, uname) { EXPECT_EQ(getGeneralRegister(6), 0); // Check utsname struct in memory - char* data = processMemory_ + 
process_->getHeapStart(); + uint8_t* data = processMemory_ + process_->getHeapStart(); const char sysname[] = "Linux"; for (size_t i = 0; i < strlen(sysname); i++) EXPECT_EQ(data[i], sysname[i]); diff --git a/test/unit/ArchitecturalRegisterFileSetTest.cc b/test/unit/ArchitecturalRegisterFileSetTest.cc index 1529ef1cea..f1bd978184 100644 --- a/test/unit/ArchitecturalRegisterFileSetTest.cc +++ b/test/unit/ArchitecturalRegisterFileSetTest.cc @@ -26,14 +26,17 @@ TEST_F(ArchitecturalRegisterFileSetTest, readWrite) { const Register r0 = {i, 0}; const Register rMax = {i, maxRegTag}; - EXPECT_EQ(archRegFileSet.get(r0), RegisterValue(0, regSize)); - EXPECT_EQ(archRegFileSet.get(rMax), RegisterValue(0, regSize)); + archRegFileSet.set(r0, RegisterValue(0ull, regSize)); + archRegFileSet.set(rMax, RegisterValue(0ull, regSize)); - archRegFileSet.set(r0, RegisterValue(20, regSize)); - archRegFileSet.set(rMax, RegisterValue(40, regSize)); + EXPECT_EQ(archRegFileSet.get(r0), RegisterValue(0ull, regSize)); + EXPECT_EQ(archRegFileSet.get(rMax), RegisterValue(0ull, regSize)); - EXPECT_EQ(archRegFileSet.get(r0), RegisterValue(20, regSize)); - EXPECT_EQ(archRegFileSet.get(rMax), RegisterValue(40, regSize)); + archRegFileSet.set(r0, RegisterValue(20ull, regSize)); + archRegFileSet.set(rMax, RegisterValue(40ull, regSize)); + + EXPECT_EQ(archRegFileSet.get(r0), RegisterValue(20ull, regSize)); + EXPECT_EQ(archRegFileSet.get(rMax), RegisterValue(40ull, regSize)); } } diff --git a/test/unit/ElfTest.cc b/test/unit/ElfTest.cc index 9635304bf3..dcd5890d71 100644 --- a/test/unit/ElfTest.cc +++ b/test/unit/ElfTest.cc @@ -22,7 +22,7 @@ class ElfTest : public testing::Test { const uint64_t known_phdrTableAddress = 4194368; const uint64_t known_processImageSize = 5040480; - char* unwrappedProcImgPtr; + uint8_t* unwrappedProcImgPtr; }; // Test that a valid ELF file can be created diff --git a/test/unit/RegisterFileSetTest.cc b/test/unit/RegisterFileSetTest.cc index ed8485eb61..572f565ac9 100644 --- 
a/test/unit/RegisterFileSetTest.cc +++ b/test/unit/RegisterFileSetTest.cc @@ -16,11 +16,12 @@ class RegisterFileSetTest : public ::testing::Test { }; // Ensure RegisterFileSet is constructed correctly +// TODO THIS WILL FAIL AS REGS DON'T GET SET TO 0 WHEN INITIALISED TEST_F(RegisterFileSetTest, validConstruction) { for (uint8_t i = 0; i < regFileStruct.size(); i++) { for (uint16_t j = 0; j < regFileStruct[i].quantity; j++) { const Register reg = {i, j}; - EXPECT_EQ(regFileSet.get(reg), RegisterValue(0, regFileStruct[i].bytes)); + EXPECT_EQ(regFileSet.get(reg), RegisterValue(0ull, regFileStruct[i].bytes)); } } } @@ -33,14 +34,17 @@ TEST_F(RegisterFileSetTest, readWrite) { const Register r0 = {i, 0}; const Register rMax = {i, maxRegTag}; - EXPECT_EQ(regFileSet.get(r0), RegisterValue(0, regSize)); - EXPECT_EQ(regFileSet.get(rMax), RegisterValue(0, regSize)); + regFileSet.set(r0, RegisterValue(0ull, regSize)); + regFileSet.set(rMax, RegisterValue(0ull, regSize)); - regFileSet.set(r0, RegisterValue(20, regSize)); - regFileSet.set(rMax, RegisterValue(40, regSize)); + EXPECT_EQ(regFileSet.get(r0), RegisterValue(0ull, regSize)); + EXPECT_EQ(regFileSet.get(rMax), RegisterValue(0ull, regSize)); - EXPECT_EQ(regFileSet.get(r0), RegisterValue(20, regSize)); - EXPECT_EQ(regFileSet.get(rMax), RegisterValue(40, regSize)); + regFileSet.set(r0, RegisterValue(20ull, regSize)); + regFileSet.set(rMax, RegisterValue(40ull, regSize)); + + EXPECT_EQ(regFileSet.get(r0), RegisterValue(20ull, regSize)); + EXPECT_EQ(regFileSet.get(rMax), RegisterValue(40ull, regSize)); } } diff --git a/test/unit/RegisterValueTest.cc b/test/unit/RegisterValueTest.cc index e382b63d22..ad7c40f211 100644 --- a/test/unit/RegisterValueTest.cc +++ b/test/unit/RegisterValueTest.cc @@ -26,7 +26,7 @@ TEST(RegisterValueTest, Cast) { TEST(RegisterValueTest, MismatchedSizesZeroed) { uint32_t value = 0; auto registerValue = simeng::RegisterValue(value, 8); - EXPECT_EQ(registerValue.get(), 0); + 
EXPECT_EQ(registerValue.get(), 0ull); } // Tests that low bits of stored values can be read correctly diff --git a/test/unit/aarch64/ArchitectureTest.cc b/test/unit/aarch64/ArchitectureTest.cc index 8f2619a283..ab59cc4498 100644 --- a/test/unit/aarch64/ArchitectureTest.cc +++ b/test/unit/aarch64/ArchitectureTest.cc @@ -165,7 +165,7 @@ TEST_F(AArch64ArchitectureTest, getInitialState) { {RegisterType::SYSTEM, (uint16_t)arch->getSystemRegisterTag(AARCH64_SYSREG_DCZID_EL0)}}; std::vector regVals = {{kernel.getInitialStackPointer(), 8}, - {20, 8}}; + {20ull, 8}}; arch::ProcessStateChange changes = arch->getInitialState(); EXPECT_EQ(changes.type, arch::ChangeType::REPLACEMENT); @@ -187,7 +187,13 @@ TEST_F(AArch64ArchitectureTest, getStreamingVectorLength) { } TEST_F(AArch64ArchitectureTest, updateSystemTimerRegisters) { - RegisterFileSet regFile = config::SimInfo::getArchRegStruct(); + auto regFile = RegisterFileSet(config::SimInfo::getArchRegStruct()); + + // Ensure registers start at 0 + regFile.set({RegisterType::SYSTEM, (uint16_t)arch->getSystemRegisterTag( + AARCH64_SYSREG_CNTVCT_EL0)}, {0ull,8}); + regFile.set({RegisterType::SYSTEM, (uint16_t)arch->getSystemRegisterTag( + AARCH64_SYSREG_PMCCNTR_EL0)}, {0ull,8}); uint8_t vctCount = 0; // In A64FX, Timer frequency = (2.5 * 1e9) / (100 * 1e6) = 18 @@ -211,7 +217,7 @@ TEST_F(AArch64ArchitectureTest, updateSystemTimerRegisters) { .get({RegisterType::SYSTEM, (uint16_t)arch->getSystemRegisterTag( AARCH64_SYSREG_CNTVCT_EL0)}) .get(), - vctCount); + static_cast(vctCount)); } } diff --git a/test/unit/aarch64/ExceptionHandlerTest.cc b/test/unit/aarch64/ExceptionHandlerTest.cc index 26e6f8dc7a..4710df1422 100644 --- a/test/unit/aarch64/ExceptionHandlerTest.cc +++ b/test/unit/aarch64/ExceptionHandlerTest.cc @@ -57,7 +57,7 @@ class AArch64ExceptionHandlerTest : public ::testing::Test { // - InstructionException::SMZAUpdate // All system calls are tested in /test/regression/aarch64/Syscall.cc -// Test that a syscall is processed 
sucessfully +// Test that a syscall is processed successfully TEST_F(AArch64ExceptionHandlerTest, testSyscall) { // Create "syscall" instruction uint64_t insnAddr = 0x4; @@ -70,8 +70,8 @@ TEST_F(AArch64ExceptionHandlerTest, testSyscall) { insn->setInstructionAddress(insnAddr); // Setup register file for `uname` syscall (chosen as minimal functionality) - archRegFileSet.set(R0, RegisterValue(1234, 8)); - archRegFileSet.set(R8, RegisterValue(160, 8)); + archRegFileSet.set(R0, RegisterValue(1234ull, 8)); + archRegFileSet.set(R8, RegisterValue(160ull, 8)); // Create ExceptionHandler ExceptionHandler handler(insn, core, memory, kernel); @@ -308,8 +308,8 @@ TEST_F(AArch64ExceptionHandlerTest, readBufferThen) { uint64_t length = 192; // Initialise data to "read" from MockMemory - std::vector dataVec(length, 'q'); - std::vector dataVec2(length, 'q'); + std::vector dataVec(length, 'q'); + std::vector dataVec2(length, 'q'); // Initialise the two required targets (128-bytes per read request in // readBufferThen()) memory::MemoryAccessTarget tar1 = {ptr, 128}; diff --git a/test/unit/aarch64/InstructionTest.cc b/test/unit/aarch64/InstructionTest.cc index 8d4b0d87f6..67dae9cd77 100644 --- a/test/unit/aarch64/InstructionTest.cc +++ b/test/unit/aarch64/InstructionTest.cc @@ -411,7 +411,7 @@ TEST_F(AArch64InstructionTest, supplyData) { // Supply needed operands EXPECT_FALSE(insn.isOperandReady(0)); - RegisterValue addr = {0x480, 8}; + RegisterValue addr = {0x480ull, 8}; insn.supplyOperand(0, addr); EXPECT_TRUE(insn.isOperandReady(0)); @@ -455,7 +455,7 @@ TEST_F(AArch64InstructionTest, supplyData_dataAbort) { // Supply needed operands EXPECT_FALSE(insn.isOperandReady(0)); - RegisterValue addr = {0x480, 8}; + RegisterValue addr = {0x480ull, 8}; insn.supplyOperand(0, addr); EXPECT_TRUE(insn.isOperandReady(0)); @@ -483,7 +483,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { insn.setInstructionAddress(80); // Check initial state of an instruction's branch related options - 
BranchPrediction pred = {false, 0}; + BranchPrediction pred = {false, 0ull}; bool matchingPred = (insn.getBranchPrediction() == pred); EXPECT_TRUE(matchingPred); EXPECT_FALSE(insn.wasBranchTaken()); @@ -495,7 +495,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { pred = {true, 80 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); - insn.supplyOperand(0, RegisterValue(0, 8)); + insn.supplyOperand(0, RegisterValue(0ull, 8)); insn.execute(); EXPECT_TRUE(matchingPred); EXPECT_TRUE(insn.wasBranchTaken()); @@ -549,7 +549,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_target) { pred = {true, 80 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); - insn.supplyOperand(0, RegisterValue(0, 8)); + insn.supplyOperand(0, RegisterValue(0ull, 8)); insn.execute(); EXPECT_TRUE(matchingPred); EXPECT_TRUE(insn.wasBranchTaken()); diff --git a/test/unit/pipeline/DispatchIssueUnitTest.cc b/test/unit/pipeline/DispatchIssueUnitTest.cc index f7ecb2b9b6..c154a51b86 100644 --- a/test/unit/pipeline/DispatchIssueUnitTest.cc +++ b/test/unit/pipeline/DispatchIssueUnitTest.cc @@ -131,9 +131,9 @@ TEST_F(PipelineDispatchIssueUnitTest, singleInstr) { EXPECT_CALL(*uop, getSourceRegisters()) .WillOnce(Return(span(srcRegs))); EXPECT_CALL(*uop, isOperandReady(0)).WillOnce(Return(false)); - EXPECT_CALL(*uop, supplyOperand(0, RegisterValue(0, 8))); + EXPECT_CALL(*uop, supplyOperand(0, RegisterValue(0ull, 8))); EXPECT_CALL(*uop, isOperandReady(1)).WillOnce(Return(false)); - EXPECT_CALL(*uop, supplyOperand(1, RegisterValue(0, 8))); + EXPECT_CALL(*uop, supplyOperand(1, RegisterValue(0ull, 8))); EXPECT_CALL(*uop, getDestinationRegisters()) .WillOnce(Return(span(destRegs))); diff --git a/test/unit/pipeline/MappedRegisterFileSetTest.cc b/test/unit/pipeline/MappedRegisterFileSetTest.cc index fc63657779..ff67f5f55b 100644 --- a/test/unit/pipeline/MappedRegisterFileSetTest.cc +++ 
b/test/unit/pipeline/MappedRegisterFileSetTest.cc @@ -36,14 +36,14 @@ TEST_F(MappedRegisterFileSetTest, getSet) { const Register rMax = {i, maxRegTag}; std::vector physRegs; - for (int j = 2; j < 12; j++) { + for (uint64_t j = 2; j < 12; j++) { physRegs.push_back(rat.allocate(rMax)); RegisterValue regVal = RegisterValue(j, regSize); mappedRegFile.set(rMax, regVal); EXPECT_EQ(mappedRegFile.get(rMax), regVal); } - for (int k = 0; k < 10; k++) { + for (uint64_t k = 0; k < 10; k++) { // RAT constructed where Arch-Phys mapping is 1:1. So, first re-mapped // value will be to maxArchRegRag + 1 EXPECT_EQ(physRegs[k].tag, maxRegTag + k + 1); diff --git a/test/unit/riscv/ArchitectureTest.cc b/test/unit/riscv/ArchitectureTest.cc index 49e64e42b0..ffbeb427c6 100644 --- a/test/unit/riscv/ArchitectureTest.cc +++ b/test/unit/riscv/ArchitectureTest.cc @@ -151,9 +151,10 @@ TEST_F(RiscVArchitectureTest, updateSystemTimerRegisters) { Register cycleSystemReg = { RegisterType::SYSTEM, static_cast(arch->getSystemRegisterTag(RISCV_SYSREG_CYCLE))}; + regFile.set(cycleSystemReg, {0ull, 8}); uint64_t ticks = 30; - EXPECT_EQ(regFile.get(cycleSystemReg), RegisterValue(0, 8)); + EXPECT_EQ(regFile.get(cycleSystemReg), RegisterValue(0ull, 8)); arch->updateSystemTimerRegisters(®File, ticks); EXPECT_EQ(regFile.get(cycleSystemReg), RegisterValue(ticks, 8)); } diff --git a/test/unit/riscv/ExceptionHandlerTest.cc b/test/unit/riscv/ExceptionHandlerTest.cc index 3e9ac92be7..09a0b17d31 100644 --- a/test/unit/riscv/ExceptionHandlerTest.cc +++ b/test/unit/riscv/ExceptionHandlerTest.cc @@ -66,8 +66,8 @@ TEST_F(RiscVExceptionHandlerTest, testSyscall) { insn->setInstructionAddress(insnAddr); // Setup register file for `uname` syscall (chosen as minimal functionality) - archRegFileSet.set(R0, RegisterValue(1234, 8)); - archRegFileSet.set(R7, RegisterValue(160, 8)); + archRegFileSet.set(R0, RegisterValue(1234ull, 8)); + archRegFileSet.set(R7, RegisterValue(160ull, 8)); // Create ExceptionHandler 
ExceptionHandler handler(insn, core, memory, kernel); @@ -307,8 +307,8 @@ TEST_F(RiscVExceptionHandlerTest, readBufferThen) { uint64_t length = 192; // Initialise data to "read" from MockMemory - std::vector dataVec(length, 'q'); - std::vector dataVec2(length, 'q'); + std::vector dataVec(length, 'q'); + std::vector dataVec2(length, 'q'); // Initialise the two required targets (128-bytes per read request in // readBufferThen()) memory::MemoryAccessTarget tar1 = {ptr, 128}; diff --git a/test/unit/riscv/InstructionTest.cc b/test/unit/riscv/InstructionTest.cc index 0642b09b95..30207043df 100644 --- a/test/unit/riscv/InstructionTest.cc +++ b/test/unit/riscv/InstructionTest.cc @@ -328,7 +328,7 @@ TEST_F(RiscVInstructionTest, supplyOperand) { EXPECT_FALSE(insn.isOperandReady(1)); // Define mock register values for source registers - RegisterValue val = {0xABBACAFE, 8}; + RegisterValue val = {static_cast(0xABBACAFE), 8}; // Supply values for all source registers insn.supplyOperand(0, val); insn.supplyOperand(1, val); @@ -347,7 +347,7 @@ TEST_F(RiscVInstructionTest, supplyOperand) { insn.execute(); EXPECT_TRUE(insn.hasExecuted()); auto results = insn.getResults(); - RegisterValue refRes = {0x00000001, 8}; + RegisterValue refRes = {static_cast(0x00000001), 8}; EXPECT_EQ(results.size(), 1); EXPECT_EQ(results[0], refRes); } @@ -377,7 +377,7 @@ TEST_F(RiscVInstructionTest, supplyData) { // Supply needed operands EXPECT_FALSE(insn.isOperandReady(0)); - RegisterValue addr = {0x480, 8}; + RegisterValue addr = {0x480ull, 8}; insn.supplyOperand(0, addr); EXPECT_TRUE(insn.isOperandReady(0)); @@ -416,7 +416,7 @@ TEST_F(RiscVInstructionTest, supplyData_dataAbort) { // Supply needed operands EXPECT_FALSE(insn.isOperandReady(0)); - RegisterValue addr = {0x480, 8}; + RegisterValue addr = {0x480ull, 8}; insn.supplyOperand(0, addr); EXPECT_TRUE(insn.isOperandReady(0)); @@ -470,7 +470,7 @@ TEST_F(RiscVInstructionTest, correctPred_notTaken) { insn.setInstructionAddress(400); // Check initial 
state of an instruction's branch related options - BranchPrediction pred = {false, 0}; + BranchPrediction pred = {false, 0ull}; bool matchingPred = (insn.getBranchPrediction() == pred); EXPECT_TRUE(matchingPred); EXPECT_FALSE(insn.wasBranchTaken()); @@ -483,8 +483,8 @@ TEST_F(RiscVInstructionTest, correctPred_notTaken) { pred = {false, 400 + 4}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); - insn.supplyOperand(0, RegisterValue(0, 8)); - insn.supplyOperand(1, RegisterValue(3, 8)); + insn.supplyOperand(0, RegisterValue(0ull, 8)); + insn.supplyOperand(1, RegisterValue(3ull, 8)); insn.execute(); EXPECT_TRUE(matchingPred); EXPECT_FALSE(insn.wasBranchTaken()); @@ -528,7 +528,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_taken) { insn.setInstructionAddress(400); // Check initial state of an instruction's branch related options - BranchPrediction pred = {false, 0}; + BranchPrediction pred = {false, 0ull}; bool matchingPred = (insn.getBranchPrediction() == pred); EXPECT_TRUE(matchingPred); EXPECT_FALSE(insn.wasBranchTaken()); @@ -541,8 +541,8 @@ TEST_F(RiscVInstructionTest, incorrectPred_taken) { pred = {true, 400 - 86}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); - insn.supplyOperand(0, RegisterValue(0, 8)); - insn.supplyOperand(1, RegisterValue(3, 8)); + insn.supplyOperand(0, RegisterValue(0ull, 8)); + insn.supplyOperand(1, RegisterValue(3ull, 8)); insn.execute(); EXPECT_TRUE(matchingPred); EXPECT_FALSE(insn.wasBranchTaken()); From fe1708dfae52162acd227067cd13e9b08c199435 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Tue, 13 May 2025 16:49:28 +0100 Subject: [PATCH 10/16] Update sme feature list --- test/regression/aarch64/AArch64RegressionTest.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/regression/aarch64/AArch64RegressionTest.hh b/test/regression/aarch64/AArch64RegressionTest.hh index 013be94968..21e95b7c89 100644 --- 
a/test/regression/aarch64/AArch64RegressionTest.hh +++ b/test/regression/aarch64/AArch64RegressionTest.hh @@ -242,7 +242,7 @@ class AArch64RegressionTest : public RegressionTest { #if SIMENG_LLVM_VERSION < 14 return "+sve,+lse"; #elif SIMENG_LLVM_VERSION < 18 - return "+sve,+lse,+sve2,+sme,+sme-f64"; + return "+sve,+lse,+sve2,+sme,+sme-f64,+sme-i64"; #else return "+sve,+lse,+sve2,+sme,+sme-f64f64,+sme-i16i64,+sme2"; #endif From 9f72acede5b31d35a6ac191970d734126b118554 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Tue, 13 May 2025 17:28:52 +0100 Subject: [PATCH 11/16] Use memcpy for non-local values --- src/include/simeng/RegisterValue.hh | 31 +++++++++-------------------- src/lib/RegisterFileSet.cc | 1 - test/unit/RegisterFileSetTest.cc | 1 - 3 files changed, 9 insertions(+), 24 deletions(-) diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 53e2907c6b..64354cdc12 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -31,8 +31,6 @@ struct safePointer { /** Global memory pool used by RegisterValue class. */ extern Pool pool; - -// TODO the data is NOT immutable as per AArch64_LD1_MXIPXX_V_D. We should change the class to enforce immutability or concede this functionality /** A class that holds an arbitrary region of immutable data, providing * casting and data accessor functions. 
For values smaller than or equal to * `MAX_LOCAL_BYTES`, this data is held in a local value, otherwise memory is @@ -47,32 +45,21 @@ class RegisterValue { template , T>* = nullptr> RegisterValue(T value, uint16_t bytes = sizeof(T)) : bytes(bytes) { - if (isLocal()) { - // T* view = reinterpret_cast(this->localValue); - // view[0] = value; - - size_t numBytesToCopy = bytes; - if (bytes > sizeof(T)) { - numBytesToCopy = sizeof(T); - } + // Ensure the high bits are zeroed + size_t numBytesToCopy = bytes; + if (bytes > sizeof(T)) { + numBytesToCopy = sizeof(T); + } + if (isLocal()) { memcpy(this->localValue, &value, numBytesToCopy); - - // if (bytes > sizeof(T)) { - // // Zero the remaining bytes not set by the provided value - // std::fill(this->localValue + sizeof(T), - // this->localValue + bytes, 0); - // } } else { - void* data = pool.allocate(bytes); + uint8_t* data = static_cast(pool.allocate(bytes)); std::memset(data, 0, bytes); - - T* view = reinterpret_cast(data); - view[0] = value; + memcpy(data, &value, numBytesToCopy); this->ptr = std::shared_ptr( - static_cast(data), - [bytes](void* ptr) { pool.deallocate(ptr, bytes); }); + data, [bytes](uint8_t* ptr) { pool.deallocate(ptr, bytes); }); } } diff --git a/src/lib/RegisterFileSet.cc b/src/lib/RegisterFileSet.cc index 3546da72d1..a195af1bca 100644 --- a/src/lib/RegisterFileSet.cc +++ b/src/lib/RegisterFileSet.cc @@ -9,7 +9,6 @@ RegisterFileSet::RegisterFileSet( : registerFiles(registerFileStructures.size()) { for (size_t type = 0; type < registerFileStructures.size(); type++) { const auto& structure = registerFileStructures[type]; - // Initialisation won't always be 0 registerFiles[type] = std::vector( structure.quantity, RegisterValue(0, structure.bytes)); } diff --git a/test/unit/RegisterFileSetTest.cc b/test/unit/RegisterFileSetTest.cc index 572f565ac9..2b4b6e2782 100644 --- a/test/unit/RegisterFileSetTest.cc +++ b/test/unit/RegisterFileSetTest.cc @@ -16,7 +16,6 @@ class RegisterFileSetTest : public 
::testing::Test { }; // Ensure RegisterFileSet is constructed correctly -// TODO THIS WILL FAIL AS REGS DON'T GET SET TO 0 WHEN INITIALISED TEST_F(RegisterFileSetTest, validConstruction) { for (uint8_t i = 0; i < regFileStruct.size(); i++) { for (uint16_t j = 0; j < regFileStruct[i].quantity; j++) { From eeb158f19ef3e717591b69d6c9280cec04d1835f Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Tue, 13 May 2025 17:35:45 +0100 Subject: [PATCH 12/16] Minor cleanup --- src/include/simeng/RegisterFileSet.hh | 1 - src/include/simeng/RegisterValue.hh | 5 +++-- src/lib/RegisterValue.cc | 2 -- test/regression/RegressionTest.cc | 2 +- 4 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/include/simeng/RegisterFileSet.hh b/src/include/simeng/RegisterFileSet.hh index 1232c761c3..89f768d11a 100644 --- a/src/include/simeng/RegisterFileSet.hh +++ b/src/include/simeng/RegisterFileSet.hh @@ -24,7 +24,6 @@ struct RegisterFileStructure { class RegisterFileSet { public: /** Constructs a set of register files, defined by `registerFileStructures`. - * Initialisation can't be assumed to be 0. */ RegisterFileSet(std::vector registerFileStructures); diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 64354cdc12..8540d0e556 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -13,6 +13,9 @@ namespace simeng { inline Pool pool = Pool(); +/** Global memory pool used by RegisterValue class. */ +extern Pool pool; + template struct safePointer { // public: @@ -28,8 +31,6 @@ struct safePointer { const uint8_t* ptr; }; -/** Global memory pool used by RegisterValue class. */ -extern Pool pool; /** A class that holds an arbitrary region of immutable data, providing * casting and data accessor functions. 
For values smaller than or equal to diff --git a/src/lib/RegisterValue.cc b/src/lib/RegisterValue.cc index 38b068ad54..c0aab52995 100644 --- a/src/lib/RegisterValue.cc +++ b/src/lib/RegisterValue.cc @@ -4,8 +4,6 @@ namespace simeng { -// Pool pool = Pool(); - RegisterValue::RegisterValue() : bytes(0) {} RegisterValue::operator bool() const { return (bytes > 0); } diff --git a/test/regression/RegressionTest.cc b/test/regression/RegressionTest.cc index 8974d4d185..3ed570fab4 100644 --- a/test/regression/RegressionTest.cc +++ b/test/regression/RegressionTest.cc @@ -274,7 +274,7 @@ void RegressionTest::assemble(const char* source, const char* triple, options)); ASSERT_NE(asmParser, nullptr) << "Failed to create LLVM target asm parser"; asmParser->setTargetParser(*targetAsmParser); - + // Run asm parser to generate assembled object code ASSERT_FALSE(asmParser->Run(false)); From 168815874336356b1335a4f9944a0369d9490e1e Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Thu, 15 May 2025 13:47:50 +0100 Subject: [PATCH 13/16] Hide underlying pointer --- src/include/simeng/RegisterValue.hh | 36 +++-- .../simeng/arch/aarch64/helpers/bitmanip.hh | 7 +- .../simeng/arch/aarch64/helpers/neon.hh | 135 +++++++++--------- src/include/simeng/models/emulation/Core.hh | 4 + src/lib/arch/aarch64/ExceptionHandler.cc | 4 +- src/lib/arch/aarch64/Instruction_execute.cc | 72 +++++----- src/lib/arch/riscv/ExceptionHandler.cc | 5 +- src/lib/memory/FixedLatencyMemoryInterface.cc | 2 +- src/lib/memory/FlatMemoryInterface.cc | 2 +- src/lib/models/emulation/Core.cc | 9 +- src/lib/pipeline/FetchUnit.cc | 6 +- sst/SimEngMemInterface.cc | 7 +- 12 files changed, 165 insertions(+), 124 deletions(-) diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 8540d0e556..3fffb7374a 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -18,8 +18,9 @@ extern Pool pool; template struct safePointer { - // public: - // safePointer(const 
char* ptr) : ptr(ptr) {} + public: + explicit safePointer(const uint8_t* ptr) : ptr(ptr) {} + explicit safePointer() : ptr(nullptr) {} T operator[](const int i) const { T output; @@ -27,10 +28,20 @@ struct safePointer { return output; } - // private: + void copyTo(void* dest, const size_t bytes) const { + memcpy(dest, ptr, bytes); + } + + void copyTo(void* dest, const size_t bytes, const uint64_t offset) { + memcpy(dest, ptr + offset, bytes); + } + + private: const uint8_t* ptr; -}; + // Give RegisterValue access to the underlying pointer + friend class RegisterValue; +}; /** A class that holds an arbitrary region of immutable data, providing * casting and data accessor functions. For values smaller than or equal to @@ -55,12 +66,12 @@ class RegisterValue { if (isLocal()) { memcpy(this->localValue, &value, numBytesToCopy); } else { - uint8_t* data = static_cast(pool.allocate(bytes)); - std::memset(data, 0, bytes); - memcpy(data, &value, numBytesToCopy); - this->ptr = std::shared_ptr( - data, [bytes](uint8_t* ptr) { pool.deallocate(ptr, bytes); }); + static_cast(pool.allocate(bytes)), + [bytes](uint8_t* ptr) { pool.deallocate(ptr, bytes); }); + + std::memset(this->ptr.get(), 0, bytes); + memcpy(this->ptr.get(), &value, numBytesToCopy); } } @@ -87,6 +98,12 @@ class RegisterValue { RegisterValue(const uint8_t* ptr, uint16_t bytes) : RegisterValue(ptr, bytes, bytes) {} + /** Create a new RegisterValue of size 'bytes', copy data from the safePointer + * 'sptr'. */ + template + RegisterValue(const safePointer sptr, uint16_t bytes) + : RegisterValue(sptr.ptr, bytes) {} + /** Create a new RegisterValue by copying bytes from a fixed-size array. The * resultant RegisterValue will have size `C` (defaulting to the no. of * bytes in the array). 
@@ -113,7 +130,6 @@ class RegisterValue { "data held"); if (isLocal()) { return safePointer{this->localValue}; - // return reinterpret_cast(localValue); } else { return safePointer{ptr.get()}; } diff --git a/src/include/simeng/arch/aarch64/helpers/bitmanip.hh b/src/include/simeng/arch/aarch64/helpers/bitmanip.hh index 3255df1d7b..92915274b3 100644 --- a/src/include/simeng/arch/aarch64/helpers/bitmanip.hh +++ b/src/include/simeng/arch/aarch64/helpers/bitmanip.hh @@ -68,10 +68,13 @@ uint64_t rbit(srcValContainer& sourceValues, * Returns array of uint8_t with number of elements = bytes in T. */ template std::array rev(srcValContainer& sourceValues) { - auto bytes = sourceValues[0].getAsVector(); + // auto bytes = sourceValues[0].getAsVector(); + + std::array forward; + sourceValues[0].getAsVector().copyTo(&forward, sizeof(T)); std::array reversed; // Copy `bytes` backwards onto `reversed` - std::copy(bytes.ptr, bytes.ptr + sizeof(T), std::rbegin(reversed)); + std::copy(forward.begin(), forward.begin() + sizeof(T), std::rbegin(reversed)); return reversed; } diff --git a/src/include/simeng/arch/aarch64/helpers/neon.hh b/src/include/simeng/arch/aarch64/helpers/neon.hh index 60d4f98f95..d03a22d79c 100644 --- a/src/include/simeng/arch/aarch64/helpers/neon.hh +++ b/src/include/simeng/arch/aarch64/helpers/neon.hh @@ -13,8 +13,8 @@ namespace aarch64 { * Returns correctly formatted Register Value. */ template RegisterValue vecAdd_3ops(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = static_cast(n[i] + m[i]); @@ -29,8 +29,8 @@ RegisterValue vecAdd_3ops(srcValContainer& sourceValues) { * Returns correctly formatted Register Value. 
*/ template RegisterValue vecAddp_3ops(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; uint8_t offset = I / 2; for (int i = 0; i < I; i++) { @@ -50,8 +50,8 @@ RegisterValue vecAddp_3ops(srcValContainer& sourceValues) { * Returns correctly formatted Register Value. */ template RegisterValue vecBic_3ops(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = n[i] & ~m[i]; @@ -69,7 +69,7 @@ template RegisterValue vecBicShift_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); T imm = ~shiftValue(static_cast(metadata.operands[1].imm), metadata.operands[1].shift.type, metadata.operands[1].shift.value); @@ -124,8 +124,8 @@ RegisterValue vecBsl(srcValContainer& sourceValues) { template RegisterValue vecCompare(srcValContainer& sourceValues, bool cmpToZero, std::function func) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = !cmpToZero ? sourceValues[1].getAsVector() : safePointer(); + const auto n = sourceValues[0].getAsVector(); + const auto m = !cmpToZero ? sourceValues[1].getAsVector() : safePointer(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = func(n[i], cmpToZero ? 
static_cast(0) : m[i]) @@ -181,8 +181,8 @@ template RegisterValue vecExtVecs_index( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint64_t index = static_cast(metadata.operands[3].imm); T out[16 / sizeof(T)] = {0}; @@ -203,8 +203,8 @@ RegisterValue vecExtVecs_index( * Returns correctly formatted RegisterValue. */ template RegisterValue vecFabd(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = std::fabs(n[i] - m[i]); @@ -219,7 +219,7 @@ RegisterValue vecFabd(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecFabs_2ops(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = std::fabs(n[i]); @@ -238,8 +238,8 @@ RegisterValue vecFabs_2ops(srcValContainer& sourceValues) { template RegisterValue vecFCompare(srcValContainer& sourceValues, bool cmpToZero, std::function func) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = !cmpToZero ? sourceValues[1].getAsVector() : safePointer(); + const auto n = sourceValues[0].getAsVector(); + const auto m = !cmpToZero ? sourceValues[1].getAsVector() : safePointer(); C out[16 / sizeof(C)] = {0}; for (int i = 0; i < I; i++) { out[i] = func(n[i], cmpToZero ? 
static_cast(0) : m[i]) @@ -257,7 +257,7 @@ RegisterValue vecFCompare(srcValContainer& sourceValues, bool cmpToZero, * Returns correctly formatted RegisterValue. */ template RegisterValue vecFcvtl(srcValContainer& sourceValues, bool isFcvtl2) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); D out[16 / sizeof(D)] = {0}; for (int i = (isFcvtl2 ? I : 0); i < (isFcvtl2 ? (I * 2) : I); i++) { out[isFcvtl2 ? (i - I) : i] = static_cast(n[i]); @@ -273,7 +273,7 @@ RegisterValue vecFcvtl(srcValContainer& sourceValues, bool isFcvtl2) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecFcvtn(srcValContainer& sourceValues, bool isFcvtn2) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); D out[16 / sizeof(D)] = {0}; for (int i = (isFcvtn2 ? (I / 2) : 0); i < I; i++) { out[i] = static_cast(n[isFcvtn2 ? (i - (I / 2)) : i]); @@ -289,7 +289,7 @@ RegisterValue vecFcvtn(srcValContainer& sourceValues, bool isFcvtn2) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecFcvtzs(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); D out[16 / sizeof(D)] = {0}; // TODO: Handle NaNs, denorms, and saturation for (int i = 0; i < I; i++) { @@ -306,9 +306,9 @@ RegisterValue vecFcvtzs(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecFmla_3vecs(srcValContainer& sourceValues) { - const safePointer d = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto d = sourceValues[0].getAsVector(); + const auto n = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = d[i] + n[i] * m[i]; @@ -324,8 +324,8 @@ RegisterValue vecFmla_3vecs(srcValContainer& sourceValues) { template std::enable_if_t, RegisterValue> vecFDiv( srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { if (m[i] == 0) @@ -346,8 +346,8 @@ template RegisterValue vecFmlaIndexed_3vecs( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const safePointer d = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); + const auto d = sourceValues[0].getAsVector(); + const auto n = sourceValues[1].getAsVector(); int index = metadata.operands[2].vector_index; const T m = sourceValues[2].getAsVector()[index]; T out[16 / sizeof(T)] = {0}; @@ -365,9 +365,9 @@ RegisterValue vecFmlaIndexed_3vecs( * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecFmls_3vecs(srcValContainer& sourceValues) { - const safePointer d = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto d = sourceValues[0].getAsVector(); + const auto n = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = d[i] - (n[i] * m[i]); @@ -385,8 +385,8 @@ template RegisterValue vecFmlsIndexed_3vecs( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const safePointer d = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); + const auto d = sourceValues[0].getAsVector(); + const auto n = sourceValues[1].getAsVector(); int index = metadata.operands[2].vector_index; const T m = sourceValues[2].getAsVector()[index]; T out[16 / sizeof(T)] = {0}; @@ -407,7 +407,7 @@ RegisterValue vecFmulIndexed_vecs( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { int index = metadata.operands[2].vector_index; - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); const T m = sourceValues[1].getAsVector()[index]; T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { @@ -423,7 +423,7 @@ RegisterValue vecFmulIndexed_vecs( * Returns correctly formatted RegisterValue. */ template RegisterValue vecFneg_2ops(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = -n[i]; @@ -438,7 +438,7 @@ RegisterValue vecFneg_2ops(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecFsqrt_2ops(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = ::sqrt(n[i]); @@ -453,7 +453,7 @@ RegisterValue vecFsqrt_2ops(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecFrsqrte_2ops(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = 1.0f / sqrtf(n[i]); @@ -469,8 +469,8 @@ RegisterValue vecFrsqrte_2ops(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecFrsqrts_3ops(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = (3.0f - n[i] * m[i]) / 2.0f; @@ -488,8 +488,8 @@ template RegisterValue vecIns_2Index( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const safePointer d = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); + const auto d = sourceValues[0].getAsVector(); + const auto n = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { @@ -510,7 +510,7 @@ template RegisterValue vecInsIndex_gpr( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const T n = sourceValues[1].get(); T out[16 / sizeof(T)] = {0}; @@ -530,7 +530,7 @@ RegisterValue vecInsIndex_gpr( template RegisterValue vecLogicOp_2vecs(srcValContainer& 
sourceValues, std::function func) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = func(n[i]); @@ -547,8 +547,8 @@ RegisterValue vecLogicOp_2vecs(srcValContainer& sourceValues, template RegisterValue vecLogicOp_3vecs(srcValContainer& sourceValues, std::function func) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { out[i] = func(n[i], m[i]); @@ -563,13 +563,15 @@ RegisterValue vecLogicOp_3vecs(srcValContainer& sourceValues, * Returns correctly formatted RegisterValue. */ template RegisterValue vecUMaxP(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); // Concatenate the vectors T temp[2 * I]; - memcpy(temp, n.ptr, sizeof(T) * I); - memcpy(temp + (sizeof(T) * I), m.ptr, sizeof(T) * I); + n.copyTo(temp, sizeof(T) * I); + // memcpy(temp, n.ptr, sizeof(T) * I); + m.copyTo(temp + (sizeof(T) * I),sizeof(T) * I); + // memcpy(temp + (sizeof(T) * I), m.ptr, sizeof(T) * I); // Compare each adjacent pair of elements T out[I]; for (int i = 0; i < I; i++) { @@ -585,13 +587,16 @@ RegisterValue vecUMaxP(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecUMinP(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); // Concatenate the vectors T temp[2 * I]; - memcpy(temp, n.ptr, sizeof(T) * I); - memcpy(temp + (sizeof(T) * I), m.ptr, sizeof(T) * I); + n.copyTo(temp, sizeof(T) * I); + m.copyTo(temp + (sizeof(T) * I), sizeof(T) * I); + + // memcpy(temp, n.ptr, sizeof(T) * I); + // memcpy(temp + (sizeof(T) * I), m.ptr, sizeof(T) * I); T out[I]; for (int i = 0; i < I; i++) { @@ -607,7 +612,7 @@ RegisterValue vecUMinP(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. */ template RegisterValue vecMaxnmp_2ops(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); bool isFP = std::is_floating_point::value; T out = n[0]; @@ -624,7 +629,7 @@ RegisterValue vecMaxnmp_2ops(srcValContainer& sourceValues) { * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecMinv_2ops(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); bool isFP = std::is_floating_point::value; T out = n[0]; @@ -695,7 +700,7 @@ template RegisterValue vecShlShift_vecImm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); int64_t shift = metadata.operands[2].imm; T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { @@ -760,7 +765,7 @@ template RegisterValue vecSshrShift_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); uint64_t shift = metadata.operands[2].imm; T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { @@ -776,7 +781,7 @@ RegisterValue vecSshrShift_imm( * Returns correctly formatted RegisterValue. */ template RegisterValue vecSumElems_2ops(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); T out = 0; for (int i = 0; i < I; i++) { out += n[i]; @@ -870,7 +875,7 @@ RegisterValue vecTbl( * Returns correctly formatted RegisterValue. */ template RegisterValue vecRev(srcValContainer& sourceValues) { - const safePointer source = sourceValues[0].getAsVector(); + const auto source = sourceValues[0].getAsVector(); int element_size = (sizeof(T) * 8); int datasize = I * element_size; int container_size = V; @@ -898,8 +903,8 @@ RegisterValue vecRev(srcValContainer& sourceValues) { * Returns formatted Register Value. 
*/ template RegisterValue vecTrn1(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I / 2; i++) { @@ -917,8 +922,8 @@ RegisterValue vecTrn1(srcValContainer& sourceValues) { * Returns formatted Register Value. */ template RegisterValue vecTrn2(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I / 2; i++) { @@ -936,8 +941,8 @@ RegisterValue vecTrn2(srcValContainer& sourceValues) { * Returns formatted Register Value. */ template RegisterValue vecUzp(srcValContainer& sourceValues, bool isUzp1) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I / 2; i++) { @@ -957,8 +962,8 @@ RegisterValue vecUzp(srcValContainer& sourceValues, bool isUzp1) { * Returns formatted Register Value. */ template RegisterValue vecZip(srcValContainer& sourceValues, bool isZip2) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); T out[16 / sizeof(T)] = {0}; int index = isZip2 ? 
(I / 2) : 0; diff --git a/src/include/simeng/models/emulation/Core.hh b/src/include/simeng/models/emulation/Core.hh index f1e38d7022..80eef34009 100644 --- a/src/include/simeng/models/emulation/Core.hh +++ b/src/include/simeng/models/emulation/Core.hh @@ -23,6 +23,8 @@ class Core : public simeng::Core { memory::MemoryInterface& dataMemory, uint64_t entryPoint, uint64_t programByteLength, const arch::Architecture& isa); + ~Core(); + /** Tick the core. */ void tick() override; @@ -73,6 +75,8 @@ class Core : public simeng::Core { /** The number of branches executed. */ uint64_t branchesExecuted_ = 0; + + uint8_t* fetchBuffer_; }; } // namespace emulation diff --git a/src/lib/arch/aarch64/ExceptionHandler.cc b/src/lib/arch/aarch64/ExceptionHandler.cc index 526a65138f..936b000b32 100644 --- a/src/lib/arch/aarch64/ExceptionHandler.cc +++ b/src/lib/arch/aarch64/ExceptionHandler.cc @@ -858,8 +858,10 @@ bool ExceptionHandler::readBufferThen(uint64_t ptr, uint64_t length, // Append data to buffer assert(response->data && "unhandled failed read in exception handler"); uint8_t bytesRead = response->target.size; - const uint8_t* data = response->data.getAsVector().ptr; + uint8_t* data = (uint8_t*)malloc(bytesRead); + response->data.getAsVector().copyTo(data, bytesRead); dataBuffer_.insert(dataBuffer_.end(), data, data + bytesRead); + free(data); memory_.clearCompletedReads(); // If there is more data, rerun this function for next chunk diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc index 39733a4758..9dc5628193 100644 --- a/src/lib/arch/aarch64/Instruction_execute.cc +++ b/src/lib/arch/aarch64/Instruction_execute.cc @@ -122,7 +122,7 @@ void Instruction::execute() { for (uint16_t row = 0; row < rowCount; row++) { const auto zaRow = sourceValues_[row].getAsVector(); uint64_t out[32] = {0}; - std::memcpy(out, zaRow.ptr, rowCount * sizeof(uint64_t)); + zaRow.copyTo(out, rowCount * sizeof(uint64_t)); // Slice element is active 
IFF all of the following conditions hold: // - Element in 1st source pred corresponding to horizontal // slice is TRUE @@ -154,7 +154,7 @@ void Instruction::execute() { for (uint16_t row = 0; row < rowCount; row++) { const auto zaRow = sourceValues_[row].getAsVector(); uint32_t out[64] = {0}; - std::memcpy(out, zaRow.ptr, rowCount * sizeof(uint32_t)); + zaRow.copyTo(out, rowCount * sizeof(uint32_t)); // Slice element is active IFF all of the following conditions hold: // - Element in 1st source pred corresponding to horizontal // slice is TRUE @@ -186,7 +186,7 @@ void Instruction::execute() { for (uint16_t row = 0; row < rowCount; row++) { const auto zaRow = sourceValues_[row].getAsVector(); uint64_t out[32] = {0}; - std::memcpy(out, zaRow.ptr, rowCount * sizeof(uint64_t)); + zaRow.copyTo(out, rowCount * sizeof(uint64_t)); // Slice element is active IFF all of the following conditions hold: // - Corresponding element in 1st source pred is TRUE // - Element in 2nd source pred corresponding to vertical @@ -221,7 +221,7 @@ void Instruction::execute() { for (uint16_t row = 0; row < rowCount; row++) { const auto zaRow = sourceValues_[row].getAsVector(); uint32_t out[64] = {0}; - std::memcpy(out, zaRow.ptr, rowCount * sizeof(uint32_t)); + zaRow.copyTo(out, rowCount * sizeof(uint32_t)); // Slice element is active IFF all of the following conditions hold: // - Corresponding element in 1st source pred is TRUE // - Element in 2nd source pred corresponding to vertical @@ -3168,7 +3168,7 @@ void Instruction::execute() { for (uint16_t i = 0; i < rowCount; i++) { const auto row = sourceValues_[i].getAsVector(); uint8_t out[256] = {0}; - memcpy(out, row.ptr, rowCount * sizeof(uint8_t)); + row.copyTo(out, rowCount * sizeof(uint8_t)); uint64_t shifted_active = 1ull << (i % 64); if (pg[i / 64] & shifted_active) out[sliceNum] = zn[i]; results_[i] = {out, 256}; @@ -3194,7 +3194,7 @@ void Instruction::execute() { for (uint16_t i = 0; i < rowCount; i++) { const auto row = 
sourceValues_[i].getAsVector(); uint64_t out[32] = {0}; - memcpy(out, row.ptr, rowCount * sizeof(uint64_t)); + row.copyTo(out, rowCount * sizeof(uint64_t)); uint64_t shifted_active = 1ull << ((i % 8) * 8); if (pg[i / 8] & shifted_active) out[sliceNum] = zn[i]; results_[i] = {out, 256}; @@ -3220,7 +3220,7 @@ void Instruction::execute() { for (uint16_t i = 0; i < rowCount; i++) { const auto row = sourceValues_[i].getAsVector(); uint16_t out[128] = {0}; - memcpy(out, row.ptr, rowCount * sizeof(uint16_t)); + row.copyTo(out, rowCount * sizeof(uint16_t)); uint64_t shifted_active = 1ull << ((i % 32) * 2); if (pg[i / 32] & shifted_active) out[sliceNum] = zn[i]; results_[i] = {out, 256}; @@ -3245,7 +3245,7 @@ void Instruction::execute() { const auto row = sourceValues_[i].getAsVector(); uint64_t out[32] = {0}; // *2 in memcpy as need 128-bit elements but using uint64_t - memcpy(out, row.ptr, rowCount * sizeof(uint64_t) * 2); + row.copyTo(out, rowCount * sizeof(uint64_t) * 2); // For 128-bit there are 16-bit for each active element uint64_t shifted_active = 1ull << ((i % 4) * 16); if (pg[i / 4] & shifted_active) { @@ -3276,7 +3276,7 @@ void Instruction::execute() { for (uint16_t i = 0; i < rowCount; i++) { const auto row = sourceValues_[i].getAsVector(); uint32_t out[64] = {0}; - memcpy(out, row.ptr, rowCount * sizeof(uint32_t)); + row.copyTo(out, rowCount * sizeof(uint32_t)); uint64_t shifted_active = 1ull << ((i % 16) * 4); if (pg[i / 16] & shifted_active) out[sliceNum] = zn[i]; results_[i] = {out, 256}; @@ -3499,7 +3499,7 @@ void Instruction::execute() { for (int i = 0; i < partition_num; i++) { const auto row = sourceValues_[i].getAsVector(); uint8_t out[256] = {0}; - memcpy(out, row.ptr, partition_num * sizeof(uint8_t)); + row.copyTo(out, partition_num * sizeof(uint8_t)); uint64_t shifted_active = 1ull << (i % 64); if (pg[i / 64] & shifted_active) { out[sliceNum] = data[i]; @@ -3526,7 +3526,7 @@ void Instruction::execute() { for (int i = 0; i < partition_num; i++) { 
const auto row = sourceValues_[i].getAsVector(); uint64_t out[32] = {0}; - memcpy(out, row.ptr, partition_num * sizeof(uint64_t)); + row.copyTo(out, partition_num * sizeof(uint64_t)); uint64_t shifted_active = 1ull << ((i % 8) * 8); if (pg[i / 8] & shifted_active) { out[sliceNum] = data[i]; @@ -3553,7 +3553,7 @@ void Instruction::execute() { for (int i = 0; i < partition_num; i++) { const auto row = sourceValues_[i].getAsVector(); uint16_t out[128] = {0}; - memcpy(out, row.ptr, partition_num * sizeof(uint16_t)); + row.copyTo(out, partition_num * sizeof(uint16_t)); uint64_t shifted_active = 1ull << ((i % 32) * 2); if (pg[i / 32] & shifted_active) { out[sliceNum] = data[i]; @@ -3582,7 +3582,7 @@ void Instruction::execute() { const auto row = sourceValues_[i].getAsVector(); uint64_t out[32] = {0}; // *2 in memcpy as need 128-bit but using uint64_t - memcpy(out, row.ptr, partition_num * sizeof(uint64_t) * 2); + row.copyTo(out, partition_num * sizeof(uint64_t) * 2); // For 128-bit there are 16-bit for each active element uint64_t shifted_active = 1ull << ((i % 4) * 16); if (pg[i / 4] & shifted_active) { @@ -3612,7 +3612,7 @@ void Instruction::execute() { for (int i = 0; i < partition_num; i++) { const auto row = sourceValues_[i].getAsVector(); uint32_t out[64] = {0}; - memcpy(out, row.ptr, partition_num * sizeof(uint32_t)); + row.copyTo(out, partition_num * sizeof(uint32_t)); uint64_t shifted_active = 1ull << ((i % 16) * 4); if (pg[i / 16] & shifted_active) { out[sliceNum] = data[i]; @@ -5926,7 +5926,7 @@ void Instruction::execute() { // STORE for (int i = 0; i < 4; i++) { memoryData_[i] = - RegisterValue(sourceValues_[i].getAsVector().ptr, + RegisterValue(sourceValues_[i].getAsVector(), 16 * sizeof(uint8_t)); } break; @@ -5937,7 +5937,7 @@ void Instruction::execute() { // STORE for (int i = 0; i < 4; i++) { memoryData_[i] = - RegisterValue(sourceValues_[i].getAsVector().ptr, + RegisterValue(sourceValues_[i].getAsVector(), 16 * sizeof(uint8_t)); } // if #imm post-index, 
value can only be 64 @@ -5953,7 +5953,7 @@ void Instruction::execute() { // STORE for (int i = 0; i < 4; i++) { memoryData_[i] = RegisterValue( - sourceValues_[i].getAsVector().ptr, + sourceValues_[i].getAsVector(), 2 * sizeof(uint64_t)); } break; @@ -5963,7 +5963,7 @@ void Instruction::execute() { // STORE for (int i = 0; i < 4; i++) { memoryData_[i] = - RegisterValue(sourceValues_[i].getAsVector().ptr, + RegisterValue(sourceValues_[i].getAsVector(), 2 * sizeof(uint64_t)); } // if #imm post-index, value can only be 64 @@ -5979,7 +5979,7 @@ void Instruction::execute() { // STORE for (int i = 0; i < 4; i++) { memoryData_[i] = - RegisterValue(sourceValues_[i].getAsVector().ptr, + RegisterValue(sourceValues_[i].getAsVector(), 2 * sizeof(uint32_t)); } // if #imm post-index, value can only be 32 @@ -5995,7 +5995,7 @@ void Instruction::execute() { // STORE for (int i = 0; i < 4; i++) { memoryData_[i] = - RegisterValue(sourceValues_[i].getAsVector().ptr, + RegisterValue(sourceValues_[i].getAsVector(), 4 * sizeof(uint32_t)); } break; @@ -6005,7 +6005,7 @@ void Instruction::execute() { // STORE for (int i = 0; i < 4; i++) { memoryData_[i] = - RegisterValue(sourceValues_[i].getAsVector().ptr, + RegisterValue(sourceValues_[i].getAsVector(), 4 * sizeof(uint32_t)); } // if #imm post-index, value can only be 64 @@ -6020,8 +6020,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue(t.ptr, 16 * sizeof(uint8_t)); - memoryData_[1] = RegisterValue(t2.ptr, 16 * sizeof(uint8_t)); + memoryData_[0] = RegisterValue(t, 16 * sizeof(uint8_t)); + memoryData_[1] = RegisterValue(t2, 16 * sizeof(uint8_t)); break; } case Opcode::AArch64_ST1Twov16b_POST: { // st1 {vt.16b, vt2.16b}, @@ -6029,8 +6029,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue(t.ptr, 16 * 
sizeof(uint8_t)); - memoryData_[1] = RegisterValue(t2.ptr, 16 * sizeof(uint8_t)); + memoryData_[0] = RegisterValue(t, 16 * sizeof(uint8_t)); + memoryData_[1] = RegisterValue(t2, 16 * sizeof(uint8_t)); // if #imm post-index, value can only be 32 const uint64_t postIndex = @@ -6044,8 +6044,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue(t.ptr, 2 * sizeof(uint64_t)); - memoryData_[1] = RegisterValue(t2.ptr, 2 * sizeof(uint64_t)); + memoryData_[0] = RegisterValue(t, 2 * sizeof(uint64_t)); + memoryData_[1] = RegisterValue(t2, 2 * sizeof(uint64_t)); break; } case Opcode::AArch64_ST1Twov2d_POST: { // st1 {vt.2d, vt2.2d}, @@ -6053,8 +6053,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue(t.ptr, 2 * sizeof(uint64_t)); - memoryData_[1] = RegisterValue(t2.ptr, 2 * sizeof(uint64_t)); + memoryData_[0] = RegisterValue(t, 2 * sizeof(uint64_t)); + memoryData_[1] = RegisterValue(t2, 2 * sizeof(uint64_t)); // if #imm post-index, value can only be 32 const uint64_t postIndex = @@ -6068,8 +6068,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue(t.ptr, 4 * sizeof(uint32_t)); - memoryData_[1] = RegisterValue(t2.ptr, 4 * sizeof(uint32_t)); + memoryData_[0] = RegisterValue(t, 4 * sizeof(uint32_t)); + memoryData_[1] = RegisterValue(t2, 4 * sizeof(uint32_t)); break; } case Opcode::AArch64_ST1Twov4s_POST: { // st1 {vt.4s, vt2.4s}, @@ -6077,8 +6077,8 @@ void Instruction::execute() { // STORE const auto t = sourceValues_[0].getAsVector(); const auto t2 = sourceValues_[1].getAsVector(); - memoryData_[0] = RegisterValue(t.ptr, 4 * sizeof(uint32_t)); - memoryData_[1] = RegisterValue(t2.ptr, 4 * sizeof(uint32_t)); + memoryData_[0] = 
RegisterValue(t, 4 * sizeof(uint32_t)); + memoryData_[1] = RegisterValue(t2, 4 * sizeof(uint32_t)); // if #imm post-index, value can only be 32 const uint64_t postIndex = @@ -6433,7 +6433,7 @@ void Instruction::execute() { const uint64_t PL_bits = VL_bits / 8; const uint16_t partition_num = PL_bits / 8; const auto p = sourceValues_[0].getAsVector(); - memoryData_[0] = RegisterValue(p.ptr, partition_num); + memoryData_[0] = RegisterValue(p, partition_num); break; } case Opcode::AArch64_STR_ZA: { // str za[wv, #imm], [xn|sp{, #imm, mul @@ -6448,14 +6448,14 @@ void Instruction::execute() { const auto zaRow = sourceValues_[(wv + imm) % zaRowCount].getAsVector(); - memoryData_[0] = RegisterValue(zaRow.ptr, zaRowCount); + memoryData_[0] = RegisterValue(zaRow, zaRowCount); break; } case Opcode::AArch64_STR_ZXI: { // str zt, [xn{, #imm, mul vl}] // STORE const uint16_t partition_num = VL_bits / 8; const auto z = sourceValues_[0].getAsVector(); - memoryData_[0] = RegisterValue(z.ptr, partition_num); + memoryData_[0] = RegisterValue(z, partition_num); break; } case Opcode::AArch64_STURBBi: { // sturb wd, [xn, #imm] diff --git a/src/lib/arch/riscv/ExceptionHandler.cc b/src/lib/arch/riscv/ExceptionHandler.cc index 782ddf6cf2..839912c54c 100644 --- a/src/lib/arch/riscv/ExceptionHandler.cc +++ b/src/lib/arch/riscv/ExceptionHandler.cc @@ -838,8 +838,11 @@ bool ExceptionHandler::readBufferThen(uint64_t ptr, uint64_t length, // Append data to buffer assert(response->data && "unhandled failed read in exception handler"); uint8_t bytesRead = response->target.size; - const uint8_t* data = response->data.getAsVector().ptr; + // TODO clean up malloc + uint8_t* data = (uint8_t*)malloc(bytesRead); + response->data.getAsVector().copyTo(data, bytesRead); dataBuffer_.insert(dataBuffer_.end(), data, data + bytesRead); + free(data); memory_.clearCompletedReads(); // If there is more data, rerun this function for next chunk diff --git a/src/lib/memory/FixedLatencyMemoryInterface.cc 
b/src/lib/memory/FixedLatencyMemoryInterface.cc index 9d73940da9..55c989a3b8 100644 --- a/src/lib/memory/FixedLatencyMemoryInterface.cc +++ b/src/lib/memory/FixedLatencyMemoryInterface.cc @@ -35,7 +35,7 @@ void FixedLatencyMemoryInterface::tick() { auto ptr = memory_ + target.address; // Copy the data from the RegisterValue to memory - memcpy(ptr, request.data.getAsVector().ptr, target.size); + request.data.getAsVector().copyTo(ptr, target.size); } else { // Read: read data into `completedReads` if (target.address + target.size > size_ || diff --git a/src/lib/memory/FlatMemoryInterface.cc b/src/lib/memory/FlatMemoryInterface.cc index b9524f54df..2241a51dfe 100644 --- a/src/lib/memory/FlatMemoryInterface.cc +++ b/src/lib/memory/FlatMemoryInterface.cc @@ -35,7 +35,7 @@ void FlatMemoryInterface::requestWrite(const MemoryAccessTarget& target, auto ptr = memory_ + target.address; // Copy the data from the RegisterValue to memory - memcpy(ptr, data.getAsVector().ptr, target.size); + data.getAsVector().copyTo(ptr, target.size); } const span FlatMemoryInterface::getCompletedReads() const { diff --git a/src/lib/models/emulation/Core.cc b/src/lib/models/emulation/Core.cc index a2a27663c7..96730a9aca 100644 --- a/src/lib/models/emulation/Core.cc +++ b/src/lib/models/emulation/Core.cc @@ -34,8 +34,12 @@ Core::Core(memory::MemoryInterface& instructionMemory, // Query and apply initial state auto state = isa.getInitialState(); applyStateChange(state); + + fetchBuffer_ = new uint8_t[FETCH_SIZE]; } +Core::~Core() { delete[] fetchBuffer_; } + void Core::tick() { if (hasHalted_) return; @@ -53,9 +57,10 @@ void Core::tick() { // We only fetch one instruction at a time, so only ever one result in // complete reads const auto& instructionBytes = instructionMemory_.getCompletedReads()[0].data; + instructionBytes.getAsVector().copyTo(fetchBuffer_, FETCH_SIZE); + // Predecode fetched data - auto bytesRead = isa_.predecode(instructionBytes.getAsVector().ptr, - FETCH_SIZE, pc_, macroOp_); + 
auto bytesRead = isa_.predecode(fetchBuffer_, FETCH_SIZE, pc_, macroOp_); // Clear the fetched data instructionMemory_.clearCompletedReads(); diff --git a/src/lib/pipeline/FetchUnit.cc b/src/lib/pipeline/FetchUnit.cc index 8294d6f5e2..9ac6ace077 100644 --- a/src/lib/pipeline/FetchUnit.cc +++ b/src/lib/pipeline/FetchUnit.cc @@ -112,12 +112,10 @@ void FetchUnit::tick() { // Data has been successfully read, move into fetch buffer // TODO: Handle memory faults assert(fetched[fetchIndex].data && "Memory read failed"); - const uint8_t* fetchData = - fetched[fetchIndex].data.getAsVector().ptr; // Copy fetched data to fetch buffer after existing data - std::memcpy(fetchBuffer_ + bufferedBytes_, fetchData + bufferOffset, - blockSize_ - bufferOffset); + fetched[fetchIndex].data.getAsVector().copyTo( + fetchBuffer_ + bufferedBytes_, blockSize_ - bufferOffset, bufferOffset); bufferedBytes_ += blockSize_ - bufferOffset; buffer = fetchBuffer_; diff --git a/sst/SimEngMemInterface.cc b/sst/SimEngMemInterface.cc index 99ecc58b47..7e9c210dbc 100644 --- a/sst/SimEngMemInterface.cc +++ b/sst/SimEngMemInterface.cc @@ -76,6 +76,11 @@ std::vector SimEngMemInterface::makeSSTRequests( std::vector SimEngMemInterface::splitAggregatedRequest( AggregateWriteRequest* aggrReq, uint64_t addrStart, uint64_t size) { std::vector requests; + + // TODO avoid malloc and copy altogether + uint8_t* data = (uint8_t*)malloc(size); + aggrReq->data.getAsVector().copyTo(data, size); + uint64_t dataIndex = 0; // Determine the number of cache-lines needed to store the data in the write // request @@ -102,7 +107,6 @@ std::vector SimEngMemInterface::splitAggregatedRequest( // Fill the payload vector currReqSize number of bytes starting // and inclusive of the dataIndex. 
- const uint8_t* data = aggrReq->data.getAsVector().ptr; memcpy((void*)&payload[0], &(data[dataIndex]), currReqSize); StandardMem::Request* writeReq = new StandardMem::Write(addrStart, currReqSize, payload); @@ -111,6 +115,7 @@ std::vector SimEngMemInterface::splitAggregatedRequest( addrStart += currReqSize; requests.push_back(writeReq); } + free(data); return requests; } From ca89844ed561f57d35d6d46ddf952299369390bb Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Thu, 15 May 2025 17:01:46 +0100 Subject: [PATCH 14/16] Cleanup --- src/include/simeng/RegisterValue.hh | 8 +- .../simeng/arch/aarch64/helpers/bitmanip.hh | 2 - .../simeng/arch/aarch64/helpers/neon.hh | 23 +-- .../simeng/arch/aarch64/helpers/sve.hh | 193 +++++++++--------- test/unit/ArchitecturalRegisterFileSetTest.cc | 3 - test/unit/RegisterFileSetTest.cc | 3 - 6 files changed, 109 insertions(+), 123 deletions(-) diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 3fffb7374a..1b53120899 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -57,7 +57,7 @@ class RegisterValue { template , T>* = nullptr> RegisterValue(T value, uint16_t bytes = sizeof(T)) : bytes(bytes) { - // Ensure the high bits are zeroed + // Ensure the high bits remain zeroed size_t numBytesToCopy = bytes; if (bytes > sizeof(T)) { numBytesToCopy = sizeof(T); @@ -78,7 +78,8 @@ class RegisterValue { /** Create a new RegisterValue of size `capacity`, copying `bytes` * from `ptr`. */ - RegisterValue(const uint8_t* ptr, uint16_t bytes, uint16_t capacity) + template + RegisterValue(const T* ptr, uint16_t bytes, uint16_t capacity) : bytes(capacity) { assert(capacity >= bytes && "Capacity is less than requested bytes"); uint8_t* dest; @@ -95,7 +96,8 @@ class RegisterValue { } /** Create a new RegisterValue of size `bytes`, copying data from `ptr`. 
*/ - RegisterValue(const uint8_t* ptr, uint16_t bytes) + template + RegisterValue(const T* ptr, uint16_t bytes) : RegisterValue(ptr, bytes, bytes) {} /** Create a new RegisterValue of size 'bytes', copy data from the safePointer diff --git a/src/include/simeng/arch/aarch64/helpers/bitmanip.hh b/src/include/simeng/arch/aarch64/helpers/bitmanip.hh index 92915274b3..9956425589 100644 --- a/src/include/simeng/arch/aarch64/helpers/bitmanip.hh +++ b/src/include/simeng/arch/aarch64/helpers/bitmanip.hh @@ -68,8 +68,6 @@ uint64_t rbit(srcValContainer& sourceValues, * Returns array of uint8_t with number of elements = bytes in T. */ template std::array rev(srcValContainer& sourceValues) { - // auto bytes = sourceValues[0].getAsVector(); - std::array forward; sourceValues[0].getAsVector().copyTo(&forward, sizeof(T)); std::array reversed; diff --git a/src/include/simeng/arch/aarch64/helpers/neon.hh b/src/include/simeng/arch/aarch64/helpers/neon.hh index d03a22d79c..3063f8df15 100644 --- a/src/include/simeng/arch/aarch64/helpers/neon.hh +++ b/src/include/simeng/arch/aarch64/helpers/neon.hh @@ -87,9 +87,9 @@ RegisterValue vecBicShift_imm( * Returns correctly formatted RegisterValue. */ template RegisterValue vecBitwiseInsert(srcValContainer& sourceValues, bool isBif) { - const safePointer d = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto d = sourceValues[0].getAsVector(); + const auto n = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); uint64_t out[2] = {0}; for (int i = 0; i < (I / 8); i++) { out[i] = @@ -105,9 +105,9 @@ RegisterValue vecBitwiseInsert(srcValContainer& sourceValues, bool isBif) { * Returns correctly formatted RegisterValue. 
*/ template RegisterValue vecBsl(srcValContainer& sourceValues) { - const safePointer d = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto d = sourceValues[0].getAsVector(); + const auto n = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); uint64_t out[2] = {0}; for (int i = 0; i < (I / 8); i++) { out[i] = (d[i] & n[i]) | (~d[i] & m[i]); @@ -142,7 +142,7 @@ RegisterValue vecCompare(srcValContainer& sourceValues, bool cmpToZero, * Returns correctly formatted RegisterValue. */ template RegisterValue vecCountPerByte(srcValContainer& sourceValues) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); T out[16 / sizeof(T)] = {0}; for (int i = 0; i < I; i++) { for (size_t j = 0; j < (sizeof(T) * 8); j++) { @@ -569,9 +569,7 @@ RegisterValue vecUMaxP(srcValContainer& sourceValues) { // Concatenate the vectors T temp[2 * I]; n.copyTo(temp, sizeof(T) * I); - // memcpy(temp, n.ptr, sizeof(T) * I); m.copyTo(temp + (sizeof(T) * I),sizeof(T) * I); - // memcpy(temp + (sizeof(T) * I), m.ptr, sizeof(T) * I); // Compare each adjacent pair of elements T out[I]; for (int i = 0; i < I; i++) { @@ -595,9 +593,6 @@ RegisterValue vecUMinP(srcValContainer& sourceValues) { n.copyTo(temp, sizeof(T) * I); m.copyTo(temp + (sizeof(T) * I), sizeof(T) * I); - // memcpy(temp, n.ptr, sizeof(T) * I); - // memcpy(temp + (sizeof(T) * I), m.ptr, sizeof(T) * I); - T out[I]; for (int i = 0; i < I; i++) { out[i] = std::min(temp[2 * i], temp[2 * i + 1]); @@ -829,7 +824,7 @@ RegisterValue vecTbl( assert(I == 8 || I == 16); // Vm contains the indices to fetch from table - const safePointer Vm = + const auto Vm = sourceValues[metadata.operandCount - 2] .getAsVector(); // final operand is vecMovi_imm @@ -841,7 +836,7 @@ RegisterValue vecTbl( const uint16_t tableSize = 16 * n_table_regs; std::vector table(tableSize, 0); for 
(uint8_t i = 0; i < n_table_regs; i++) { - const safePointer currentVector = sourceValues[i].getAsVector(); + const auto currentVector = sourceValues[i].getAsVector(); for (uint8_t j = 0; j < 16; j++) { table[16 * i + j] = currentVector[j]; } diff --git a/src/include/simeng/arch/aarch64/helpers/sve.hh b/src/include/simeng/arch/aarch64/helpers/sve.hh index 8ed710343e..ffaa5ccc0c 100644 --- a/src/include/simeng/arch/aarch64/helpers/sve.hh +++ b/src/include/simeng/arch/aarch64/helpers/sve.hh @@ -15,8 +15,8 @@ namespace aarch64 { template RegisterValue sveAdd_3ops(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -34,7 +34,7 @@ RegisterValue sveAdd_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); const T imm = static_cast(metadata.operands[2].imm); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -56,7 +56,7 @@ RegisterValue sveAddPredicated_const( const uint16_t VL_bits) { bool isFP = std::is_floating_point::value; const auto p = sourceValues[0].getAsVector(); - const safePointer d = sourceValues[1].getAsVector(); + const auto d = sourceValues[1].getAsVector(); const auto con = isFP ? 
metadata.operands[3].fp : metadata.operands[3].imm; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -79,8 +79,8 @@ template RegisterValue sveAddPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { const auto p = sourceValues[0].getAsVector(); - const safePointer d = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto d = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -101,7 +101,7 @@ template RegisterValue sveAddvPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { const auto p = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); + const auto n = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); uint64_t out = 0; @@ -123,8 +123,8 @@ RegisterValue sveAdr_packedOffsets( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -146,8 +146,8 @@ std::tuple, uint8_t> sveCmpPredicated_toPred( const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits, bool cmpToImm, std::function func) { const auto p = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); - const safePointer m = !cmpToImm ? sourceValues[2].getAsVector() : safePointer(); + const auto n = sourceValues[1].getAsVector(); + const auto m = !cmpToImm ? sourceValues[2].getAsVector() : safePointer(); T imm = cmpToImm ? 
static_cast(metadata.operands[3].imm) : 0; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -210,8 +210,8 @@ std::array sveComparePredicated_vecsToPred( const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits, bool cmpToZero, std::function func) { const auto p = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); - const safePointer m = !cmpToZero ? sourceValues[2].getAsVector() : safePointer(); + const auto n = sourceValues[1].getAsVector(); + const auto m = !cmpToZero ? sourceValues[2].getAsVector() : safePointer(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); std::array out = {0}; @@ -303,7 +303,7 @@ RegisterValue sveDup_vecIndexed( const uint16_t VL_bits) { const uint16_t index = static_cast(metadata.operands[1].vector_index); - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -324,9 +324,9 @@ RegisterValue sveDup_vecIndexed( template RegisterValue sveFabsPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = sourceValues[2].getAsVector(); + const auto n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -351,7 +351,7 @@ RegisterValue sveFaddaPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { const auto p = sourceValues[0].getAsVector(); const T n = sourceValues[1].get(); - const safePointer m = sourceValues[2].getAsVector(); + const auto m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -376,8 +376,8 @@ RegisterValue sveFcaddPredicated( const simeng::arch::aarch64::InstructionMetadata& 
metadata, const uint16_t VL_bits) { const auto p = sourceValues[0].getAsVector(); - const safePointer dn = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto dn = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); const uint32_t imm = metadata.operands[4].imm; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -420,10 +420,10 @@ RegisterValue sveFcmlaPredicated( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const safePointer da = sourceValues[0].getAsVector(); + const auto da = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = sourceValues[2].getAsVector(); - const safePointer m = sourceValues[3].getAsVector(); + const auto n = sourceValues[2].getAsVector(); + const auto m = sourceValues[3].getAsVector(); const uint32_t imm = metadata.operands[4].imm; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -470,7 +470,7 @@ RegisterValue sveFcpy_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const safePointer dn = sourceValues[0].getAsVector(); + const auto dn = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); const T imm = metadata.operands[2].fp; @@ -601,8 +601,8 @@ template std::enable_if_t, RegisterValue> sveFDivPredicated( srcValContainer& sourceValues, const uint16_t VL_bits) { const auto p = sourceValues[0].getAsVector(); - const safePointer dn = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto dn = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -628,10 +628,10 @@ std::enable_if_t, RegisterValue> sveFDivPredicated( template RegisterValue 
sveFmadPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = sourceValues[2].getAsVector(); - const safePointer m = sourceValues[3].getAsVector(); + const auto n = sourceValues[2].getAsVector(); + const auto m = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -652,10 +652,10 @@ RegisterValue sveFmadPredicated_vecs(srcValContainer& sourceValues, template RegisterValue sveFmlsPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = sourceValues[2].getAsVector(); - const safePointer m = sourceValues[3].getAsVector(); + const auto n = sourceValues[2].getAsVector(); + const auto m = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -676,10 +676,10 @@ RegisterValue sveFmlsPredicated_vecs(srcValContainer& sourceValues, template RegisterValue sveFmsbPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = sourceValues[2].getAsVector(); - const safePointer m = sourceValues[3].getAsVector(); + const auto n = sourceValues[2].getAsVector(); + const auto m = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -699,8 +699,8 @@ RegisterValue sveFmsbPredicated_vecs(srcValContainer& sourceValues, template RegisterValue sveFmul_3ops(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer n = 
sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -716,9 +716,9 @@ RegisterValue sveFmul_3ops(srcValContainer& sourceValues, template RegisterValue sveFnegPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = sourceValues[2].getAsVector(); + const auto n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -740,10 +740,10 @@ RegisterValue sveFnegPredicated(srcValContainer& sourceValues, template RegisterValue sveFnmlsPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = sourceValues[2].getAsVector(); - const safePointer m = sourceValues[3].getAsVector(); + const auto n = sourceValues[2].getAsVector(); + const auto m = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -765,10 +765,10 @@ RegisterValue sveFnmlsPredicated(srcValContainer& sourceValues, template RegisterValue sveFnmsbPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); - const safePointer a = sourceValues[3].getAsVector(); + const auto m = sourceValues[2].getAsVector(); + const auto a = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / 
(sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -790,9 +790,9 @@ RegisterValue sveFnmsbPredicated(srcValContainer& sourceValues, template std::enable_if_t, RegisterValue> sveFrintnPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = sourceValues[2].getAsVector(); + const auto n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -828,9 +828,9 @@ sveFrintnPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { template RegisterValue sveFsqrtPredicated_2vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = sourceValues[2].getAsVector(); + const auto n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -871,7 +871,7 @@ RegisterValue sveInc_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); const uint8_t imm = static_cast(metadata.operands[2].imm); @@ -966,8 +966,8 @@ RegisterValue sveLogicOpPredicated_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits, std::function func) { const auto p = sourceValues[0].getAsVector(); - const safePointer dn = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto dn = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -989,8 +989,8 @@ template RegisterValue 
sveLogicOpUnPredicated_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits, std::function func) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1008,7 +1008,7 @@ RegisterValue sveLsl_imm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); const T imm = static_cast(metadata.operands[2].imm); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -1029,7 +1029,7 @@ RegisterValue sveMax_vecImm( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); T imm = static_cast(metadata.operands[2].imm); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -1049,8 +1049,8 @@ template RegisterValue sveMaxPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { const auto p = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto n = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1072,10 +1072,10 @@ RegisterValue sveMaxPredicated_vecs(srcValContainer& sourceValues, template RegisterValue sveMlaPredicated_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = 
sourceValues[2].getAsVector(); - const safePointer m = sourceValues[3].getAsVector(); + const auto n = sourceValues[2].getAsVector(); + const auto m = sourceValues[3].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1098,9 +1098,9 @@ RegisterValue sveMlaIndexed_vecs( srcValContainer& sourceValues, const simeng::arch::aarch64::InstructionMetadata& metadata, const uint16_t VL_bits) { - const safePointer d = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto d = sourceValues[0].getAsVector(); + const auto n = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); const size_t index = static_cast(metadata.operands[2].vector_index); const uint16_t elemsPer128 = 128 / (sizeof(T) * 8); @@ -1126,7 +1126,7 @@ RegisterValue sveMovprfxPredicated_destToZero(srcValContainer& sourceValues, const uint16_t VL_bits) { // TODO: Adopt hint logic of the MOVPRFX instruction const auto p = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); + const auto n = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1150,9 +1150,9 @@ template RegisterValue sveMovprfxPredicated_destUnchanged(srcValContainer& sourceValues, const uint16_t VL_bits) { // TODO: Adopt hint logic of the MOVPRFX instruction - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = sourceValues[2].getAsVector(); + const auto n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1179,13 +1179,9 @@ RegisterValue sveMulPredicated( const uint16_t VL_bits, bool useImm) { bool isFP = std::is_floating_point::value; const auto p = 
sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); - const safePointer m = !useImm ? sourceValues[2].getAsVector() : safePointer() ; + const auto n = sourceValues[1].getAsVector(); + const auto m = !useImm ? sourceValues[2].getAsVector() : safePointer() ; T imm = useImm ? (isFP ? metadata.operands[3].fp : metadata.operands[3].imm) : T(); - // if (useImm) - // imm = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; - // else - // m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1211,8 +1207,8 @@ template RegisterValue sveMulhPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { const auto p = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto n = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1248,8 +1244,8 @@ RegisterValue sveMulhPredicated(srcValContainer& sourceValues, template RegisterValue sveOrr_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1366,7 +1362,7 @@ std::array sveRev_predicates(srcValContainer& sourceValues, template RegisterValue sveRev_vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); + const auto n = sourceValues[0].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1387,8 +1383,8 @@ template RegisterValue sveSel_zpzz(srcValContainer& sourceValues, const uint16_t 
VL_bits) { const auto p = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto n = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1408,7 +1404,7 @@ RegisterValue sveSel_zpzz(srcValContainer& sourceValues, template RegisterValue sveSminv(srcValContainer& sourceValues, const uint16_t VL_bits) { const auto p = sourceValues[0].getAsVector(); - const safePointer n = sourceValues[1].getAsVector(); + const auto n = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out = std::numeric_limits::max(); @@ -1427,8 +1423,8 @@ RegisterValue sveSminv(srcValContainer& sourceValues, const uint16_t VL_bits) { template RegisterValue sveSub_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1447,8 +1443,8 @@ template RegisterValue sveSubrPredicated_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { const auto p = sourceValues[0].getAsVector(); - const safePointer dn = sourceValues[1].getAsVector(); - const safePointer m = sourceValues[2].getAsVector(); + const auto dn = sourceValues[1].getAsVector(); + const auto m = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1475,7 +1471,7 @@ RegisterValue sveSubPredicated_imm( const uint16_t VL_bits) { bool isFP = std::is_floating_point::value; const auto p = sourceValues[0].getAsVector(); - const safePointer dn = sourceValues[1].getAsVector(); + const auto dn = sourceValues[1].getAsVector(); 
const auto imm = isFP ? metadata.operands[3].fp : metadata.operands[3].imm; const uint16_t partition_num = VL_bits / (sizeof(T) * 8); @@ -1501,9 +1497,9 @@ RegisterValue sveSubPredicated_imm( template RegisterValue sveSxtPredicated(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer d = sourceValues[0].getAsVector(); + const auto d = sourceValues[0].getAsVector(); const auto p = sourceValues[1].getAsVector(); - const safePointer n = sourceValues[2].getAsVector(); + const auto n = sourceValues[2].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1526,8 +1522,8 @@ RegisterValue sveSxtPredicated(srcValContainer& sourceValues, template RegisterValue sveTrn1_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1545,8 +1541,8 @@ RegisterValue sveTrn1_3vecs(srcValContainer& sourceValues, template RegisterValue sveTrn2_3vecs(srcValContainer& sourceValues, const uint16_t VL_bits) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1611,8 +1607,8 @@ uint64_t sveUqdec(srcValContainer& sourceValues, template RegisterValue sveUzp_vecs(srcValContainer& sourceValues, const uint16_t VL_bits, bool isUzp1) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / 
(sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1702,8 +1698,8 @@ std::array sveZip_preds(srcValContainer& sourceValues, template RegisterValue sveZip_vecs(srcValContainer& sourceValues, const uint16_t VL_bits, bool isZip2) { - const safePointer n = sourceValues[0].getAsVector(); - const safePointer m = sourceValues[1].getAsVector(); + const auto n = sourceValues[0].getAsVector(); + const auto m = sourceValues[1].getAsVector(); const uint16_t partition_num = VL_bits / (sizeof(T) * 8); T out[256 / sizeof(T)] = {0}; @@ -1738,7 +1734,8 @@ std::vector sve_merge_store_data(const safePointer d, const sa // Determine size of array based on the size of the memory access (This is // the C specifier in sve instructions) - std::array mData; + C mData[256/sizeof(C)]; + // std::array mData; uint16_t mdSize = 0; for (uint16_t x = 0; x < numVecElems; x++) { @@ -1749,13 +1746,13 @@ std::vector sve_merge_store_data(const safePointer d, const sa mdSize++; } else if (mdSize) { outputData.push_back( - RegisterValue(reinterpret_cast(mData.data()), mdSize * sizeof(C))); + RegisterValue(mData, mdSize * sizeof(C), mdSize * sizeof(C))); mdSize = 0; } } if (mdSize) { outputData.push_back( - RegisterValue(reinterpret_cast(mData.data()), mdSize * sizeof(C))); + RegisterValue(mData, mdSize * sizeof(C), mdSize * sizeof(C))); } return outputData; } diff --git a/test/unit/ArchitecturalRegisterFileSetTest.cc b/test/unit/ArchitecturalRegisterFileSetTest.cc index f1bd978184..59c81ded0b 100644 --- a/test/unit/ArchitecturalRegisterFileSetTest.cc +++ b/test/unit/ArchitecturalRegisterFileSetTest.cc @@ -26,9 +26,6 @@ TEST_F(ArchitecturalRegisterFileSetTest, readWrite) { const Register r0 = {i, 0}; const Register rMax = {i, maxRegTag}; - archRegFileSet.set(r0, RegisterValue(0ull, regSize)); - archRegFileSet.set(rMax, RegisterValue(0ull, regSize)); - EXPECT_EQ(archRegFileSet.get(r0), RegisterValue(0ull, regSize)); EXPECT_EQ(archRegFileSet.get(rMax), RegisterValue(0ull, regSize)); diff --git 
a/test/unit/RegisterFileSetTest.cc b/test/unit/RegisterFileSetTest.cc index 2b4b6e2782..83dfc29bfd 100644 --- a/test/unit/RegisterFileSetTest.cc +++ b/test/unit/RegisterFileSetTest.cc @@ -33,9 +33,6 @@ TEST_F(RegisterFileSetTest, readWrite) { const Register r0 = {i, 0}; const Register rMax = {i, maxRegTag}; - regFileSet.set(r0, RegisterValue(0ull, regSize)); - regFileSet.set(rMax, RegisterValue(0ull, regSize)); - EXPECT_EQ(regFileSet.get(r0), RegisterValue(0ull, regSize)); EXPECT_EQ(regFileSet.get(rMax), RegisterValue(0ull, regSize)); From 3e4de99c022684826b0b8b0f2f750f01bc2a63bc Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Thu, 15 May 2025 17:33:35 +0100 Subject: [PATCH 15/16] Remove reinterpret casts --- src/lib/arch/aarch64/ExceptionHandler.cc | 5 ++--- src/lib/arch/riscv/ExceptionHandler.cc | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/lib/arch/aarch64/ExceptionHandler.cc b/src/lib/arch/aarch64/ExceptionHandler.cc index 936b000b32..3b7357fbab 100644 --- a/src/lib/arch/aarch64/ExceptionHandler.cc +++ b/src/lib/arch/aarch64/ExceptionHandler.cc @@ -120,7 +120,7 @@ bool ExceptionHandler::init() { // Get pointer and size of the buffer uint64_t iDst = bufPtr; // Write data for this buffer in 128-byte chunks - auto iSrc = reinterpret_cast(dataBuffer_.data()); + auto iSrc = dataBuffer_.data(); while (totalRead > 0) { uint8_t len = totalRead > 128 ? 128 : static_cast(totalRead); @@ -160,7 +160,7 @@ bool ExceptionHandler::init() { uint64_t iLength = static_cast(totalRead); // Write data for this buffer in 128-byte chunks - auto iSrc = reinterpret_cast(dataBuffer_.data()); + auto iSrc = dataBuffer_.data(); while (iLength > 0) { uint8_t len = iLength > 128 ? 
128 : static_cast(iLength); stateChange.memoryAddresses.push_back({iDst, len}); @@ -799,7 +799,6 @@ void ExceptionHandler::readLinkAt(span path) { const auto bufSize = registerFileSet.get(R3).get(); uint8_t buffer[kernel::Linux::LINUX_PATH_MAX]; - // TODO check reinterpret cast is safe here auto result = linux_.readlinkat(dirfd, path.data(), reinterpret_cast(buffer), bufSize); diff --git a/src/lib/arch/riscv/ExceptionHandler.cc b/src/lib/arch/riscv/ExceptionHandler.cc index 839912c54c..ac7f589e70 100644 --- a/src/lib/arch/riscv/ExceptionHandler.cc +++ b/src/lib/arch/riscv/ExceptionHandler.cc @@ -118,7 +118,7 @@ bool ExceptionHandler::init() { // Get pointer and size of the buffer uint64_t iDst = bufPtr; // Write data for this buffer in 128-byte chunks - auto iSrc = reinterpret_cast(dataBuffer_.data()); + auto iSrc = dataBuffer_.data(); while (totalRead > 0) { uint8_t len = totalRead > 128 ? 128 : static_cast(totalRead); @@ -158,7 +158,7 @@ bool ExceptionHandler::init() { uint64_t iLength = static_cast(totalRead); // Write data for this buffer in 128-byte chunks - auto iSrc = reinterpret_cast(dataBuffer_.data()); + auto iSrc = dataBuffer_.data(); while (iLength > 0) { uint8_t len = iLength > 128 ? 
128 : static_cast(iLength); stateChange.memoryAddresses.push_back({iDst, len}); @@ -779,7 +779,6 @@ void ExceptionHandler::readLinkAt(span path) { const auto bufSize = registerFileSet.get(R3).get(); uint8_t buffer[kernel::Linux::LINUX_PATH_MAX]; - // TODO check if reinterpret cast is dangerous here auto result = linux_.readlinkat(dirfd, path.data(), reinterpret_cast(buffer), bufSize); From e1c3ed3dc626ff379835e95a90a0a6726f3ffab7 Mon Sep 17 00:00:00 2001 From: dANW34V3R Date: Fri, 16 May 2025 11:49:30 +0100 Subject: [PATCH 16/16] Remove extern --- src/include/simeng/RegisterValue.hh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/include/simeng/RegisterValue.hh b/src/include/simeng/RegisterValue.hh index 1b53120899..5a1a0cb04f 100644 --- a/src/include/simeng/RegisterValue.hh +++ b/src/include/simeng/RegisterValue.hh @@ -11,10 +11,8 @@ namespace simeng { -inline Pool pool = Pool(); - /** Global memory pool used by RegisterValue class. */ -extern Pool pool; +inline Pool pool = Pool(); template struct safePointer {