diff --git a/CMakeLists.txt b/CMakeLists.txt
index 73636245..66d66dea 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -146,6 +146,7 @@ set(srcs
     src/util/bit.hpp
     src/util/parse.hpp
     src/util/pretty.hpp
+    src/util/large_pages.hpp
     src/util/static_vector.hpp
     src/util/types.hpp
     src/util/vec/sse2.hpp
diff --git a/src/search.cpp b/src/search.cpp
index 07f9fc98..6269d25c 100644
--- a/src/search.cpp
+++ b/src/search.cpp
@@ -10,6 +10,7 @@
 #include "tm.hpp"
 #include "tuned.hpp"
 #include "uci.hpp"
+#include "util/large_pages.hpp"
 #include "util/log2.hpp"
 #include "util/types.hpp"
 #include <algorithm>
@@ -118,9 +119,9 @@ void Searcher::initialize(size_t thread_count) {
     started_barrier = std::make_unique<std::barrier<>>(1 + thread_count);
 
     if (thread_count > 0) {
-        m_workers.push_back(std::make_unique<Worker>(*this, ThreadType::MAIN));
+        m_workers.push_back(make_unique_huge_page<Worker>(*this, ThreadType::MAIN));
         for (size_t i = 1; i < thread_count; i++) {
-            m_workers.push_back(std::make_unique<Worker>(*this, ThreadType::SECONDARY));
+            m_workers.push_back(make_unique_huge_page<Worker>(*this, ThreadType::SECONDARY));
         }
     }
 }
diff --git a/src/search.hpp b/src/search.hpp
index 427bbb84..8f1764fc 100644
--- a/src/search.hpp
+++ b/src/search.hpp
@@ -126,7 +126,7 @@ class Searcher {
     }
 
 private:
-    std::vector<std::unique_ptr<Worker>> m_workers;
+    std::vector<unique_ptr_huge_page<Worker>> m_workers;
 };
 
 class alignas(128) Worker {
diff --git a/src/tt.cpp b/src/tt.cpp
index 8f3c7456..52368c91 100644
--- a/src/tt.cpp
+++ b/src/tt.cpp
@@ -63,10 +63,6 @@ TT::TT(size_t mb) :
     resize(mb);
 }
 
-TT::~TT() {
-    aligned_free(m_clusters);
-}
-
 std::optional<TTData> TT::probe(const Position& pos, i32 ply) const {
     size_t     idx     = mulhi64(pos.get_hash_key(), m_size);
     const auto cluster = this->m_clusters[idx].load();
@@ -161,13 +157,12 @@ void TT::store(const Position& pos,
 }
 
 void TT::resize(size_t mb) {
-    aligned_free(m_clusters);
 
     size_t bytes   = mb * 1024 * 1024;
     size_t entries = bytes / sizeof(TTClusterMemory);
     m_size         = entries;
-    m_clusters = static_cast<TTClusterMemory*>(aligned_alloc(TT_ALIGNMENT, bytes));
+    m_clusters     = make_unique_for_overwrite_huge_page<TTClusterMemory[]>(m_size);
 
     clear();
 }
diff --git a/src/tt.hpp b/src/tt.hpp
index 1c802f67..5984c59b 100644
--- a/src/tt.hpp
+++ b/src/tt.hpp
@@ -1,6 +1,7 @@
 #pragma once
 
 #include "position.hpp"
+#include "util/large_pages.hpp"
 #include <atomic>
 #include <cstdint>
 #include <optional>
@@ -90,7 +91,6 @@ class TT {
     static constexpr u8 AGE_MASK = 0x1F;
 
     TT(size_t mb = DEFAULT_SIZE_MB);
-    ~TT();
 
     std::optional<TTData> probe(const Position& position, i32 ply) const;
     void                  store(const Position& position,
@@ -107,9 +107,9 @@ class TT {
     i32 hashfull() const;
 
 private:
-    TTClusterMemory* m_clusters;
-    size_t           m_size;
-    u8               m_age;
+    unique_ptr_huge_page<TTClusterMemory[]> m_clusters;
+    size_t                                  m_size;
+    u8                                      m_age;
 };
 
 } // namespace Clockwork
diff --git a/src/util/large_pages.hpp b/src/util/large_pages.hpp
new file mode 100644
index 00000000..75d426e2
--- /dev/null
+++ b/src/util/large_pages.hpp
@@ -0,0 +1,163 @@
+#pragma once
+
+#include <cstddef>
+#include <cstdlib>
+#include <functional>
+#include <memory>
+#include <type_traits>
+#include <utility>
+
+#ifdef __linux__
+    #include <sys/mman.h>
+#elif defined(_WIN32)
+    #include <windows.h>
+#endif
+
+// Owning pointer over huge-page-backed storage. The type-erased deleter
+// destroys the object(s) and returns the memory to the OS; for unbounded
+// arrays (T[]) the deleter receives a pointer to the element type.
+template <typename T>
+using unique_ptr_huge_page =
+    std::conditional_t<std::is_unbounded_array_v<T>,
+                       std::unique_ptr<T, std::function<void(std::remove_extent_t<T>*)>>,
+                       std::unique_ptr<T, std::function<void(T*)>>>;
+
+// Allocates at least `size` bytes backed by 2MB huge pages where the platform
+// supports it, falling back to an ordinary allocation otherwise.
+// NOTE(review): may return nullptr on failure; callers below construct into
+// the result without checking — confirm upstream intent.
+template <typename T>
+T* allocate_huge_page(std::size_t size) {
+    constexpr static auto huge_page_size = 2 * 1024 * 1024;  // 2MB pages
+
+#ifdef __linux__
+    // Round up to a whole number of huge pages (aligned_alloc requires a
+    // size that is a multiple of the alignment), then ask the kernel to
+    // back the range with transparent huge pages.
+    size    = ((size + huge_page_size - 1) / huge_page_size) * huge_page_size;
+    T* data = static_cast<T*>(std::aligned_alloc(huge_page_size, size));
+    if (data) {
+        madvise(data, size, MADV_HUGEPAGE);
+    }
+    return data;
+#elif defined(_WIN32)
+    HANDLE           hToken;
+    TOKEN_PRIVILEGES tp;
+    LUID             luid;
+
+    // Get the current process token
+    if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken)) {
+        return static_cast<T*>(
+          VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE));
+    }
+
+    // Get the LUID for the SeLockMemoryPrivilege
+    if (!LookupPrivilegeValue(nullptr, SE_LOCK_MEMORY_NAME, &luid)) {
+        CloseHandle(hToken);
+        return static_cast<T*>(
+          VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE));
+    }
+
+    // Enable the SeLockMemoryPrivilege
+    tp.PrivilegeCount           = 1;
+    tp.Privileges[0].Luid       = luid;
+    tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+    if (!AdjustTokenPrivileges(hToken, FALSE, &tp, sizeof(tp), nullptr, nullptr)) {
+        CloseHandle(hToken);
+        return static_cast<T*>(
+          VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE));
+    }
+
+    // Even if AdjustTokenPrivileges returns success, must check GetLastError
+    // for ERROR_NOT_ALL_ASSIGNED
+    if (GetLastError() == ERROR_NOT_ALL_ASSIGNED) {
+        CloseHandle(hToken);
+        return static_cast<T*>(
+          VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE));
+    }
+
+    // Get the large page minimum size (typically 2MB on x64 Windows)
+    SIZE_T largePageMinimum = GetLargePageMinimum();
+    SIZE_T roundedSize = ((size + largePageMinimum - 1) / largePageMinimum) * largePageMinimum;
+
+    // Allocate with MEM_LARGE_PAGES
+    T* data = static_cast<T*>(VirtualAlloc(
+      nullptr, roundedSize, MEM_COMMIT | MEM_RESERVE | MEM_LARGE_PAGES, PAGE_READWRITE));
+
+    if (!data) {
+        CloseHandle(hToken);
+        return static_cast<T*>(
+          VirtualAlloc(nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE));
+    }
+
+    CloseHandle(hToken);
+    return data;
+#else
+    // Fallback for other platforms
+    size    = ((size + huge_page_size - 1) / huge_page_size) * huge_page_size;
+    T* data = static_cast<T*>(std::aligned_alloc(huge_page_size, size));
+    return data;
+#endif
+}
+
+// Releases memory obtained from allocate_huge_page (VirtualAlloc'd on
+// Windows, aligned_alloc'd elsewhere — the free call must match).
+template <typename T>
+void deallocate_huge_page(T* ptr) {
+#ifdef __linux__
+    std::free(ptr);
+#elif defined(_WIN32)
+    VirtualFree(ptr, 0, MEM_RELEASE);
+#else
+    std::free(ptr);
+#endif
+}
+
+// make_unique analogue: constructs a single T in huge-page-backed storage.
+template <typename T, typename... Args>
+    requires(!std::is_array_v<T>)
+unique_ptr_huge_page<T> make_unique_huge_page(Args&&... args) {
+    T* data = allocate_huge_page<T>(sizeof(T));
+    std::construct_at(data, std::forward<Args>(args)...);
+    return unique_ptr_huge_page<T>(data, [](T* ptr) {
+        std::destroy_at(ptr);
+        deallocate_huge_page(ptr);
+    });
+}
+
+// Array form: value-constructs n elements (zero-initializes trivial types).
+template <typename T>
+    requires std::is_unbounded_array_v<T>
+unique_ptr_huge_page<T> make_unique_huge_page(std::size_t n) {
+    using E = std::remove_all_extents_t<T>;
+    E* data = allocate_huge_page<E>(n * sizeof(E));
+    std::uninitialized_value_construct_n(data, n);
+    return unique_ptr_huge_page<T>(data, [n](E* ptr) {
+        std::destroy_n(ptr, n);
+        deallocate_huge_page(ptr);
+    });
+}
+
+// Bounded arrays (T[N]) are disallowed, mirroring std::make_unique.
+template <typename T, typename... Args>
+    requires std::is_bounded_array_v<T>
+void make_unique_huge_page(Args&&...) = delete;
+
+// make_unique_for_overwrite analogue: default-initializes, so trivial types
+// are left uninitialized (the caller is expected to overwrite, e.g. TT::clear).
+template <typename T>
+    requires(!std::is_array_v<T>)
+unique_ptr_huge_page<T> make_unique_for_overwrite_huge_page() {
+    T* data = allocate_huge_page<T>(sizeof(T));
+    new (data) T;
+    return unique_ptr_huge_page<T>(data, [](T* ptr) {
+        std::destroy_at(ptr);
+        deallocate_huge_page(ptr);
+    });
+}
+
+// Array form of the for_overwrite factory: default-initializes n elements.
+template <typename T>
+    requires std::is_unbounded_array_v<T>
+unique_ptr_huge_page<T> make_unique_for_overwrite_huge_page(std::size_t n) {
+    using E = std::remove_all_extents_t<T>;
+    E* data = allocate_huge_page<E>(n * sizeof(E));
+    std::uninitialized_default_construct_n(data, n);
+    return unique_ptr_huge_page<T>(data, [n](E* ptr) {
+        std::destroy_n(ptr, n);
+        deallocate_huge_page(ptr);
+    });
+}
+
+template <typename T, typename... Args>
+    requires std::is_bounded_array_v<T>
+void make_unique_for_overwrite_huge_page(Args&&...) = delete;