From 2fb45c3af903b39ff5fcf5a6ff3cf729f4fc086a Mon Sep 17 00:00:00 2001
From: Jarmo Tiitto
Date: Fri, 28 Jul 2023 16:44:01 +0300
Subject: [PATCH 01/23] Memory mapped file API (mapped_file library)

MIT license in mapped_file.hpp and mapped_file.cpp

- Supports 64-bit file seeking (files larger than 4 GiB).
- Can memory-map portions of the opened file or the entire file.
- Can flush modified read-write mappings back to disk.
- Read-write regions will grow the backing file in multiples of
  4096-byte blocks.
- mapped::file class for accessing a file on disk.
- mapped::region class for memory-mapping a raw area of the file.
- mapped::struct_region template for accessing an on-disk structure.
- mapped::array_region template for accessing an on-disk array of T.
---
 cpp/CMakeLists.txt            |   4 +
 cpp/libraries/mapped_file.cpp | 317 +++++++++++++++++++++++
 cpp/libraries/mapped_file.hpp | 467 ++++++++++++++++++++++++++++++++++
 3 files changed, 788 insertions(+)
 create mode 100644 cpp/libraries/mapped_file.cpp
 create mode 100644 cpp/libraries/mapped_file.hpp

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 6151054..05e50f0 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -38,6 +38,9 @@ macro(ConfigureTarget Target)
     )
 endmacro()
 
+add_library(mapped_file STATIC "libraries/mapped_file.cpp")
+ConfigureTarget(mapped_file)
+
 # Source files
 add_library(CubeObjs OBJECT
     "src/cubes.cpp"
@@ -50,6 +53,7 @@ ConfigureTarget(CubeObjs)
 # Build main program
 add_executable(${PROJECT_NAME} "program.cpp" $<TARGET_OBJECTS:CubeObjs>)
 target_link_libraries(${PROJECT_NAME} pthread)
+target_link_libraries(${PROJECT_NAME} mapped_file)
ConfigureTarget(${PROJECT_NAME})
 
 # Optionally build tests
diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp
new file mode 100644
index 0000000..a3731cd
--- /dev/null
+++ b/cpp/libraries/mapped_file.cpp
@@ -0,0 +1,317 @@
+/**
+ * Copyright 2023 Jarmo A Tiitto
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the “Software”), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "mapped_file.hpp"
+
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+// POSIX/Linux APIs
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#ifndef MAP_HUGE_2MB
+#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
+#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
+#endif
+
+namespace mapped {
+
+/**
+ * Mapped file POSIX/Linux compatible implementation
+ */
+file::file() : fd(-1), fd_size(0) {}
+
+file::~file() { close(); }
+
+void file::close() {
+    if (fd >= 0) {
+        ::fsync(fd);
+        ::close(fd);
+        fd = -1;
+        fd_size = 0;
+    }
+}
+
+int file::open(const char* fname) {
+    close();
+
+    fd = ::open64(fname, O_RDONLY);
+    if (fd == -1) {
+        std::fprintf(stderr, "Error opening file for reading\n");
+        return -1;
+    }
+
+    struct stat64 finfo;
+    if (fstat64(fd, &finfo)) {
+        std::fprintf(stderr, "Error opening file for reading\n");
+        return -1;
+    }
+    fd_size = finfo.st_size;
+    fd_rw = false;
+    return 0;
+}
+
+int file::openrw(const char* fname, size_t maxsize, int flags) {
+    // create new files with "normal" permissions: "-rw-rw-r--"
+    const mode_t fperms = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH;
+
+    close();
+
+    maxsize = roundUp(maxsize);
+
+    if (!flags) {
+        fd = ::open64(fname, O_RDWR | O_CLOEXEC);
+        if (fd == -1) {
+            std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno));
+            return -1;
+        }
+
+        fd_rw = true;
+
+        struct stat64 finfo;
+        if (fstat64(fd, &finfo)) {
+            std::fprintf(stderr, "Error getting file size:%s\n", std::strerror(errno));
+            return -1;
+        }
+        return truncate(finfo.st_size);
+
+    } else if ((flags & (CREATE | RESIZE)) == (CREATE | RESIZE)) {
+        fd = ::open64(fname, O_CREAT | O_RDWR | O_TRUNC | O_CLOEXEC, fperms);
+        if (fd == -1) {
+            std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno));
+            return -1;
+        }
+        fd_rw = true;
+        return truncate(maxsize);
+
+    } else if ((flags & RESIZE) != 0) {
+        fd = ::open64(fname, O_RDWR | O_CLOEXEC, fperms);
+        if (fd == -1) {
+            std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno));
+            return -1;
+        }
+        fd_rw = true;
+        return truncate(maxsize);
+    } else {
+        std::fprintf(stderr, "Invalid open flags\n");
+        return -1;
+    }
+}
+
+bool file::is_rw() const { return fd_rw; }
+
+seekoff_t file::size() const { return fd_size; }
+
+int file::truncate(seekoff_t newsize) {
+    // resize the backing file
+    if (newsize != fd_size && ftruncate64(fd, newsize)) {
+        std::fprintf(stderr, "Error resizing backing file:%s\n", std::strerror(errno));
+        return -1;
+    }
+    fd_size = newsize;
+    return 0;
+}
+
+/**
+ * Mapped region POSIX/Linux compatible implementation.
+ */
+
+region::region(std::shared_ptr<file> src, seekoff_t fpos, len_t size) : mfile(src) {
+    std::lock_guard<std::mutex> lock(mfile->mut);
+    remap(fpos, size);
+}
+
+region::region(std::shared_ptr<file> src) : mfile(src) {
+    std::lock_guard<std::mutex> lock(mfile->mut);
+    remap(0, mfile->size());
+}
+
+region::~region() {
+    std::lock_guard<std::mutex> lock(mfile->mut);
+    map_fseek = 0;
+    remap(0, 0);
+}
+
+/**
+ * This is the core implementation of mapped_file:
+ * remap(0,0) releases the mapping.
+ * remap(0, n) mmap roundUp(n) bytes at offset 0
+ * remap(0, k) mremap roundUp(k) bytes at offset 0 (grows the existing mapping)
+ * remap(n, j) munmap old region, mmap new at offset roundDown(n)
+ *
+ * In read-write mode the backing file is grown to fit the mapping.
+ */
+void region::remap(const seekoff_t fpos, const len_t size) {
+    if (fpos == usr_fseek && size == usr_size) return;  // No-op
+    // check if [fpos, fpos+size] fits into the existing
+    // mmap() window and only adjust the user region.
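+    // Illustrative fast-path example (numbers are hypothetical): with an
+    // existing window of map_fseek = 0 and map_size = 8192, remap(4096, 1024)
+    // satisfies map_fseek <= fpos && fpos + size <= map_fseek + map_size,
+    // so only usr_fseek/usr_ptr/usr_size are updated and no syscall is made.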
+ if (size && map_ptr && (map_fseek <= fpos && fpos + size <= map_fseek + map_size)) { + usr_fseek = fpos; + usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek); + usr_size = size; + return; + } + + // if size == 0 or the usr_fseek != fpos, + // we have to unmap the old region first, if any. + if (!!map_ptr && (size == 0 || usr_fseek != fpos)) { + if (::munmap(map_ptr, map_size) == -1) { + std::fprintf(stderr, "Error mapping file memory\n"); + return; + } + map_ptr = nullptr; + map_size = 0; + usr_ptr = nullptr; + usr_size = 0; + if (size == 0) return; + } + // keep what user tried to ask: + usr_fseek = fpos; + usr_size = size; + + if (map_ptr && map_fseek == fpos) { + // this mapping exists already at same map_fseek + // remap it to grow the region. + auto newsize = roundUp(size); + void* newptr = mremap(map_ptr, map_size, newsize, MREMAP_MAYMOVE); + if (newptr == MAP_FAILED) { + std::fprintf(stderr, "Error resizing memory-map of file:%s\n", std::strerror(errno)); + std::abort(); + return; + } + map_ptr = newptr; + map_size = size; + return; + } + + // create new mapping + if (mfile->is_rw()) { + // RW mapping + auto newsize = roundUp(size); + if (mfile->size() < fpos + newsize && mfile->truncate(fpos + newsize)) { + // failed. Disk full? + std::abort(); + return; + } + // mmap requires fpos && size to be multiple of PAGE_SIZE + map_fseek = roundDown(fpos); + if (map_fseek < fpos) { + // adjust size to cover. + newsize += PAGE_SIZE; + } + map_size = newsize; + map_ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, mfile->fd, map_fseek); + if (map_ptr == MAP_FAILED) { + std::fprintf(stderr, "Error memory-mapping file:%s %lu %d %lu\n", std::strerror(errno), size, mfile->fd, fpos); + std::abort(); + return; + } + } else { + // RO mapping + if (mfile->size() < fpos) { + // can't: the backing file is too small. + std::fprintf(stderr, "Error seeking past end of file.\n"); + std::abort(); + return; + } + map_size = roundUp(size); + map_fseek = roundDown(fpos); + // Map the region. (use huge pages, don't reserve backing store) + map_ptr = mmap(0, map_size, PROT_READ, MAP_SHARED | MAP_NORESERVE | MAP_HUGE_2MB, mfile->fd, map_fseek); + + if (!map_ptr || map_ptr == MAP_FAILED) { + std::fprintf(stderr, "Error mapping file\n"); + std::abort(); + return; + } + } + // adjust the usr_ptr to fix + // any page misalignment. + usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek); +} + +void region::jump(seekoff_t fpos) { + std::lock_guard lock(mfile->mut); + remap(fpos, map_size); + is_dirty = false; +} + +void region::flushJump(seekoff_t fpos) { + flush(); + std::lock_guard lock(mfile->mut); + remap(fpos, map_size); +} + +void region::flush() { + // only flush if dirty and RW mapped. + std::lock_guard lock(mfile->mut); + if (is_dirty && mfile->is_rw()) { + is_dirty = false; + if (msync(map_ptr, map_size, MS_ASYNC)) { + std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); + } + } +} + +void region::sync() { + // only flush if dirty and RW mapped. + std::lock_guard lock(mfile->mut); + if (is_dirty && mfile->is_rw()) { + is_dirty = false; + if (msync(map_ptr, map_size, MS_SYNC)) { + std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); + } + } +} + +/* +TODO: +void region::resident(void * paddr, size_t lenght, bool resident) { + // Align paddr to PAGE_SIZE + void * start = reinterpret_cast(uintptr_t(paddr) & ~(PAGE_SIZE-1)); + lenght = roundToPage(lenght); + + if(madvise(start, lenght, resident ? 
MADV_WILLNEED : MADV_DONTNEED)) {
+        std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno));
+    }
+}
+
+void region::discard(void * paddr, size_t lenght) {
+    // get range of pages that may be discarded.
+    // this is always an subset of [paddr, paddr+lenght] range.
+    void * start = (void*)roundUp((uintptr_t)paddr, PAGE_SIZE);
+    lenght = roundDown(lenght, PAGE_SIZE);
+
+    if(start < (char*)paddr + lenght && lenght >= PAGE_SIZE) {
+        // note: errors are ignored here.
+        madvise(start, lenght, MADV_REMOVE);
+    }
+}
+*/
+
+}; // namespace mapped
diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp
new file mode 100644
index 0000000..0aaefff
--- /dev/null
+++ b/cpp/libraries/mapped_file.hpp
@@ -0,0 +1,467 @@
+/**
+ * Copyright 2023 Jarmo A Tiitto
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the “Software”), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef MAPPEDFILE_HPP_INCLUDED
+#define MAPPEDFILE_HPP_INCLUDED
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <mutex>
+#include <type_traits>
+
+/**
+ * Memory mapped file I/O utilities
+ * - mapped::file class for opening a file
+ * - mapped::region class for RW/RO memory-mapping part of the file instance.
+ * - mapped::struct_region template for RW/RO accessing part of the file as a specified type.
+ * - mapped::array_region template for RW/RO accessing part of the file as an array of T elements.
+ *
+ * @note
+ * When doing read-only mapping the region instance
+ * should be const qualified as this restricts
+ * the region class API to read-only operations and prevents
+ * accidental modification of the file.
+ * Use std::make_unique<const region>() in this case.
+ *
+ * @note
+ * When using the read-write features the backing file is resized
+ * in multiples of PAGE_SIZE even if the actually mapped size is
+ * something else.
+ * openrw(...,size,RESIZE) always truncates the file to roundUp(size).
+ * You should do file->truncate(<sizeInBytes>) to make the file
+ * size exactly what you want before the file is closed.
+ *
+ * Modified regions should flush() or sync() before they are destroyed
+ * or the modified data may not end up in the file.
+ *
+ * TODO:
+ * - Two region instances should not overlap,
+ *   i.e. the same portion of the file should not be mapped twice.
+ *   (Not sure if this is actually broken now, but you have been warned)
+ * - Multi-threading support not tested/written.
+ *   Currently the same mapped region can be used by multiple threads,
+ *   but it cannot be modified.
+ * - Better error handling. (exceptions?, error codes?)
+ * Currently critical errors are printed and std::abort() is called. + * How do we handle system errors that happen in constructors? + */ +namespace mapped { + +const size_t PAGE_SIZE = 4096; + +static inline size_t roundToPage(ptrdiff_t x) { return (std::max(0, x - 1) & ~(PAGE_SIZE - 1)) + PAGE_SIZE; } + +constexpr inline size_t roundUp(uintptr_t x) { return (x + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); } + +constexpr inline size_t roundDown(uintptr_t x) { return (x & ~(PAGE_SIZE - 1)); } + +/** + * seekoff_t: Position of the file cursor + */ +using seekoff_t = uint64_t; +/** + * len_t: length of file data + */ +using len_t = size_t; + +class file; + +/** + * Memory-mapped region + * @brief + * the region base class implementation memory maps + * an raw memory range from the file. + */ +class region { + protected: + // actually mapped region: + void* map_ptr = nullptr; + size_t map_size = 0; + seekoff_t map_fseek = 0; + // what constructor asked: + void* usr_ptr = nullptr; + size_t usr_size = 0; + seekoff_t usr_fseek = 0; + // todo: maybe use std::weak_ptr? + // that would allow file to be released and + // any any existing region(s) would still work. + // (but only if remap() is not called) + std::shared_ptr mfile; + // non-const data access sets is_dirty. + bool is_dirty = false; + + void remap(const seekoff_t fpos, const len_t size); + + public: + /** + * Open memory mapped region into a file. + * @brief + * Seeks at fpos in file and map size bytes + * starting from that position in file. + * @note + * - Seeking past the EOF in file that is read-only will fail. + * The mapped size may extend past EOF but accessing past EOF + * either returns undefined data or program is terminated by OS. + * (EOF is at file->size()) + * - Seeking past the EOF that is read-write + * grows the backing file to fit the mapping. + * The backing file is always extended in multiple of PAGE_SIZE bytes. + * @note + * If size and/or fpos are not aligned to multiple of PAGE_SIZE + * they are forcibly aligned internally. This results in + * regionSize() and regionSeek() that may differ compared to + * size() and getSeek(). + * Side-effect is that backing file may grow more than expected. + */ + region(std::shared_ptr src, seekoff_t fpos, len_t size); + + /** + * Open memory mapped region into the file + * @brief + * same as region(myfile, 0, myfile.size()) + * and memory maps the entire file. + */ + explicit region(std::shared_ptr src); + + /** + * Note: even if region was modified, + * destructor will not flush()/sync() before tearing down the mapping. + */ + virtual ~region(); + + /** + * Get data pointer. + */ + const void* data() const { return usr_ptr; } + void* data() { + is_dirty = true; + return usr_ptr; + } + + std::shared_ptr getFile() { return mfile; } + + /** + * Get the seek used to init this region. + */ + seekoff_t getSeek() const { return usr_fseek; } + /** + * Get the size used to init this region. + */ + len_t size() const { return usr_size; } + + /** + * Get page aligned seek <= getSeek() + */ + seekoff_t regionSeek() const { return map_fseek; } + /** + * Get page aligned size >= size() + */ + len_t regionSize() const { return map_size; } + + /** + * Resize the mapped region. + * @note the mapped memory address may move, + * but current contents are preserved. + * @warn all pointers or references into + * the mapping are invalidated. 
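+     * An illustrative sketch (assumes an open read-write file f):
+     *   mapped::region r(f, 0, 4096);
+     *   r.resize(8192);       // mapping may move to a new address
+     *   auto* p = r.data();   // so data() must be re-fetched afterwards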
+ */ + void resize(len_t newsize); + + // todo: window(len_t virtsize) + // since region() is already lying that it can map + // non-page-aligned offsets and sizes + // window() would grow this over-aligned window + // to arbitrary size and keep the initialized + // user size. + // This allows remap() to just adjust the usr_ptr + // if the region window fits in. + + /** + * Flush mapped memory region into the file. + * @brief this is an hint to operating system that + * memory region shall be synchronized to disk. + * It may not wait for this to have completed before returning. + * @note Use sync() instead if you must guarantee the data has + * reached persistent storage. + */ + void flush(); + + /** + * Synchronize modified memory region onto disk. + */ + void sync(); + + /** + * Set memory region to resident/or released. + * @brief setting memory range to non-resident state + * causes system to drop the data from system memory. + * Reading non-resident memory region again causes system to + * fetch data from the disk again. + * @warn if memory region is not flushed before setting + * it non-resident any writes may be discarded to backing file. + */ + // void resident(bool state); + + /** + * Discard memory region. + * @brief discarding memory range causes system + * to reclaim the memory *and* the on-disk area. + * This means the data is lost in the mapped memory region, + * and any data within will not be written onto disk by sync() + * Subsequent reads after discard() return undefined data. + */ + // void discard(); + + /** + * Seek in the file to fpos position and + * remap the memory region there. + * @warn all pointers or references into + * the mapping are invalidated. + */ + void jump(seekoff_t fpos); + + /** + * Flush the current region and + * Seek in the file to fpos position and + * remap the memory region there. + * @warn all pointers or references into + * the mapping are invalidated. + */ + void flushJump(seekoff_t fpos); +}; + +static_assert(std::is_move_constructible_v); +static_assert(std::is_move_assignable_v); +static_assert(std::is_swappable_v); + +/** + * Typed region. + * struct_region allows directly accessing an on-disk structure. + * The region size is implicit from the type. + */ +template +class struct_region : protected region { + public: + using type = typename std::decay::type; + static_assert(std::is_standard_layout_v, "T must be plain-old-data type"); + + /** + * Memory map struct_region at fpos in file. + */ + struct_region(std::shared_ptr f, seekoff_t fpos) : region(f, fpos, sizeof(type)) {} + + type* get() { return static_cast(data()); } + const type* get() const { return static_cast(data()); } + + type* operator->() { return get(); } + const type* operator->() const { return get(); } + + type& operator*() { return *get(); } + const type& operator*() const { return *get(); } + + using region::flush; + using region::getFile; + using region::getSeek; + using region::sync; + + // note: size means the sizeof(T) + using region::size; + + /** + * Get the file seek position just after *this. + */ + seekoff_t getEndSeek() const { return getSeek() + sizeof(T); } + + /** + * Seek to fpos in file and remap the region. + * @return the pointer into the new position + */ + type* jump(seekoff_t fpos) { + region::jump(fpos); + return get(); + } + + type* flushJump(seekoff_t fpos) { + region::flushJump(fpos); + return get(); + } +}; + +/** + * Typed array region. 
+ * @brief + * array_region allows directly accessing an on-disk array of structures + * The element size is implicit from the type and length of the array + * is provided by the constructor. + * @provides resize(), operator[], begin(), end() + */ +template +class array_region : protected region { + protected: + size_t num_elements = 0; + + public: + using type = typename std::decay::type; + static_assert(std::is_standard_layout_v, "T must be plain-old-data type"); + + /** + * Memory map array_region at fpos in file and map array_size elements. + */ + array_region(std::shared_ptr f, seekoff_t fpos, size_t array_size) : region(f, fpos, sizeof(type) * array_size), num_elements(array_size) {} + + /** + * Get pointer to first mapped element. + */ + type* get() { return static_cast(data()); } + const type* get() const { return static_cast(data()); } + + using region::flush; + using region::getFile; + using region::getSeek; + using region::sync; + + /** + * Resize the mapped array region. + */ + void resize(size_t elements) { + region::resize(sizeof(T) * elements); + num_elements = elements; + } + + /** + * Get number of mapped *elements* + */ + size_t size() const { return num_elements; } + + /** + * Access the array elements + */ + T& operator[](size_t index) { + assert(index < num_elements); + return get()[index]; + } + const T& operator[](size_t index) const { + assert(index < num_elements); + return get()[index]; + } + /** + * Iterators + */ + T* begin() { return get(); } + T* end() { return get() + num_elements; } + const T* begin() const { return get(); } + const T* end() const { return get() + num_elements; } + + /** + * Get the file seek position just after *this. + */ + seekoff_t getEndSeek() const { return getSeek() + sizeof(T) * num_elements; } + + /** + * Seek to fpos in file and remap the region. + * @return the pointer into the first element in the array + */ + type* jump(seekoff_t fpos) { + region::jump(fpos); + return get(); + } + + type* flushJump(seekoff_t fpos) { + region::flushJump(fpos); + return get(); + } +}; + +/** + * Memory-mapped file I/O class. + * @note + * file should be created with std::make_shared() + * as mapped region(s) take shared ownership of the file. + */ +class file : public std::enable_shared_from_this { + private: + std::mutex mut; + int fd; + seekoff_t fd_size; + bool fd_rw; + // the file and region classes are inherently coupled, + // and we don't want to expose the internals. + friend class region; + + public: + enum : int { + CREATE = 0x1, // Create new file, if doesn't exist. + RESIZE = 0x2, // Resize file. + RO = 0x4 // + }; + + file(); + ~file(); + + /** + * Open file in read-only mode. + * @return non-zero if error occurred. + */ + int open(const char* file); + + /** + * Create/Open file in read-write mode. + * @param flags + * - CREATE|RESIZE creates or replaces existing file + * that is truncated to maxsize. + * - RESIZE opens existing file and truncates it to + * maxsize. The file must exist already. + * - flags == 0 ignores the maxsize argument and opens + * existing file. + * @warn default open mode discards any previous file contents! + * @return non-zero if error occurred. + */ + int openrw(const char* file, len_t maxsize, int flags = CREATE | RESIZE); + + /** + * Check if file open R/W or RO + */ + bool is_rw() const; + + /** + * Resize the open file to newsize bytes. + * (file must be open in R/W mode) + * @return non-zero if error occurred. 
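+     * For example (sizes illustrative), after openrw() rounded the
+     * file up to a PAGE_SIZE multiple, trim it before close():
+     *   f->truncate(1000);    // final file length is exactly 1000 bytes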
+ */ + int truncate(seekoff_t newsize); + + /** + * Current length of the file + * The file EOF (end-of-file) is at this position. + */ + seekoff_t size() const; + + // Close the file. + void close(); +}; + +}; // namespace mapped +#endif From f37d0f18e7beaccc9ed7afedcea2a8397e0e4e81 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Sun, 30 Jul 2023 22:04:24 +0300 Subject: [PATCH 02/23] - fixup region::remap() mremap case not saving the correct size. - silence few std::printf's since opening non-existing file is handled by returning -1 --- cpp/libraries/mapped_file.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index a3731cd..698a673 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -62,13 +62,13 @@ int file::open(const char* fname) { fd = ::open64(fname, O_RDONLY); if (fd == -1) { - std::fprintf(stderr, "Error opening file for reading\n"); + //std::fprintf(stderr, "Error opening file for reading\n"); return -1; } struct stat64 finfo; if (fstat64(fd, &finfo)) { - std::fprintf(stderr, "Error opening file for reading\n"); + std::fprintf(stderr, "Error getting file size: %s\n", std::strerror(errno)); return -1; } fd_size = finfo.st_size; @@ -87,7 +87,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { if (!flags) { fd = ::open64(fname, O_RDWR | O_CLOEXEC); if (fd == -1) { - std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + //std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } @@ -103,7 +103,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { } else if ((flags & (CREATE | RESIZE)) == (CREATE | RESIZE)) { fd = ::open64(fname, O_CREAT | O_RDWR | O_TRUNC | O_CLOEXEC, fperms); if (fd == -1) { - std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + //std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } fd_rw = true; @@ -112,7 +112,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { } else if ((flags & RESIZE) != 0) { fd = ::open64(fname, O_RDWR | O_CLOEXEC, fperms); if (fd == -1) { - std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + //std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } fd_rw = true; @@ -205,7 +205,7 @@ void region::remap(const seekoff_t fpos, const len_t size) { return; } map_ptr = newptr; - map_size = size; + map_size = newsize; return; } @@ -233,7 +233,7 @@ void region::remap(const seekoff_t fpos, const len_t size) { } } else { // RO mapping - if (mfile->size() < fpos) { + if (mfile->size() <= fpos) { // can't: the backing file is too small. std::fprintf(stderr, "Error seeking past end of file.\n"); std::abort(); From 48a71f93cebf883e7969784cccd1ce15a1d943b3 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Sun, 30 Jul 2023 22:24:03 +0300 Subject: [PATCH 03/23] fixup missing const in struct_region and array_region --- cpp/libraries/mapped_file.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index 0aaefff..59f89ac 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -336,7 +336,7 @@ class array_region : protected region { * Get pointer to first mapped element. 
*/ type* get() { return static_cast(data()); } - const type* get() const { return static_cast(data()); } + const type* get() const { return static_cast(data()); } using region::flush; using region::getFile; From 6a637a6623d9a2e5709168d37b94f45c67e204b1 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 31 Jul 2023 04:12:10 +0300 Subject: [PATCH 04/23] libmappedfile: implement oversized mapped region The memory map now supports mapping oversized "window" into the file: - flush(), sync() only flush the user area - jump(), flushJump() have fast path speed up when new user area fits into the oversized window. --- cpp/libraries/mapped_file.cpp | 33 +++++++++++++++++++++------------ cpp/libraries/mapped_file.hpp | 13 ++++++++++--- 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index 698a673..e69ddbb 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -141,20 +141,21 @@ int file::truncate(seekoff_t newsize) { * Mapped region POSIX/Linux compatible implementation. */ -region::region(std::shared_ptr src, seekoff_t fpos, len_t size) : mfile(src) { +region::region(std::shared_ptr src, seekoff_t fpos, len_t size, len_t window) : mfile(src) { std::lock_guard lock(mfile->mut); - remap(fpos, size); + remap(fpos, size, window); } region::region(std::shared_ptr src) : mfile(src) { std::lock_guard lock(mfile->mut); - remap(0, mfile->size()); + auto sz = mfile->size(); + remap(0, sz, sz); } region::~region() { std::lock_guard lock(mfile->mut); map_fseek = 0; - remap(0, 0); + remap(0, 0, 0); } /** @@ -166,7 +167,7 @@ region::~region() { * * In read-write mode the backing file is grown to fit the mapping. */ -void region::remap(const seekoff_t fpos, const len_t size) { +void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { if (fpos == usr_fseek && size == usr_size) return; // No-op // check if [fpos, fpos+size] fits into the existing // mmap() window and only adjust the user region. @@ -197,7 +198,7 @@ void region::remap(const seekoff_t fpos, const len_t size) { if (map_ptr && map_fseek == fpos) { // this mapping exists already at same map_fseek // remap it to grow the region. - auto newsize = roundUp(size); + auto newsize = roundUp(std::max(size, window)); void* newptr = mremap(map_ptr, map_size, newsize, MREMAP_MAYMOVE); if (newptr == MAP_FAILED) { std::fprintf(stderr, "Error resizing memory-map of file:%s\n", std::strerror(errno)); @@ -212,7 +213,7 @@ void region::remap(const seekoff_t fpos, const len_t size) { // create new mapping if (mfile->is_rw()) { // RW mapping - auto newsize = roundUp(size); + auto newsize = roundUp(std::max(size, window)); if (mfile->size() < fpos + newsize && mfile->truncate(fpos + newsize)) { // failed. Disk full? std::abort(); @@ -239,7 +240,7 @@ void region::remap(const seekoff_t fpos, const len_t size) { std::abort(); return; } - map_size = roundUp(size); + map_size = roundUp(std::max(size, window)); map_fseek = roundDown(fpos); // Map the region. 
(use huge pages, don't reserve backing store) map_ptr = mmap(0, map_size, PROT_READ, MAP_SHARED | MAP_NORESERVE | MAP_HUGE_2MB, mfile->fd, map_fseek); @@ -257,14 +258,14 @@ void region::remap(const seekoff_t fpos, const len_t size) { void region::jump(seekoff_t fpos) { std::lock_guard lock(mfile->mut); - remap(fpos, map_size); + remap(fpos, usr_size, map_size); is_dirty = false; } void region::flushJump(seekoff_t fpos) { flush(); std::lock_guard lock(mfile->mut); - remap(fpos, map_size); + remap(fpos, usr_size, map_size); } void region::flush() { @@ -272,7 +273,11 @@ void region::flush() { std::lock_guard lock(mfile->mut); if (is_dirty && mfile->is_rw()) { is_dirty = false; - if (msync(map_ptr, map_size, MS_ASYNC)) { + auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); + auto flush_len = roundUp(usr_size); + if(flush_begin < usr_ptr) + flush_len += PAGE_SIZE; + if (msync(flush_begin, flush_len, MS_ASYNC)) { std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); } } @@ -283,7 +288,11 @@ void region::sync() { std::lock_guard lock(mfile->mut); if (is_dirty && mfile->is_rw()) { is_dirty = false; - if (msync(map_ptr, map_size, MS_SYNC)) { + auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); + auto flush_len = roundUp(usr_size); + if(flush_begin < usr_ptr) + flush_len += PAGE_SIZE; + if (msync(flush_begin, flush_len, MS_SYNC)) { std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); } } diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index 59f89ac..57b1521 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -113,7 +113,7 @@ class region { // non-const data access sets is_dirty. bool is_dirty = false; - void remap(const seekoff_t fpos, const len_t size); + void remap(const seekoff_t fpos, const len_t size, const len_t window); public: /** @@ -121,6 +121,13 @@ class region { * @brief * Seeks at fpos in file and map size bytes * starting from that position in file. + * @param window + * over-extend mapping up to max(size,window) bytes. + * Setting window bigger than size allows more efficient operation: + * [fpos, fpos + window] area is memory mapped + * but region will only operate on the + * [roundDown(fpos), roundup(fpos+size)] + * sub-portion of the memory. * @note * - Seeking past the EOF in file that is read-only will fail. * The mapped size may extend past EOF but accessing past EOF @@ -136,7 +143,7 @@ class region { * size() and getSeek(). * Side-effect is that backing file may grow more than expected. */ - region(std::shared_ptr src, seekoff_t fpos, len_t size); + region(std::shared_ptr src, seekoff_t fpos, len_t size, len_t window = 0); /** * Open memory mapped region into the file @@ -271,7 +278,7 @@ class struct_region : protected region { /** * Memory map struct_region at fpos in file. */ - struct_region(std::shared_ptr f, seekoff_t fpos) : region(f, fpos, sizeof(type)) {} + struct_region(std::shared_ptr f, seekoff_t fpos, len_t window = 0) : region(f, fpos, sizeof(type), window) {} type* get() { return static_cast(data()); } const type* get() const { return static_cast(data()); } From 27d56d02721d504b8676fe83c060b9dbd041afee Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Wed, 2 Aug 2023 13:14:26 +0300 Subject: [PATCH 05/23] libmappedfile: Provide writeAt() readAt() API - Provide region::writeAt() and region::readAt() that enable copying data into/from the backing file even if the target area of the backing file is not memory-mapped. 
- Fixup flushed length in flush() sync() - Run clang-format --- cpp/libraries/mapped_file.cpp | 85 +++++++++++++++++++++++++++++++---- cpp/libraries/mapped_file.hpp | 38 ++++++++++++++-- 2 files changed, 112 insertions(+), 11 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index e69ddbb..ef888f9 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -62,7 +62,7 @@ int file::open(const char* fname) { fd = ::open64(fname, O_RDONLY); if (fd == -1) { - //std::fprintf(stderr, "Error opening file for reading\n"); + // std::fprintf(stderr, "Error opening file for reading\n"); return -1; } @@ -87,7 +87,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { if (!flags) { fd = ::open64(fname, O_RDWR | O_CLOEXEC); if (fd == -1) { - //std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } @@ -103,7 +103,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { } else if ((flags & (CREATE | RESIZE)) == (CREATE | RESIZE)) { fd = ::open64(fname, O_CREAT | O_RDWR | O_TRUNC | O_CLOEXEC, fperms); if (fd == -1) { - //std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } fd_rw = true; @@ -112,7 +112,7 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { } else if ((flags & RESIZE) != 0) { fd = ::open64(fname, O_RDWR | O_CLOEXEC, fperms); if (fd == -1) { - //std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); return -1; } fd_rw = true; @@ -275,8 +275,7 @@ void region::flush() { is_dirty = false; auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); auto flush_len = roundUp(usr_size); - if(flush_begin < usr_ptr) - flush_len += PAGE_SIZE; + if (flush_begin < usr_ptr) flush_len += PAGE_SIZE; if (msync(flush_begin, flush_len, MS_ASYNC)) { std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); } @@ -290,14 +289,84 @@ void region::sync() { is_dirty = false; auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); auto flush_len = roundUp(usr_size); - if(flush_begin < usr_ptr) - flush_len += PAGE_SIZE; + if (flush_begin < usr_ptr) flush_len += PAGE_SIZE; if (msync(flush_begin, flush_len, MS_SYNC)) { std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); } } } +void region::writeAt(seekoff_t fpos, len_t datasize, const void* data) { + auto srcmem = (const char*)data; + + std::lock_guard lock(mfile->mut); + if(mfile->size() < fpos+datasize && mfile->truncate(fpos+datasize)) { + return; + } + + // does write fall out the mapped area begin? + if (fpos < map_fseek) { + // max size that can be written before map_fseek + ssize_t wr = std::min(map_fseek - fpos, datasize); + if (pwrite(mfile->fd, srcmem, wr, fpos) != wr) { + std::fprintf(stderr, "Error writing file:%s\n", std::strerror(errno)); + } + srcmem += wr; + fpos += wr; + datasize -= wr; + } + + if (fpos >= map_fseek && fpos < map_fseek + map_size && datasize) { + // max size that can be copied into this mapping: + ssize_t wr = std::min(map_size - (fpos - map_fseek), datasize); + std::memcpy((char*)map_ptr + (fpos - map_fseek), srcmem, wr); + srcmem += wr; + fpos += wr; + datasize -= wr; + } + + // does write fall out the mapped area end? 
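+    // Illustrative example: with map_fseek = 4096 and map_size = 4096,
+    // writeAt(0, 12288, buf) is served in three parts: pwrite() for
+    // [0,4096), memcpy() into the mapping for [4096,8192), and the
+    // pwrite() below for the remaining [8192,12288).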
+ if (datasize) { + // write into backing file after the mapped area: + if (pwrite(mfile->fd, srcmem, datasize, fpos) != ssize_t(datasize)) { + std::fprintf(stderr, "Error writing file:%s\n", std::strerror(errno)); + } + } +} + +void region::readAt(seekoff_t fpos, len_t datasize, void* data) { + auto dstmem = (char*)data; + + // does read fall out the mapped area begin? + if (fpos < map_fseek) { + // max size that can be written before map_fseek + ssize_t rd = std::min(map_fseek - fpos, datasize); + if (pread(mfile->fd, dstmem, rd, fpos) != rd) { + std::fprintf(stderr, "Error reading file:%s\n", std::strerror(errno)); + } + dstmem += rd; + fpos += rd; + datasize -= rd; + } + + if (fpos >= map_fseek && fpos < map_fseek + map_size && datasize) { + // max size that can be copied from this mapping: + ssize_t rd = std::min(map_size - (fpos - map_fseek), datasize); + std::memcpy(dstmem, (char*)map_ptr + (fpos - map_fseek), rd); + dstmem += rd; + fpos += rd; + datasize -= rd; + } + + // does read fall out the mapped area end? + if (datasize) { + // read from backing file after the mapped area: + if (pread(mfile->fd, dstmem, datasize, fpos) != ssize_t(datasize)) { + std::fprintf(stderr, "Error reading file:%s\n", std::strerror(errno)); + } + } +} + /* TODO: void region::resident(void * paddr, size_t lenght, bool resident) { diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index 57b1521..dd2f61f 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -200,17 +200,18 @@ class region { // todo: window(len_t virtsize) // since region() is already lying that it can map // non-page-aligned offsets and sizes - // window() would grow this over-aligned window + // window() would grow this over-extended the memory mapping // to arbitrary size and keep the initialized // user size. - // This allows remap() to just adjust the usr_ptr - // if the region window fits in. /** * Flush mapped memory region into the file. * @brief this is an hint to operating system that * memory region shall be synchronized to disk. * It may not wait for this to have completed before returning. + * @note only the page aligned region + * [roundDown(data()), roundUp(data()+size())] + * is flushed. * @note Use sync() instead if you must guarantee the data has * reached persistent storage. */ @@ -221,6 +222,33 @@ class region { */ void sync(); + /** + * Write data into the backing file. + * @brief + * writeAt() stores range of bytes into the backing file. + * @note + * The region doesn't need to have this area to be memory-mapped: + * The data that falls into the memory-mapped + * [regionSeek(), regionSeek()+regionSize()] area is simply memcpy'ed. + * Any data that falls out this window is written directly + * into the backing file. + * The backing file is grown to fit the data when needed. + */ + void writeAt(seekoff_t fpos, len_t datasize, const void* data); + + /** + * Read data from the backing file. + * @brief + * readAt() reads [fpos, fpos+datasize] range of bytes from the backing file + * @note + * The region doesn't need to have this area memory-mapped + * The read out area that falls into the memory-mapped + * [regionSeek(), regionSeek()+regionSize()] area is simply memcpy'ed. + * Any data that falls out this window is read directly + * from the backing file. + */ + void readAt(seekoff_t fpos, len_t datasize, void* data); + /** * Set memory region to resident/or released. 
* @brief setting memory range to non-resident state @@ -292,7 +320,9 @@ class struct_region : protected region { using region::flush; using region::getFile; using region::getSeek; + using region::readAt; using region::sync; + using region::writeAt; // note: size means the sizeof(T) using region::size; @@ -348,7 +378,9 @@ class array_region : protected region { using region::flush; using region::getFile; using region::getSeek; + using region::readAt; using region::sync; + using region::writeAt; /** * Resize the mapped array region. From d5ca48b2a83163b41f0b505eebd8db107f07ff3b Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Wed, 2 Aug 2023 23:26:33 +0300 Subject: [PATCH 06/23] libmappedfile: Misc changes - Provide FSTUNE flag that attempts to speed up file access when new file created with CREATE|RESIZE. It effectievely sets chattr +X and +A flags on the file. - Make readAt() const qualified. --- cpp/libraries/mapped_file.cpp | 12 +++++++++++- cpp/libraries/mapped_file.hpp | 9 +++++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index ef888f9..5318596 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -34,6 +34,9 @@ #include #include +#include +#include + #ifndef MAP_HUGE_2MB #define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) #define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) @@ -107,6 +110,13 @@ int file::openrw(const char* fname, size_t maxsize, int flags) { return -1; } fd_rw = true; + + if(flags & FSTUNE) { + int flags = 0; + ioctl(fd, FS_IOC_GETFLAGS, &flags); + flags |= FS_NOATIME_FL | FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &flags); + } return truncate(maxsize); } else if ((flags & RESIZE) != 0) { @@ -334,7 +344,7 @@ void region::writeAt(seekoff_t fpos, len_t datasize, const void* data) { } } -void region::readAt(seekoff_t fpos, len_t datasize, void* data) { +void region::readAt(seekoff_t fpos, len_t datasize, void* data) const { auto dstmem = (char*)data; // does read fall out the mapped area begin? diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index dd2f61f..7cc3b1e 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -247,7 +247,7 @@ class region { * Any data that falls out this window is read directly * from the backing file. */ - void readAt(seekoff_t fpos, len_t datasize, void* data); + void readAt(seekoff_t fpos, len_t datasize, void* data) const; /** * Set memory region to resident/or released. @@ -452,9 +452,10 @@ class file : public std::enable_shared_from_this { public: enum : int { - CREATE = 0x1, // Create new file, if doesn't exist. - RESIZE = 0x2, // Resize file. - RO = 0x4 // + CREATE = 0x1, //!< Create new file, if doesn't exist. + RESIZE = 0x2, //!< Resize file. + FSTUNE = 0x4 //!< When creating new file attempt to set + //!< file system attributes to improve performance. }; file(); From fbcd80a101af1b3b324e197769330cf78a1211de Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Thu, 10 Aug 2023 00:06:19 +0300 Subject: [PATCH 07/23] libmapped_file: Make region moveable - Provide proper move aware object. region objects are now safe to use in STL containers like vector/deque. 
- Implement region::resident() (not tested) --- cpp/libraries/mapped_file.cpp | 47 +++++++++++++++++++++++++++-------- cpp/libraries/mapped_file.hpp | 38 ++++++++++++++++++++++++++-- 2 files changed, 72 insertions(+), 13 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index 5318596..c101f3a 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include // POSIX/Linux APIs @@ -163,7 +164,7 @@ region::region(std::shared_ptr src) : mfile(src) { } region::~region() { - std::lock_guard lock(mfile->mut); + // destructor is not thread-safe. map_fseek = 0; remap(0, 0, 0); } @@ -238,7 +239,30 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { map_size = newsize; map_ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, mfile->fd, map_fseek); if (map_ptr == MAP_FAILED) { + // If this gets triggered we are in deep trouble std::fprintf(stderr, "Error memory-mapping file:%s %lu %d %lu\n", std::strerror(errno), size, mfile->fd, fpos); + std::fprintf(stderr, "Dumping /proc/self/maps:\n"); + // for debugging information try print /proc/self/mmaps contents + // as this explains why we hit some limit of the system. + std::ifstream fmaps("/proc/self/maps"); + std::string buf; + int count = 0; + while(std::getline(fmaps, buf)) { + std::fprintf(stderr, "%s\n", buf.c_str()); + ++count; + } + std::fprintf(stderr, "counted %d memory-maps in process.\n", count); + + + + // todo: if this really is an hard limit of the hardware + // for *number of mmap() areas* this means we forced to: + // - register all regions in ordered list by mapped seek offset in the mapped::file + // - when mmap fails we have to merge adjacent regions + // - reference count the regions + // - data() returned memory address becomes even more unstable: + // it is invalidated by adjacent construction/deconstruction of region objects + // - destruction gets complicated. std::abort(); return; } @@ -377,18 +401,19 @@ void region::readAt(seekoff_t fpos, len_t datasize, void* data) const { } } -/* -TODO: -void region::resident(void * paddr, size_t lenght, bool resident) { - // Align paddr to PAGE_SIZE - void * start = reinterpret_cast(uintptr_t(paddr) & ~(PAGE_SIZE-1)); - lenght = roundToPage(lenght); - - if(madvise(start, lenght, resident ? MADV_WILLNEED : MADV_DONTNEED)) { - std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno)); - } + +void region::resident(bool resident) { + std::lock_guard lock(mfile->mut); + auto _begin = (void*)roundDown((uintptr_t)usr_ptr); + auto _len = roundUp(usr_size); + if (_begin < usr_ptr) _len += PAGE_SIZE; + + if(madvise(_begin, _len, resident ? MADV_WILLNEED : MADV_DONTNEED)) { + std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno)); + } } +/* void region::discard(void * paddr, size_t lenght) { // get range of pages that may be discarded. // this is always an subset of [paddr, paddr+lenght] range. diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index 7cc3b1e..5cda7c1 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -115,6 +115,8 @@ class region { void remap(const seekoff_t fpos, const len_t size, const len_t window); + region() {} + public: /** * Open memory mapped region into a file. 
@@ -159,6 +161,31 @@ class region { */ virtual ~region(); + // region is not copyable + region(const region&) =delete; + region& operator=(const region&) =delete; + + // region is moveable + friend void swap(region& a, region& b) { + using std::swap; + // thread-safety? None. + swap(a.map_ptr,b.map_ptr); + swap(a.map_size,b.map_size); + swap(a.map_fseek,b.map_fseek); + swap(a.usr_ptr,b.usr_ptr); + swap(a.usr_size,b.usr_size); + swap(a.usr_fseek,b.usr_fseek); + swap(a.mfile,b.mfile); + swap(a.is_dirty,b.is_dirty); + } + region(region&& mv) : region() { + swap(*this, mv); + } + region& operator=(region&& mv) { + swap(*this, mv); + return *this; + } + /** * Get data pointer. */ @@ -256,9 +283,11 @@ class region { * Reading non-resident memory region again causes system to * fetch data from the disk again. * @warn if memory region is not flushed before setting - * it non-resident any writes may be discarded to backing file. + * resident(false) any writes may be discarded to backing file. + * @todo: more strict version? + * actually unmap the region() until data() is called. */ - // void resident(bool state); + void resident(bool state); /** * Discard memory region. @@ -323,6 +352,7 @@ class struct_region : protected region { using region::readAt; using region::sync; using region::writeAt; + using region::resident; // note: size means the sizeof(T) using region::size; @@ -347,6 +377,10 @@ class struct_region : protected region { } }; +static_assert(std::is_move_constructible_v>); +static_assert(std::is_move_assignable_v>); +static_assert(std::is_swappable_v>); + /** * Typed array region. * @brief From a8b5e1fbd9a11eacdb273d596cd09e295e3b8ff7 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Thu, 10 Aug 2023 03:55:44 +0300 Subject: [PATCH 08/23] libmappedfile: Implement region::window() - region::window() allows over-extending the memory-mapping The "user mapped" portions stays same but regionSize() is changed. --- cpp/libraries/mapped_file.cpp | 8 ++++++++ cpp/libraries/mapped_file.hpp | 16 ++++++++++------ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index c101f3a..f4a02e1 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -290,6 +290,14 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek); } +void region::window(len_t window) { + std::lock_guard lock(mfile->mut); + auto usize = usr_size; + // note: remap() does nothing if window == usr_size + remap(usr_fseek, window, window); + usr_size = usize; +} + void region::jump(seekoff_t fpos) { std::lock_guard lock(mfile->mut); remap(fpos, usr_size, map_size); diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index 5cda7c1..e9893e1 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -224,12 +224,15 @@ class region { */ void resize(len_t newsize); - // todo: window(len_t virtsize) - // since region() is already lying that it can map - // non-page-aligned offsets and sizes - // window() would grow this over-extended the memory mapping - // to arbitrary size and keep the initialized - // user size. + /** + * @brief over-extend mapping up to max(size(),window) bytes. 
* Setting window bigger than size() allows more efficient operation:
+     * [regionSeek(), regionSeek() + window] area is memory mapped
+     * but region will only operate on the
+     * [roundDown(getSeek()), roundUp(getSeek()+size())]
+     * sub-portion of the memory.
+     */
+    void window(len_t window = 0);
 
     /**
      * Flush mapped memory region into the file.
      * @brief this is an hint to operating system that
@@ -353,6 +356,7 @@ class struct_region : protected region {
     using region::sync;
     using region::writeAt;
     using region::resident;
+    using region::window;
 
     // note: size means the sizeof(T)
     using region::size;

From aeac7267afb7104ff133cecdc33f229a63ebed3f Mon Sep 17 00:00:00 2001
From: Jarmo Tiitto
Date: Thu, 10 Aug 2023 07:11:47 +0300
Subject: [PATCH 09/23] libmapped_file: Tune the memory mapping a bit

- For resident() it is better to mark the entire mapped
  region rather than just the user area.
---
 cpp/libraries/mapped_file.cpp | 23 ++++++-----------------
 1 file changed, 6 insertions(+), 17 deletions(-)

diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp
index f4a02e1..56e1d11 100644
--- a/cpp/libraries/mapped_file.cpp
+++ b/cpp/libraries/mapped_file.cpp
@@ -252,18 +252,6 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) {
             ++count;
         }
         std::fprintf(stderr, "counted %d memory-maps in process.\n", count);
-
-
-
-        // todo: if this really is an hard limit of the hardware
-        // for *number of mmap() areas* this means we forced to:
-        // - register all regions in ordered list by mapped seek offset in the mapped::file
-        // - when mmap fails we have to merge adjacent regions
-        // - reference count the regions
-        // - data() returned memory address becomes even more unstable:
-        //   it is invalidated by adjacent construction/deconstruction of region objects
-        // - destruction gets complicated.
         std::abort();
         return;
     }
@@ -285,6 +273,11 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) {
             return;
         }
     }
+
+    // hint that this memory is accessed in random order.
+    if(madvise(map_ptr, map_size, MADV_RANDOM)) {
+        std::fprintf(stderr, "warn: madvise(MADV_RANDOM) failed: %s\n", std::strerror(errno));
+    }
     // adjust the usr_ptr to fix
     // any page misalignment.
     usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek);
@@ -412,11 +405,7 @@ void region::readAt(seekoff_t fpos, len_t datasize, void* data) const {
 
 void region::resident(bool resident) {
     std::lock_guard<std::mutex> lock(mfile->mut);
-    auto _begin = (void*)roundDown((uintptr_t)usr_ptr);
-    auto _len = roundUp(usr_size);
-    if (_begin < usr_ptr) _len += PAGE_SIZE;
-
-    if(madvise(_begin, _len, resident ? MADV_WILLNEED : MADV_DONTNEED)) {
+    if(madvise(map_ptr, map_size, resident ? MADV_WILLNEED : MADV_DONTNEED)) {
         std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno));
     }
 }

From a6702de4002ed76664bd49ae6795d2ceb68584b2 Mon Sep 17 00:00:00 2001
From: Jarmo Tiitto
Date: Fri, 11 Aug 2023 21:28:43 +0300
Subject: [PATCH 10/23] libmappedfile: comment fixups

---
 cpp/libraries/mapped_file.hpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp
index e9893e1..a2fccbd 100644
--- a/cpp/libraries/mapped_file.hpp
+++ b/cpp/libraries/mapped_file.hpp
@@ -92,8 +92,7 @@ class file;
 /**
  * Memory-mapped region
  * @brief
- * the region base class implementation memory maps
- * an raw memory range from the file.
+ * the region base class memory-maps an raw memory range from the file.
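+ *
+ * A minimal read-only sketch (file name is illustrative):
+ *   auto f = std::make_shared<mapped::file>();
+ *   if(f->open("data.bin") == 0) {
+ *       const mapped::region r(f);  // map the whole file read-only
+ *       auto* bytes = static_cast<const uint8_t*>(r.data());
+ *   }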
*/ class region { protected: From ac6a20005afa4b752e41fb3b84cc5957e5320af9 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Sat, 12 Aug 2023 05:20:40 +0300 Subject: [PATCH 11/23] libmappedfile: Locking and discard work - Implement more fine-grained locking for region. - Implement region::discard() This effectively zero fills memory area within the mapping and punches hole into the backing file. --- cpp/libraries/mapped_file.cpp | 62 ++++++++++++++++++++++++----------- cpp/libraries/mapped_file.hpp | 20 +++++++---- 2 files changed, 56 insertions(+), 26 deletions(-) diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp index 56e1d11..e7261dc 100644 --- a/cpp/libraries/mapped_file.cpp +++ b/cpp/libraries/mapped_file.cpp @@ -153,18 +153,19 @@ int file::truncate(seekoff_t newsize) { */ region::region(std::shared_ptr src, seekoff_t fpos, len_t size, len_t window) : mfile(src) { - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); remap(fpos, size, window); } region::region(std::shared_ptr src) : mfile(src) { - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); auto sz = mfile->size(); remap(0, sz, sz); } region::~region() { // destructor is not thread-safe. + std::lock_guard lock(mtx); map_fseek = 0; remap(0, 0, 0); } @@ -177,6 +178,8 @@ region::~region() { * remap(n, j) munmap old region, mmap new at offset roundDown(n) * * In read-write mode the backing file is grown to fit the mapping. + * + * @warn this->mtx must be held when this function is called. */ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { if (fpos == usr_fseek && size == usr_size) return; // No-op @@ -225,11 +228,16 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { if (mfile->is_rw()) { // RW mapping auto newsize = roundUp(std::max(size, window)); + + // take file lock so size() check --> truncate is atomic. + std::unique_lock trunclock(mfile->mut); if (mfile->size() < fpos + newsize && mfile->truncate(fpos + newsize)) { // failed. Disk full? std::abort(); return; } + trunclock.unlock(); + // mmap requires fpos && size to be multiple of PAGE_SIZE map_fseek = roundDown(fpos); if (map_fseek < fpos) { @@ -265,7 +273,7 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { map_size = roundUp(std::max(size, window)); map_fseek = roundDown(fpos); // Map the region. (use huge pages, don't reserve backing store) - map_ptr = mmap(0, map_size, PROT_READ, MAP_SHARED | MAP_NORESERVE | MAP_HUGE_2MB, mfile->fd, map_fseek); + map_ptr = mmap(0, map_size, PROT_READ, MAP_SHARED | MAP_NORESERVE, mfile->fd, map_fseek); if (!map_ptr || map_ptr == MAP_FAILED) { std::fprintf(stderr, "Error mapping file\n"); @@ -284,7 +292,7 @@ void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { } void region::window(len_t window) { - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); auto usize = usr_size; // note: remap() does nothing if window == usr_size remap(usr_fseek, window, window); @@ -292,20 +300,20 @@ void region::window(len_t window) { } void region::jump(seekoff_t fpos) { - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); remap(fpos, usr_size, map_size); is_dirty = false; } void region::flushJump(seekoff_t fpos) { flush(); - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); remap(fpos, usr_size, map_size); } void region::flush() { // only flush if dirty and RW mapped. 
- std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); if (is_dirty && mfile->is_rw()) { is_dirty = false; auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); @@ -319,7 +327,7 @@ void region::flush() { void region::sync() { // only flush if dirty and RW mapped. - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); if (is_dirty && mfile->is_rw()) { is_dirty = false; auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); @@ -334,10 +342,12 @@ void region::sync() { void region::writeAt(seekoff_t fpos, len_t datasize, const void* data) { auto srcmem = (const char*)data; - std::lock_guard lock(mfile->mut); + // take file lock so that file size check --> truncate is atomic. + std::unique_lock trunclock(mfile->mut); if(mfile->size() < fpos+datasize && mfile->truncate(fpos+datasize)) { return; } + trunclock.unlock(); // does write fall out the mapped area begin? if (fpos < map_fseek) { @@ -404,24 +414,36 @@ void region::readAt(seekoff_t fpos, len_t datasize, void* data) const { void region::resident(bool resident) { - std::lock_guard lock(mfile->mut); + std::lock_guard lock(mtx); if(madvise(map_ptr, map_size, resident ? MADV_WILLNEED : MADV_DONTNEED)) { std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno)); } } -/* -void region::discard(void * paddr, size_t lenght) { - // get range of pages that may be discarded. - // this is always an subset of [paddr, paddr+lenght] range. - void * start = (void*)roundUp((uintptr_t)paddr, PAGE_SIZE); - lenght = roundDown(lenght, PAGE_SIZE); - if(start < (char*)paddr + lenght && lenght >= PAGE_SIZE) { - // note: errors are ignored here. - madvise(start, lenght, MADV_REMOVE); +void region::discard(seekoff_t fpos, len_t datasize) { + + auto cur = usr_fseek + fpos; + + if (cur < map_fseek + map_size) { + // max size that can discarded from this mapping: + ssize_t dm = std::min(map_size - (cur - map_fseek), datasize); + + // Have to be careful here: if we delete too much + // caller will not have an good time. + // align size down to page size. + dm = roundDown(dm); + // align file offset up + auto _first = roundUp(cur - map_fseek); + if(_first > cur - map_fseek) + dm -= PAGE_SIZE; + + if(dm >= (signed)PAGE_SIZE) { + if(madvise((char*)map_ptr + _first, dm, MADV_REMOVE)) { + std::fprintf(stderr,"Error discarding memory-map region:%s\n",std::strerror(errno)); + } } + } } -*/ }; // namespace mapped diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp index a2fccbd..b86657a 100644 --- a/cpp/libraries/mapped_file.hpp +++ b/cpp/libraries/mapped_file.hpp @@ -92,10 +92,11 @@ class file; /** * Memory-mapped region * @brief - * the region base class memory-maps an raw memory range from the file. + * the base class memory-maps an raw range of bytes from the backing file. */ class region { protected: + std::mutex mtx; // actually mapped region: void* map_ptr = nullptr; size_t map_size = 0; @@ -167,7 +168,9 @@ class region { // region is moveable friend void swap(region& a, region& b) { using std::swap; - // thread-safety? None. + //std::lock(a.mtx,b.mtx); + //std::lock_guard l0(a.mtx, std::adopt_lock); + //std::lock_guard l1(b.mtx, std::adopt_lock); swap(a.map_ptr,b.map_ptr); swap(a.map_size,b.map_size); swap(a.map_fseek,b.map_fseek); @@ -286,8 +289,6 @@ class region { * fetch data from the disk again. * @warn if memory region is not flushed before setting * resident(false) any writes may be discarded to backing file. - * @todo: more strict version? 
- * actually unmap the region() until data() is called. */ void resident(bool state); @@ -297,9 +298,15 @@ class region { * to reclaim the memory *and* the on-disk area. * This means the data is lost in the mapped memory region, * and any data within will not be written onto disk by sync() - * Subsequent reads after discard() return undefined data. + * Subsequent reads after discard() return zero filled data. + * @note + * The discarded area shall be within the mapped area. + * @param fpos + * file offset from begin of this mapping. (getSeek() + fpos) + * @param datasize + * length of the data area to discard. */ - // void discard(); + void discard(seekoff_t fpos, len_t datasize); /** * Seek in the file to fpos position and @@ -356,6 +363,7 @@ class struct_region : protected region { using region::writeAt; using region::resident; using region::window; + using region::discard; // note: size means the sizeof(T) using region::size; From 0f3a340e8577070a19b07425fc8fdac44cf75a92 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 31 Jul 2023 00:06:43 +0300 Subject: [PATCH 12/23] Do const safety pass - The filePointer points into read-only memory from mmap() so apply const to few places to ensure nothing is writing into it. - getCubesByShape() may return pointers to past-end of the mmap() area if shape table entry size is zero. ShapeEntry::offset can be wrong if the size is also zero. --- cpp/include/cube.hpp | 2 +- cpp/include/newCache.hpp | 24 ++++++++++++------------ cpp/src/newCache.cpp | 9 ++++++--- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp index 83feaa7..f612ef4 100644 --- a/cpp/include/cube.hpp +++ b/cpp/include/cube.hpp @@ -69,7 +69,7 @@ struct Cube { // Construct from external source. // Cube shares this the memory until modified. // Caller guarantees the memory given will live longer than *this - Cube(XYZ *start, uint8_t n) : bits{1, n}, array(start) {} + Cube(const XYZ *start, uint8_t n) : bits{1, n}, array(const_cast(start)) {} // Copy ctor. 
Cube(const Cube ©) : Cube(copy.size()) { std::copy(copy.begin(), copy.end(), begin()); } diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 29e622e..04bb3fe 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -18,7 +18,7 @@ class CubeIterator { using reference = Cube&; // or also value_type& // constructor - CubeIterator(uint32_t _n, XYZ* ptr) : n(_n), m_ptr(ptr) {} + CubeIterator(uint32_t _n, const XYZ* ptr) : n(_n), m_ptr(ptr) {} // invalid iterator (can't deference) explicit CubeIterator() : n(0), m_ptr(nullptr) {} @@ -53,13 +53,13 @@ class CubeIterator { private: uint32_t n; - XYZ* m_ptr; + const XYZ* m_ptr; }; class ShapeRange { public: - ShapeRange(XYZ* start, XYZ* stop, uint64_t _cubeLen, XYZ _shape) - : b(_cubeLen, start), e(_cubeLen, stop), size_(((uint64_t)stop - (uint64_t)start) / (_cubeLen * sizeof(XYZ))), shape_(_shape) {} + ShapeRange(const XYZ* start, const XYZ* stop, uint64_t _cubeLen, XYZ _shape) + : b(_cubeLen, start), e(_cubeLen, stop), size_(std::distance(start, stop) / _cubeLen), shape_(_shape) {} CubeIterator begin() { return b; } CubeIterator end() { return e; } @@ -118,26 +118,26 @@ class CacheReader : public ICache { }; CubeIterator begin() { - uint8_t* start = filePointer + shapes[0].offset; - return CubeIterator(header->n, (XYZ*)start); + const uint8_t* start = filePointer + shapes[0].offset; + return CubeIterator(header->n, (const XYZ*)start); } CubeIterator end() { - uint8_t* stop = filePointer + shapes[0].offset + header->numPolycubes * header->n * XYZ_SIZE; - return CubeIterator(header->n, (XYZ*)stop); + const uint8_t* stop = filePointer + shapes[0].offset + header->numPolycubes * header->n * XYZ_SIZE; + return CubeIterator(header->n, (const XYZ*)stop); } ShapeRange getCubesByShape(uint32_t i) override; private: - uint8_t* filePointer; + const uint8_t* filePointer; std::string path_; int fileDescriptor_; uint64_t fileSize_; bool fileLoaded_; - Header dummyHeader; - Header* header; - ShapeEntry* shapes; + const Header dummyHeader; + const Header* header; + const ShapeEntry* shapes; }; class FlatCache : public ICache { diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index ef925dc..db57900 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -65,15 +65,18 @@ ShapeRange CacheReader::getCubesByShape(uint32_t i) { if (i >= header->numShapes) { return ShapeRange{nullptr, nullptr, 0, XYZ(0, 0, 0)}; } - XYZ* start = reinterpret_cast(filePointer + shapes[i].offset); - XYZ* end = reinterpret_cast(filePointer + shapes[i].offset + shapes[i].size); + if(shapes[i].size <= 0) { + return ShapeRange(nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + } + auto start = reinterpret_cast(filePointer + shapes[i].offset); + auto end = reinterpret_cast(filePointer + shapes[i].offset + shapes[i].size); return ShapeRange(start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); } void CacheReader::unload() { // unmap file from memory if (fileLoaded_) { - if (munmap(filePointer, fileSize_) == -1) { + if (munmap((void*)filePointer, fileSize_) == -1) { // error handling std::printf("error unmapping file\n"); } From dc47afa2500819014598ed677dc11c27d1bf0eca Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 31 Jul 2023 00:28:21 +0300 Subject: [PATCH 13/23] Close the `friend class Workset` trick. - I can actually read how the progress is calculated. 
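The progress percentage is now derived through the public data() accessor
and std::distance instead of poking at CubeIterator's private m_ptr. The
pattern boils down to this (a sketch, not part of the diff below; first,
cur and last stand in for _begin_total, a and _end):

    // percent of [first, last) consumed once the cursor has advanced to 'cur'
    float percent = 100.0f * float(std::distance(first.data(), cur.data()))
                           / float(std::distance(first.data(), last.data()));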
--- cpp/include/newCache.hpp | 4 +++- cpp/src/cubes.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 04bb3fe..9189182 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -27,6 +27,8 @@ class CubeIterator { const value_type operator*() const { return Cube(m_ptr, n); } // pointer operator->() { return (pointer)m_ptr; } + const XYZ* data() const { return m_ptr; } + // Prefix increment CubeIterator& operator++() { m_ptr += n; @@ -49,7 +51,7 @@ class CubeIterator { friend bool operator<(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr < b.m_ptr; }; friend bool operator>(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr > b.m_ptr; }; friend bool operator!=(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr != b.m_ptr; }; - friend class Workset; + //friend class Workset; private: uint32_t n; diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index cdcc5b4..2bcd3c1 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -48,7 +48,7 @@ struct Workset { auto a = _begin; _begin += 500; if (_begin > _end) _begin = _end; - return {a, _begin, a < _end, 100 * (float)((uint64_t)a.m_ptr - (uint64_t)_begin_total.m_ptr) / ((uint64_t)_end.m_ptr - (uint64_t)_begin_total.m_ptr)}; + return {a, _begin, a < _end, 100 * float(std::distance(_begin_total.data(), a.data())) / std::distance(_begin_total.data(), _end.data())}; } void expand(const Cube &c) { From 0b27a88bda8a3d7ffea6486e9d35f14a86b032f3 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 31 Jul 2023 01:00:52 +0300 Subject: [PATCH 14/23] Update newCache to use libmappedfile --- cpp/include/newCache.hpp | 17 ++++++--- cpp/src/newCache.cpp | 78 +++++++++++++++++++++++----------------- 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 9189182..ca3d71f 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -6,6 +6,7 @@ #include "cube.hpp" #include "hashes.hpp" +#include "mapped_file.hpp" class Workset; @@ -119,7 +120,10 @@ class CacheReader : public ICache { uint64_t size; // in bytes should be multiple of XYZ_SIZE }; - CubeIterator begin() { + // Do begin() and end() make sense for CacheReader + // If the cache file provides data for more than single shape? + // The data might not even be mapped contiguously to save memory. 
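+    // For orientation, loadFile() views the file through three mappings
+    // laid end to end:
+    //   struct_region<Header>     at seek 0
+    //   array_region<ShapeEntry>  right after the header
+    //   array_region<XYZ>         covering the remaining cube data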
+ /*CubeIterator begin() { const uint8_t* start = filePointer + shapes[0].offset; return CubeIterator(header->n, (const XYZ*)start); } @@ -127,15 +131,18 @@ class CacheReader : public ICache { CubeIterator end() { const uint8_t* stop = filePointer + shapes[0].offset + header->numPolycubes * header->n * XYZ_SIZE; return CubeIterator(header->n, (const XYZ*)stop); - } + }*/ + // get shapes at index [0, numShapes()[ ShapeRange getCubesByShape(uint32_t i) override; private: - const uint8_t* filePointer; + std::shared_ptr file_; + std::unique_ptr> header_; + std::unique_ptr> shapes_; + std::unique_ptr> xyz_; + std::string path_; - int fileDescriptor_; - uint64_t fileSize_; bool fileLoaded_; const Header dummyHeader; const Header* header; diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index db57900..cd9726c 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -6,8 +6,7 @@ #include -CacheReader::CacheReader() - : filePointer(nullptr), path_(""), fileDescriptor_(-1), fileSize_(0), fileLoaded_(false), dummyHeader{0, 0, 0, 0}, header(&dummyHeader), shapes(nullptr) {} +CacheReader::CacheReader() : path_(""), fileLoaded_(false), dummyHeader{0, 0, 0, 0}, header(&dummyHeader), shapes(nullptr) {} void CacheReader::printHeader() { if (fileLoaded_) { @@ -33,28 +32,37 @@ int CacheReader::printShapes(void) { int CacheReader::loadFile(const std::string path) { unload(); path_ = path; - fileDescriptor_ = open(path.c_str(), O_RDONLY); - if (fileDescriptor_ == -1) { + // open read-only backing file: + file_ = std::make_shared(); + if (file_->open(path.c_str())) { std::printf("error opening file\n"); return 1; } - // get filesize - fileSize_ = lseek(fileDescriptor_, 0, SEEK_END); - lseek(fileDescriptor_, 0, SEEK_SET); - - // memory map file - filePointer = (uint8_t*)mmap(NULL, fileSize_, PROT_READ, MAP_SHARED, fileDescriptor_, 0); - if (filePointer == MAP_FAILED) { - // error handling - std::printf("errorm mapping file memory"); - close(fileDescriptor_); - return 2; + // map the header struct + header_ = std::make_unique>(file_, 0); + header = header_->get(); + + if (header->magic != MAGIC) { + std::printf("error opening file: file not recognized\n"); + return 1; } - header = (Header*)(filePointer); - shapes = (ShapeEntry*)(filePointer + sizeof(Header)); + // map the ShapeEntry array: + shapes_ = std::make_unique>(file_, header_->getEndSeek(), (*header_)->numShapes); + shapes = shapes_->get(); + + size_t datasize = 0; + for (unsigned int i = 0; i < header->numShapes; ++i) { + datasize += shapes[i].size; + } + + // map rest of the file as XYZ data: + if (file_->size() != shapes_->getEndSeek() + datasize) { + std::printf("warn: file size does not match expected value\n"); + } + xyz_ = std::make_unique>(file_, shapes_->getEndSeek(), datasize); fileLoaded_ = true; @@ -65,28 +73,34 @@ ShapeRange CacheReader::getCubesByShape(uint32_t i) { if (i >= header->numShapes) { return ShapeRange{nullptr, nullptr, 0, XYZ(0, 0, 0)}; } - if(shapes[i].size <= 0) { - return ShapeRange(nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + if (shapes[i].size <= 0) { + return ShapeRange{nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)}; + } + // get section start + // note: shapes[i].offset may have bogus offset + // if any earlier shape table entry was empty before i + // so we ignore the offset here. 
+ size_t offset = 0; + for (unsigned int k = 0; k < i; ++k) { + offset += shapes[k].size; } - auto start = reinterpret_cast(filePointer + shapes[i].offset); - auto end = reinterpret_cast(filePointer + shapes[i].offset + shapes[i].size); - return ShapeRange(start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)); + auto index = offset / XYZ_SIZE; + auto num_xyz = shapes[i].size / XYZ_SIZE; + // pointers to Cube data: + auto start = xyz_->get() + index; + auto end = xyz_->get() + index + num_xyz; + return ShapeRange{start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)}; } void CacheReader::unload() { - // unmap file from memory + // unload file from memory if (fileLoaded_) { - if (munmap((void*)filePointer, fileSize_) == -1) { - // error handling - std::printf("error unmapping file\n"); - } - - // close file descriptor - close(fileDescriptor_); + xyz_.reset(); + shapes_.reset(); + header_.reset(); + file_.reset(); fileLoaded_ = false; } - fileDescriptor_ = -1; - filePointer = nullptr; header = &dummyHeader; shapes = nullptr; } From 1e60971557007ce69f2acee61e6407e0ca6979aa Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Sun, 13 Aug 2023 21:04:53 +0300 Subject: [PATCH 15/23] fixup tests not compiling. --- cpp/tests/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b30d160..42e0014 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -19,4 +19,5 @@ add_executable(${PROJECT_NAME} $ ${TESTS}) target_link_libraries(GTest::GTest INTERFACE gtest_main) target_link_libraries(${PROJECT_NAME} pthread GTest::GTest) +target_link_libraries(${PROJECT_NAME} mapped_file) ConfigureTarget(${PROJECT_NAME}) From 8a8e8505b42f27d94d769e1d427e39cfcb675257 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Wed, 2 Aug 2023 13:32:37 +0300 Subject: [PATCH 16/23] Make DEBUG_PRINT less noisy DEBUG_LEVEL selects the level of debug prints that are compiled in. 0 => Same as not compiling with DEBUG at all. 1 => Only DEBUG_PRINT() 2 => DEBUG1_PRINT() and lower levels are enabled 3 => DEBUG2_PRINT() and lower levels are enabled Change few of the noisiest prints to be silent with DEBUG_LEVEL == 1 --- cpp/include/hashes.hpp | 2 +- cpp/include/utils.hpp | 35 +++++++++++++++++++++++++++++++---- cpp/src/cubes.cpp | 4 ++-- 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index 7999d5c..09feeed 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -100,7 +100,7 @@ struct Hashy { DEBUG_PRINTF("%ld maps by shape\n\r", byshape.size()); for (auto &set : byshape) { auto part = set.second.size(); - DEBUG_PRINTF("bucket [%2d %2d %2d]: %ld\n", set.first.x(), set.first.y(), set.first.z(), part); + DEBUG1_PRINTF("bucket [%2d %2d %2d]: %ld\n", set.first.x(), set.first.y(), set.first.z(), part); sum += part; } return sum; diff --git a/cpp/include/utils.hpp b/cpp/include/utils.hpp index 4cd23e3..f895877 100644 --- a/cpp/include/utils.hpp +++ b/cpp/include/utils.hpp @@ -3,12 +3,39 @@ #define OPENCUBES_UTILS_HPP #include + +// Debug print level: all prints enabled +// below DEBUG_LEVEL. +// DEBUG_LEVEL -> 0 all prints disabled. +// DEBUG_LEVEL -> 1 enable DEBUG_PRINTF() statements +// DEBUG_LEVEL -> 2 enable DEBUG1_PRINTF() statements and earlier +// DEBUG_LEVEL -> 3 all prints enabled. +#define DEBUG_LEVEL 1 + #ifdef DEBUG + +#if DEBUG_LEVEL >= 1 #define DEBUG_PRINTF(...) std::printf(__VA_ARGS__) -#else -#define DEBUG_PRINTF(...) 
\
-    do { \
-    } while (0)
 #endif
+#if DEBUG_LEVEL >= 2
+#define DEBUG1_PRINTF(...) std::printf(__VA_ARGS__)
+#endif
+
+#if DEBUG_LEVEL >= 3
+#define DEBUG2_PRINTF(...) std::printf(__VA_ARGS__)
+#endif
+
+#endif
+
+#ifndef DEBUG_PRINTF
+#define DEBUG_PRINTF(...) do {} while (0)
 #endif
+#ifndef DEBUG1_PRINTF
+#define DEBUG1_PRINTF(...) do {} while (0)
+#endif
+#ifndef DEBUG2_PRINTF
+#define DEBUG2_PRINTF(...) do {} while (0)
+#endif
+
+#endif
\ No newline at end of file
diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp
index 2bcd3c1..bea7327 100644
--- a/cpp/src/cubes.cpp
+++ b/cpp/src/cubes.cpp
@@ -87,14 +87,14 @@ struct Workset {

         std::set_difference(candidates.begin(), end, c.begin(), c.end(), std::back_inserter(tmp));
         candidates = std::move(tmp);
-        DEBUG_PRINTF("candidates: %lu\n\r", candidates.size());
+        DEBUG1_PRINTF("candidates: %lu\n\r", candidates.size());

         Cube newCube(c.size() + 1);
         Cube lowestHashCube(newCube.size());
         Cube rotatedCube(newCube.size());

         for (const auto &p : candidates) {
-            DEBUG_PRINTF("(%2d %2d %2d)\n\r", p.x(), p.y(), p.z());
+            DEBUG2_PRINTF("(%2d %2d %2d)\n\r", p.x(), p.y(), p.z());
             int ax = (p.x() < 0) ? 1 : 0;
             int ay = (p.y() < 0) ? 1 : 0;
             int az = (p.z() < 0) ? 1 : 0;

From 3ea29d1866fe33c11c813a0ccc4463a7bba88d6d Mon Sep 17 00:00:00 2001
From: Jarmo Tiitto
Date: Thu, 10 Aug 2023 08:13:52 +0300
Subject: [PATCH 17/23] Hack Cube struct into 8-bytes

This is the v3 revision of this hack:
previously the uint8_t bit-field actually caused
Cube to be 16 bytes due to padding.

Bit-pack the size, the is_shared flag and the memory address
into the private struct bits_t.
This halves the Cube struct size.

Note: If we get any segfaults from de-referencing the pointer
returned by the get() helper, this hack must be reverted.
---
 cpp/include/cube.hpp | 74 ++++++++++++++++++++++++++++++--------------
 1 file changed, 51 insertions(+), 23 deletions(-)

diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp
index f612ef4..e6719a7 100644
--- a/cpp/include/cube.hpp
+++ b/cpp/include/cube.hpp
@@ -7,6 +7,7 @@
 #include
 #include
 #include
+#include

 #include "utils.hpp"

@@ -45,20 +46,47 @@ using XYZSet = std::unordered_set>;

 struct Cube {
    private:
-    struct {
-        uint8_t is_shared : 1;
-        uint8_t size : 7;  // MAX 127
-    } bits;
-    XYZ *array = nullptr;
-
-    static_assert(sizeof(bits) == sizeof(uint8_t));
+    // cube memory is stored two ways:
+    // normal, new'd buffer
+    // shared, external memory.
+
+    struct bits_t {
+        uint64_t is_shared : 1;
+        uint64_t size : 7;   // MAX 127
+        uint64_t addr : 56;  // low 56-bits of memory address.
+    };
+    // fields
+    bits_t fields;
+
+    static_assert(sizeof(bits_t) == sizeof(void*));
+
+    static XYZ *get(bits_t key) {
+        // pointer bit-hacking:
+        uint64_t addr = key.addr;
+        return reinterpret_cast<XYZ *>(addr);
+    }
+    static bits_t put(bool is_shared, int size, XYZ *addr) {
+        // mask off the top byte of the memory address to fit it into bits_t::addr;
+        // on x86-64 it is not used by the hardware (yet).
+        // This hack actually saves 8 bytes because previously
+        // the uint8_t caused padding to 16 bytes.
+        // @note if we get segfaults dereferencing get(fields)
+        // then this is the problem and this hack must be undone.
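+        // (Background assumption: user-space x86-64 pointers are canonical
+        //  with the high bits zero -- 48-bit virtual addresses, or 57-bit
+        //  with 5-level paging -- so keeping the low 56 bits here and
+        //  zero-extending them in get() reproduces the pointer bit-for-bit.)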
+        uint64_t tmp = reinterpret_cast<uint64_t>((void *)addr);
+        tmp &= 0xffffffffffffff;
+        bits_t bits;
+        bits.addr = tmp;
+        bits.is_shared = is_shared;
+        bits.size = size;
+        return bits;
+    }

    public:
     // Empty cube
-    Cube() : bits{0, 0} {}
+    Cube() : fields{put(0, 0, nullptr)} {}

     // Cube with N capacity
-    explicit Cube(uint8_t N) : bits{0, N}, array(new XYZ[bits.size]) {}
+    explicit Cube(uint8_t N) : fields{put(0, N, new XYZ[N])} {}

     // Construct from pieces
     Cube(std::initializer_list<XYZ> il) : Cube(il.size()) { std::copy(il.begin(), il.end(), begin()); }
@@ -69,20 +97,23 @@ struct Cube {
     // Construct from external source.
     // Cube shares this the memory until modified.
     // Caller guarantees the memory given will live longer than *this
-    Cube(const XYZ *start, uint8_t n) : bits{1, n}, array(const_cast<XYZ *>(start)) {}
+    Cube(const XYZ *start, uint8_t n) : fields{put(1, n, const_cast<XYZ *>(start))} {}

     // Copy ctor.
     Cube(const Cube &copy) : Cube(copy.size()) { std::copy(copy.begin(), copy.end(), begin()); }

     ~Cube() {
+        bits_t bits = fields;
         if (!bits.is_shared) {
-            delete[] array;
+            delete[] get(bits);
         }
     }

     friend void swap(Cube &a, Cube &b) {
         using std::swap;
-        swap(a.array, b.array);
-        swap(a.bits, b.bits);
+        bits_t abits = a.fields;
+        bits_t bbits = b.fields;
+        a.fields = bbits;
+        b.fields = abits;
     }

     Cube(Cube &&mv) : Cube() { swap(*this, mv); }
@@ -98,19 +129,15 @@ struct Cube {
         return *this;
     }

-    size_t size() const { return bits.size; }
+    size_t size() const { return fields.size; }

     XYZ *data() {
-        if (bits.is_shared) {
-            // lift to RAM: this should never happen really.
-            Cube tmp(array, bits.size);
-            swap(*this, tmp);
-            std::printf("Bad use of Cube\n");
-        }
-        return array;
-    }
+        return get(fields);
+    }

-    const XYZ *data() const { return array; }
+    const XYZ *data() const {
+        return get(fields);
+    }

     XYZ *begin() { return data(); }

@@ -140,6 +167,7 @@ struct Cube {
     }
 };

+static_assert(sizeof(Cube) == 8, "Unexpected sizeof(Cube) for Cube");
 static_assert(std::is_move_assignable_v<Cube>, "Cube must be moveable");
 static_assert(std::is_swappable_v<Cube>, "Cube must swappable");

From 0c0b1dad62b511af543e383470af6b34eb3b31f2 Mon Sep 17 00:00:00 2001
From: Jarmo Tiitto
Date: Fri, 11 Aug 2023 19:54:07 +0300
Subject: [PATCH 18/23] Hashy const qualifiers.

- Small changes, see the diff.
---
 cpp/include/cube.hpp   |  6 +++---
 cpp/include/hashes.hpp | 16 ++++++++++------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp
index e6719a7..72b9d97 100644
--- a/cpp/include/cube.hpp
+++ b/cpp/include/cube.hpp
@@ -47,8 +47,8 @@ struct Cube {
    private:
     // cube memory is stored two ways:
-    // normal, new'd buffer
-    // shared, external memory.
+    // normal, new'd buffer: is_shared == false
+    // shared, external memory: is_shared == true

     struct bits_t {
         uint64_t is_shared : 1;
@@ -59,7 +59,7 @@ struct Cube {
     bits_t fields;

     static_assert(sizeof(bits_t) == sizeof(void*));
-
+    // extract the pointer from bits_t
     static XYZ *get(bits_t key) {
         // pointer bit-hacking:
         uint64_t addr = key.addr;
diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp
index 09feeed..b154d2a 100644
--- a/cpp/include/hashes.hpp
+++ b/cpp/include/hashes.hpp
@@ -28,7 +28,7 @@ using CubeSet = std::unordered_set>;
 struct Hashy {
     struct Subsubhashy {
         CubeSet set;
-        std::shared_mutex set_mutex;
+        mutable std::shared_mutex set_mutex;

         template <typename CubeT>
         void insert(CubeT &&c) {
@@ -36,12 +36,16 @@ struct Hashy {
             set.emplace(std::forward<CubeT>(c));
         }

-        bool contains(const Cube &c) {
+        bool contains(const Cube &c) const {
             std::shared_lock lock(set_mutex);
-            return set.count(c);
+            auto itr = set.find(c);
+            if(itr != set.end()) {
+                return true;
+            }
+            return false;
         }

-        auto size() {
+        auto size() const {
             std::shared_lock lock(set_mutex);
             return set.size();
         }
@@ -59,7 +63,7 @@ struct Hashy {
             // printf("new size %ld\n\r", byshape[shape].size());
         }

-        auto size() {
+        auto size() const {
             size_t sum = 0;
             for (auto &set : byhash) {
                 auto part = set.size();
@@ -95,7 +99,7 @@ struct Hashy {
         set.insert(std::forward<CubeT>(c));
     }

-    auto size() {
+    auto size() const {
        size_t sum = 0;
        DEBUG_PRINTF("%ld maps by shape\n\r", byshape.size());
        for (auto &set : byshape) {

From 759be1a194ebde281fe8a3afcc8b3101232116ce Mon Sep 17 00:00:00 2001
From: Jarmo Tiitto
Date: Sat, 12 Aug 2023 02:40:59 +0300
Subject: [PATCH 19/23] cubes: Refactor thread scheduling

- Launching new threads is expensive. Refactor the cubes.cpp
  threading code so that the started threads are kept running
  until the whole run is complete.

- Allow the main thread to do its preparation work in parallel
  with the running Workset. (The next cache file can be loaded
  while the old one is being processed.)
---
 cpp/src/cubes.cpp | 138 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 104 insertions(+), 34 deletions(-)

diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp
index bea7327..ac04e12 100644
--- a/cpp/src/cubes.cpp
+++ b/cpp/src/cubes.cpp
@@ -7,6 +7,8 @@
 #include
 #include
 #include
+#include
+#include

 #include "cache.hpp"
 #include "cube.hpp"
@@ -19,22 +21,27 @@ const int PERF_STEP = 500;

 struct Workset {
     std::mutex mu;
+
+    CacheReader cr;
     CubeIterator _begin_total;
     CubeIterator _begin;
     CubeIterator _end;
     Hashy &hashes;
     XYZ targetShape, shape, expandDim;
     bool notSameShape;
-    Workset(ShapeRange &data, Hashy &hashes, XYZ targetShape, XYZ shape, XYZ expandDim, bool notSameShape)
-        : _begin_total(data.begin())
-        , _begin(data.begin())
-        , _end(data.end())
-        , hashes(hashes)
+    Workset(Hashy &hashes, XYZ targetShape, XYZ shape, XYZ expandDim, bool notSameShape)
+        : hashes(hashes)
         , targetShape(targetShape)
         , shape(shape)
         , expandDim(expandDim)
         , notSameShape(notSameShape) {}

+    void setRange(ShapeRange &data) {
+        _begin_total = data.begin();
+        _begin = data.begin();
+        _end = data.end();
+    }
+
     struct Subset {
         CubeIterator _begin, _end;
         bool valid;

 struct Worker {
-    Workset &ws;
+    std::shared_ptr<Workset> ws;
     int id;
+    int state = 3;  // 1 == completed/waiting for job, 2 == processing, 3 == job assigned.
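+    // Life cycle: run() parks with state=1 and signals cond2; launch() waits
+    // for state <= 1, installs the Workset, sets state=3 and wakes the thread;
+    // run() flips 3 -> 2 while processing; state=0 asks the thread to exit.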
+ std::mutex mtx; + std::condition_variable cond; + std::condition_variable cond2; + std::thread thr; + + Worker(int id_) : id(id_), thr(&Worker::run, this) {} + ~Worker() { + std::unique_lock lock(mtx); + state = 0; + cond.notify_one(); + lock.unlock(); + thr.join(); + } + + void launch(std::shared_ptr ws_) { + std::unique_lock lock(mtx); + while(state > 1) { + cond2.wait(lock); + } + ws = ws_; + state = 3; + cond.notify_one(); + } + + void sync() { + std::unique_lock lock(mtx); + while(state > 1) { + cond2.wait(lock); + } + ws.reset(); + } + void run() { - // std::printf("start %d\n", id); - auto subset = ws.getPart(); - while (subset.valid) { - if (id == 0) { - std::printf(" %5.2f%%\r", subset.percent); - std::flush(std::cout); - } - // std::cout << id << " next subset " << &*subset.begin() << " to " << &*subset.end() << "\n"; - for (auto &c : subset) { - // std::printf("%p\n", (void *)&c); - // c.print(); - ws.expand(c); + std::unique_lock lock(mtx); + std::printf("thread nro. %d started.\n", id); + while(state) { + state = 1; + cond2.notify_one(); + while(state == 1) + cond.wait(lock); + if(!state) + return; + state = 2; + // std::printf("start %d\n", id); + auto subset = ws->getPart(); + while (subset.valid) { + if (id == 0) { + std::printf(" %5.2f%%\r", subset.percent); + std::flush(std::cout); + } + // std::cout << id << " next subset " << &*subset.begin() << " to " << &*subset.end() << "\n"; + for (auto &c : subset) { + // std::printf("%p\n", (void *)&c); + // c.print(); + ws->expand(c); + } + subset = ws->getPart(); } - subset = ws.getPart(); + // std::printf("finished %d\n", id); } - // std::printf("finished %d\n", id); } }; @@ -185,10 +235,19 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c } std::printf("N = %d || generating new cubes from %lu base cubes.\n\r", n, base->size()); hashes.init(n); + + // Start worker threads. 
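+    // (They sleep on a condition variable between shapes; re-using the same
+    //  threads avoids paying thread start-up cost once per shape.)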
+ std::deque workers; + for (int i = 0; i < threads; ++i) { + workers.emplace_back(i); + } + + uint64_t totalSum = 0; auto start = std::chrono::steady_clock::now(); uint32_t totalOutputShapes = hashes.byshape.size(); uint32_t outShapeCount = 0; + auto prevShapes = Hashy::generateShapes(n - 1); for (auto &tup : hashes.byshape) { outShapeCount++; @@ -210,13 +269,14 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c if (diffy == 1) if (shape.y() == shape.x()) diffx = 1; - std::printf(" shape %d %d %d\n\r", shape.x(), shape.y(), shape.z()); + auto ws = std::make_shared(hashes, targetShape, shape, XYZ(diffx, diffy, diffz), abssum); if (use_split_cache) { // load cache file only for this shape std::string cachefile = base_path + "cubes_" + std::to_string(n - 1) + "_" + std::to_string(prevShapes[sid].x()) + "-" + std::to_string(prevShapes[sid].y()) + "-" + std::to_string(prevShapes[sid].z()) + ".bin"; - cr.loadFile(cachefile); + ws->cr.loadFile(cachefile); + base = &ws->cr; // cr.printHeader(); } auto s = base->getCubesByShape(sid); @@ -224,20 +284,26 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c std::printf("ERROR caches shape does not match expected shape!\n"); exit(-1); } - // std::printf("starting %d threads\n\r", threads); - std::vector ts; - Workset ws(s, hashes, targetShape, shape, XYZ(diffx, diffy, diffz), abssum); - std::vector workers; - ts.reserve(threads); - workers.reserve(threads); - for (int i = 0; i < threads; ++i) { - workers.emplace_back(ws, i); - ts.emplace_back(&Worker::run, std::ref(workers[i])); + + ws->setRange(s); + + // Wait for jobs to complete. + for (auto& thr : workers) { + thr.sync(); } - for (int i = 0; i < threads; ++i) { - ts[i].join(); + std::printf(" shape %d %d %d\n\r", shape.x(), shape.y(), shape.z()); + // launch the new jobs. + // Because the workset is held by shared_ptr + // main thread can do above preparation work in parallel + // while the jobs are running. + for (auto& thr : workers) { + thr.launch(ws); } } + // Wait for jobs to complete. + for (auto& thr : workers) { + thr.sync(); + } std::printf(" num: %lu\n\r", hashes.byshape[targetShape].size()); totalSum += hashes.byshape[targetShape].size(); if (write_cache && split_cache) { @@ -252,6 +318,10 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c } } } + + // Stop the workers. + workers.clear(); + if (write_cache && !split_cache) { Cache::save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); } From 243fb3e0baef405e067f6cf76e99648d14cdcfcc Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Sun, 13 Aug 2023 20:13:47 +0300 Subject: [PATCH 20/23] CacheWriter class Implement replacement for Cache::save() CacheWriter should produce identical files to the old code, but is slightly faster as it doesn't wait for the file finalization. The old code still exists as reference but nothing is using it except tests. - libmappedfile would allow the serialization process to be parallelized. (WIP, Not implemented yet.) - Move Header ShapeEntry into cacheformat namespace - Implement CacheWriter - Update cubes.cpp to use the new CacheWriter - Cube::copyout() helper. Idea for this helper is that if the cube representation is something else than plain XYZ array. 
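The intended call pattern is roughly (a sketch with an illustrative file
name, mirroring how cubes.cpp drives it below):

    CacheWriter cw;
    cw.save("cubes_9.bin", hashes, 9);  // returns once the data is handed off
    // ... the main thread continues with the next shape meanwhile ...
    cw.flush();                         // block until all files are finalized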
--- cpp/include/cube.hpp | 7 +++ cpp/include/hashes.hpp | 2 +- cpp/include/newCache.hpp | 74 +++++++++++++++--------- cpp/src/cubes.cpp | 12 ++-- cpp/src/newCache.cpp | 119 +++++++++++++++++++++++++++++++++++++-- 5 files changed, 178 insertions(+), 36 deletions(-) diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp index 72b9d97..e92e570 100644 --- a/cpp/include/cube.hpp +++ b/cpp/include/cube.hpp @@ -165,6 +165,13 @@ struct Cube { void print() const { for (auto &p : *this) std::printf(" (%2d %2d %2d)\n\r", p.x(), p.y(), p.z()); } + + /** + * Copy cube data into destination buffer. + */ + void copyout(int num, XYZ* dest) const { + std::copy_n(begin(), num, dest); + } }; static_assert(sizeof(Cube) == 8, "Unexpected sizeof(Cube) for Cube"); diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp index b154d2a..49462d2 100644 --- a/cpp/include/hashes.hpp +++ b/cpp/include/hashes.hpp @@ -101,7 +101,7 @@ struct Hashy { auto size() const { size_t sum = 0; - DEBUG_PRINTF("%ld maps by shape\n\r", byshape.size()); + DEBUG1_PRINTF("%ld maps by shape\n\r", byshape.size()); for (auto &set : byshape) { auto part = set.second.size(); DEBUG1_PRINTF("bucket [%2d %2d %2d]: %ld\n", set.first.x(), set.first.y(), set.first.z(), part); diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index ca3d71f..242a273 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -4,11 +4,33 @@ #include #include +#include +#include + #include "cube.hpp" #include "hashes.hpp" #include "mapped_file.hpp" -class Workset; +namespace cacheformat { + static constexpr uint32_t MAGIC = 0x42554350; + static constexpr uint32_t XYZ_SIZE = 3; + static constexpr uint32_t ALL_SHAPES = -1; + + struct Header { + uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 + uint32_t n; // we will never need 32bit but it is nicely aligned + uint32_t numShapes; // defines length of the shapeTable + uint64_t numPolycubes; // total number of polycubes + }; + struct ShapeEntry { + uint8_t dim0; // offset by -1 + uint8_t dim1; // offset by -1 + uint8_t dim2; // offset by -1 + uint8_t reserved; // for alignment + uint64_t offset; // from beginning of file + uint64_t size; // in bytes should be multiple of XYZ_SIZE + }; +}; class CubeIterator { public: @@ -52,7 +74,6 @@ class CubeIterator { friend bool operator<(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr < b.m_ptr; }; friend bool operator>(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr > b.m_ptr; }; friend bool operator!=(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr != b.m_ptr; }; - //friend class Workset; private: uint32_t n; @@ -101,25 +122,6 @@ class CacheReader : public ICache { uint32_t numShapes() override { return header->numShapes; }; operator bool() { return fileLoaded_; } - static constexpr uint32_t MAGIC = 0x42554350; - static constexpr uint32_t XYZ_SIZE = 3; - static constexpr uint32_t ALL_SHAPES = -1; - - struct Header { - uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 - uint32_t n; // we will never need 32bit but it is nicely aligned - uint32_t numShapes; // defines length of the shapeTable - uint64_t numPolycubes; // total number of polycubes - }; - struct ShapeEntry { - uint8_t dim0; // offset by -1 - uint8_t dim1; // offset by -1 - uint8_t dim2; // offset by -1 - uint8_t reserved; // for alignment - uint64_t offset; // from beginning of file - uint64_t size; // in bytes should be multiple of XYZ_SIZE - }; - // Do begin() and end() make sense for CacheReader // If the cache file provides 
data for more than single shape? // The data might not even be mapped contiguously to save memory. @@ -138,15 +140,15 @@ class CacheReader : public ICache { private: std::shared_ptr file_; - std::unique_ptr> header_; - std::unique_ptr> shapes_; + std::unique_ptr> header_; + std::unique_ptr> shapes_; std::unique_ptr> xyz_; std::string path_; bool fileLoaded_; - const Header dummyHeader; - const Header* header; - const ShapeEntry* shapes; + const cacheformat::Header dummyHeader; + const cacheformat::Header* header; + const cacheformat::ShapeEntry* shapes; }; class FlatCache : public ICache { @@ -180,4 +182,24 @@ class FlatCache : public ICache { size_t size() override { return allXYZs.size() / n / sizeof(XYZ); } }; +class CacheWriter { +protected: + // CacheWriter flushes the data in background. + std::deque> m_flushes; +public: + CacheWriter() {} + ~CacheWriter(); + + /** + * Capture snapshot of the Hashy and write cache file. + * The data may not be entirely flushed before save() returns. + */ + void save(std::string path, Hashy &hashes, uint8_t n); + + /** + * Complete all flushes immediately. + */ + void flush(); +}; + #endif diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index ac04e12..7e5cf66 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -10,7 +10,6 @@ #include #include -#include "cache.hpp" #include "cube.hpp" #include "hashes.hpp" #include "newCache.hpp" @@ -216,7 +215,8 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c hashes.insert(Cube{{XYZ(0, 0, 0)}}, XYZ(0, 0, 0)); std::printf("%ld elements for %d\n\r", hashes.size(), n); if (write_cache) { - Cache::save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); + CacheWriter cw; + cw.save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); } return FlatCache(hashes, n); } @@ -242,6 +242,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c workers.emplace_back(i); } + CacheWriter cw; uint64_t totalSum = 0; auto start = std::chrono::steady_clock::now(); @@ -307,7 +308,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c std::printf(" num: %lu\n\r", hashes.byshape[targetShape].size()); totalSum += hashes.byshape[targetShape].size(); if (write_cache && split_cache) { - Cache::save(base_path + "cubes_" + std::to_string(n) + "_" + std::to_string(targetShape.x()) + "-" + std::to_string(targetShape.y()) + "-" + + cw.save(base_path + "cubes_" + std::to_string(n) + "_" + std::to_string(targetShape.x()) + "-" + std::to_string(targetShape.y()) + "-" + std::to_string(targetShape.z()) + ".bin", hashes, n); } @@ -323,8 +324,11 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c workers.clear(); if (write_cache && !split_cache) { - Cache::save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); + cw.save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); } + + cw.flush(); + auto end = std::chrono::steady_clock::now(); auto dt_ms = std::chrono::duration_cast(end - start).count(); std::printf("took %.2f s\033[0K\n\r", dt_ms / 1000.f); diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index cd9726c..f7af6ed 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -41,16 +41,16 @@ int CacheReader::loadFile(const std::string path) { } // map the header struct - header_ = std::make_unique>(file_, 0); + header_ = std::make_unique>(file_, 0); header = header_->get(); - if (header->magic != MAGIC) { + if (header->magic != cacheformat::MAGIC) { 
std::printf("error opening file: file not recognized\n"); return 1; } // map the ShapeEntry array: - shapes_ = std::make_unique>(file_, header_->getEndSeek(), (*header_)->numShapes); + shapes_ = std::make_unique>(file_, header_->getEndSeek(), (*header_)->numShapes); shapes = shapes_->get(); size_t datasize = 0; @@ -84,8 +84,8 @@ ShapeRange CacheReader::getCubesByShape(uint32_t i) { for (unsigned int k = 0; k < i; ++k) { offset += shapes[k].size; } - auto index = offset / XYZ_SIZE; - auto num_xyz = shapes[i].size / XYZ_SIZE; + auto index = offset / cacheformat::XYZ_SIZE; + auto num_xyz = shapes[i].size / cacheformat::XYZ_SIZE; // pointers to Cube data: auto start = xyz_->get() + index; auto end = xyz_->get() + index + num_xyz; @@ -106,3 +106,112 @@ void CacheReader::unload() { } CacheReader::~CacheReader() { unload(); } + +CacheWriter::CacheWriter::~CacheWriter() +{ + flush(); +} + + +void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { + if (hashes.size() == 0) return; + + using namespace mapped; + using namespace cacheformat; + + auto file_ = std::make_shared(); + if (file_->openrw(path.c_str(), 0)) { + std::printf("error opening file\n"); + return; + } + + auto header = std::make_unique>(file_, 0); + (*header)->magic = cacheformat::MAGIC; + (*header)->n = n; + (*header)->numShapes = hashes.byshape.size(); + (*header)->numPolycubes = hashes.size(); + + std::vector keys; + keys.reserve((*header)->numShapes); + for (auto &pair : hashes.byshape) keys.push_back(pair.first); + std::sort(keys.begin(), keys.end()); + + auto shapeEntry = std::make_unique>(file_, header->getEndSeek(), (*header)->numShapes); + + uint64_t offset = shapeEntry->getEndSeek(); + size_t num_cubes = 0; + int i = 0; + for (auto &key : keys) { + auto& se = (*shapeEntry)[i++]; + se.dim0 = key.x(); + se.dim1 = key.y(); + se.dim2 = key.z(); + se.reserved = 0; + se.offset = offset; + auto count = hashes.byshape[key].size() ; + num_cubes += count; + se.size = count * XYZ_SIZE * n; + offset += se.size; + } + + // put XYZs + // do this in parallel? + // it takes an long while to write out the file. + // note: we are at peak memory use in this function. + + auto xyz = std::make_unique>(file_, (*shapeEntry)[0].offset, num_cubes * n); + auto put = xyz->get(); + + for (auto &key : keys) { + for (auto &subset : hashes.byshape[key].byhash) { + auto itr = subset.set.begin(); + while(itr != subset.set.end()) { + static_assert(sizeof(XYZ) == XYZ_SIZE); + assert(itr->size() == n); + itr->copyout(n, put); + put += n; + ++itr; + } + } + } + // move the resources into lambda and async launch it. + // the file is finalized in background. + m_flushes.emplace_back(std::async(std::launch::async, [ + file = std::move(file_), + header = std::move(header), + shapeEntry = std::move(shapeEntry), + xyz = std::move(xyz)]() mutable { + // flush. + header->flush(); + shapeEntry->flush(); + xyz->flush(); + // Truncate file to proper size. + file->truncate(xyz->getEndSeek()); + file->close(); + xyz.reset(); + shapeEntry.reset(); + header.reset(); + file.reset(); + })); + + // cleanup completed flushes. 
(don't wait) + auto rm = std::remove_if(m_flushes.begin(), m_flushes.end(), [](auto& fut) { + if(fut.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { + fut.get(); + return true; + } + return false; + }); + m_flushes.erase(rm, m_flushes.end()); + + std::printf("saved %s, %d unfinished.\n\r", path.c_str(), (int)m_flushes.size()); +} + +void CacheWriter::flush() +{ + for(auto& fut : m_flushes) { + fut.get(); + } + m_flushes.clear(); +} + From 31618c488ca09d18ccb0bc07152c5b58a3fb7fd0 Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 14 Aug 2023 04:23:15 +0300 Subject: [PATCH 21/23] CacheWriter: Parallel serialization - CacheWriter now uses thread pool and copies the Hashy using worker threads. This would not be possible without libmapped_file. (N=13 completes now in less than 310 seconds, depends on disk) - Add nice progress bar --- cpp/include/newCache.hpp | 70 ++++++++++------- cpp/src/cubes.cpp | 4 +- cpp/src/newCache.cpp | 164 ++++++++++++++++++++++++++++----------- 3 files changed, 163 insertions(+), 75 deletions(-) diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp index 242a273..888ff14 100644 --- a/cpp/include/newCache.hpp +++ b/cpp/include/newCache.hpp @@ -1,36 +1,38 @@ #pragma once #ifndef OPENCUBES_NEWCACHE_HPP #define OPENCUBES_NEWCACHE_HPP +#include #include -#include - #include -#include +#include +#include +#include +#include #include "cube.hpp" #include "hashes.hpp" #include "mapped_file.hpp" namespace cacheformat { - static constexpr uint32_t MAGIC = 0x42554350; - static constexpr uint32_t XYZ_SIZE = 3; - static constexpr uint32_t ALL_SHAPES = -1; - - struct Header { - uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 - uint32_t n; // we will never need 32bit but it is nicely aligned - uint32_t numShapes; // defines length of the shapeTable - uint64_t numPolycubes; // total number of polycubes - }; - struct ShapeEntry { - uint8_t dim0; // offset by -1 - uint8_t dim1; // offset by -1 - uint8_t dim2; // offset by -1 - uint8_t reserved; // for alignment - uint64_t offset; // from beginning of file - uint64_t size; // in bytes should be multiple of XYZ_SIZE - }; +static constexpr uint32_t MAGIC = 0x42554350; +static constexpr uint32_t XYZ_SIZE = 3; +static constexpr uint32_t ALL_SHAPES = -1; + +struct Header { + uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 + uint32_t n; // we will never need 32bit but it is nicely aligned + uint32_t numShapes; // defines length of the shapeTable + uint64_t numPolycubes; // total number of polycubes +}; +struct ShapeEntry { + uint8_t dim0; // offset by -1 + uint8_t dim1; // offset by -1 + uint8_t dim2; // offset by -1 + uint8_t reserved; // for alignment + uint64_t offset; // from beginning of file + uint64_t size; // in bytes should be multiple of XYZ_SIZE }; +}; // namespace cacheformat class CubeIterator { public: @@ -183,18 +185,32 @@ class FlatCache : public ICache { }; class CacheWriter { -protected: - // CacheWriter flushes the data in background. - std::deque> m_flushes; -public: - CacheWriter() {} + protected: + std::mutex m_mtx; + std::condition_variable m_run; + std::condition_variable m_wait; + bool m_active = true; + + // Jobs that flush and finalize the written file. + std::deque> m_flushes; + + // Temporary copy jobs into the memory mapped file. + std::deque> m_copy; + + // thread pool executing the jobs. + std::deque m_flushers; + + void run(); + + public: + CacheWriter(int num_threads = 8); ~CacheWriter(); /** * Capture snapshot of the Hashy and write cache file. 
* The data may not be entirely flushed before save() returns. */ - void save(std::string path, Hashy &hashes, uint8_t n); + void save(std::string path, Hashy& hashes, uint8_t n); /** * Complete all flushes immediately. diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index 7e5cf66..1630bb6 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -215,7 +215,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c hashes.insert(Cube{{XYZ(0, 0, 0)}}, XYZ(0, 0, 0)); std::printf("%ld elements for %d\n\r", hashes.size(), n); if (write_cache) { - CacheWriter cw; + CacheWriter cw(1); cw.save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); } return FlatCache(hashes, n); @@ -242,7 +242,7 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c workers.emplace_back(i); } - CacheWriter cw; + CacheWriter cw(threads); uint64_t totalSum = 0; auto start = std::chrono::steady_clock::now(); diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp index f7af6ed..4d95792 100644 --- a/cpp/src/newCache.cpp +++ b/cpp/src/newCache.cpp @@ -1,8 +1,4 @@ -#include "../include/newCache.hpp" - -#include -#include -#include +#include "newCache.hpp" #include @@ -107,11 +103,54 @@ void CacheReader::unload() { CacheReader::~CacheReader() { unload(); } -CacheWriter::CacheWriter::~CacheWriter() -{ +CacheWriter::CacheWriter(int num_threads) { + for (int i = 0; i < num_threads; ++i) { + m_flushers.emplace_back(&CacheWriter::run, this); + } +} + +CacheWriter::CacheWriter::~CacheWriter() { flush(); + // stop the threads. + std::unique_lock lock(m_mtx); + m_active = false; + m_run.notify_all(); + lock.unlock(); + for (auto &thr : m_flushers) thr.join(); } +void CacheWriter::run() { + std::unique_lock lock(m_mtx); + while (m_active) { + // do copy jobs: + if (!m_copy.empty()) { + auto task = std::move(m_copy.front()); + m_copy.pop_front(); + lock.unlock(); + + task(); + + lock.lock(); + continue; + } + // file flushes: + if (!m_flushes.empty()) { + auto task = std::move(m_flushes.front()); + m_flushes.pop_front(); + lock.unlock(); + + task(); + + lock.lock(); + continue; + } + // notify that we are done here. + m_wait.notify_one(); + // wait for jobs. + m_run.wait(lock); + } + m_wait.notify_one(); +} void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { if (hashes.size() == 0) return; @@ -125,7 +164,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { return; } - auto header = std::make_unique>(file_, 0); + auto header = std::make_shared>(file_, 0); (*header)->magic = cacheformat::MAGIC; (*header)->n = n; (*header)->numShapes = hashes.byshape.size(); @@ -136,51 +175,91 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { for (auto &pair : hashes.byshape) keys.push_back(pair.first); std::sort(keys.begin(), keys.end()); - auto shapeEntry = std::make_unique>(file_, header->getEndSeek(), (*header)->numShapes); + auto shapeEntry = std::make_shared>(file_, header->getEndSeek(), (*header)->numShapes); uint64_t offset = shapeEntry->getEndSeek(); size_t num_cubes = 0; int i = 0; for (auto &key : keys) { - auto& se = (*shapeEntry)[i++]; + auto &se = (*shapeEntry)[i++]; se.dim0 = key.x(); se.dim1 = key.y(); se.dim2 = key.z(); se.reserved = 0; se.offset = offset; - auto count = hashes.byshape[key].size() ; + auto count = hashes.byshape[key].size(); num_cubes += count; se.size = count * XYZ_SIZE * n; offset += se.size; } // put XYZs - // do this in parallel? - // it takes an long while to write out the file. 
- // note: we are at peak memory use in this function. + // Serialize large CubeSet(s) in parallel. - auto xyz = std::make_unique>(file_, (*shapeEntry)[0].offset, num_cubes * n); + auto xyz = std::make_shared>(file_, (*shapeEntry)[0].offset, num_cubes * n); auto put = xyz->get(); + auto copyrange = [n](CubeSet::iterator itr, CubeSet::iterator end, XYZ *dest) -> void { + while (itr != end) { + static_assert(sizeof(XYZ) == XYZ_SIZE); + assert(itr->size() == n); + itr->copyout(n, dest); + dest += n; + ++itr; + } + }; + + auto time_start = std::chrono::steady_clock::now(); for (auto &key : keys) { for (auto &subset : hashes.byshape[key].byhash) { auto itr = subset.set.begin(); - while(itr != subset.set.end()) { - static_assert(sizeof(XYZ) == XYZ_SIZE); - assert(itr->size() == n); - itr->copyout(n, put); - put += n; - ++itr; + + ptrdiff_t dist = subset.set.size(); + // distribute if range is large enough. + auto skip = std::max(4096L, std::max(1L, dist / (signed)m_flushers.size())); + while (dist > skip) { + auto start = itr; + auto dest = put; + + auto inc = std::min(dist, skip); + std::advance(itr, inc); + put += n * inc; + dist = std::distance(itr, subset.set.end()); + + auto done = 100.0f * (std::distance(xyz->get(), put) / float(num_cubes * n)); + std::printf("writing data %5.2f%% ... \r", done); + std::flush(std::cout); + + std::lock_guard lock(m_mtx); + m_copy.emplace_back(std::bind(copyrange, start, itr, dest)); + m_run.notify_all(); + } + // copy remainder, if any. + if (dist) { + std::lock_guard lock(m_mtx); + m_copy.emplace_back(std::bind(copyrange, itr, subset.set.end(), put)); + m_run.notify_all(); + put += n * dist; + + auto done = 100.0f * (std::distance(xyz->get(), put) / float(num_cubes * n)); + std::printf("writing data %5.2f%% ... \r", done); + std::flush(std::cout); } } } - // move the resources into lambda and async launch it. - // the file is finalized in background. - m_flushes.emplace_back(std::async(std::launch::async, [ - file = std::move(file_), - header = std::move(header), - shapeEntry = std::move(shapeEntry), - xyz = std::move(xyz)]() mutable { + + // sanity check: + assert(put == (*xyz).get() + num_cubes * n); + + // sync up. + std::unique_lock lock(m_mtx); + while (!m_copy.empty()) { + m_wait.wait(lock); + } + + // move the resources into flush job. + m_flushes.emplace_back(std::bind( + [](auto &&file, auto &&header, auto &&shapeEntry, auto &&xyz) -> void { // flush. header->flush(); shapeEntry->flush(); @@ -188,30 +267,23 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) { // Truncate file to proper size. file->truncate(xyz->getEndSeek()); file->close(); + file.reset(); xyz.reset(); shapeEntry.reset(); header.reset(); - file.reset(); - })); + }, + std::move(file_), std::move(header), std::move(shapeEntry), std::move(xyz))); + m_run.notify_all(); - // cleanup completed flushes. 
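+    // (All queued copyrange jobs must finish before the flush job below may
+    //  run: the flush truncates and closes the file, so no copy may still be
+    //  writing into the mapping by then.)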
(don't wait) - auto rm = std::remove_if(m_flushes.begin(), m_flushes.end(), [](auto& fut) { - if(fut.wait_for(std::chrono::seconds(0)) == std::future_status::ready) { - fut.get(); - return true; - } - return false; - }); - m_flushes.erase(rm, m_flushes.end()); + auto time_end = std::chrono::steady_clock::now(); + auto dt_ms = std::chrono::duration_cast(time_end - time_start).count(); - std::printf("saved %s, %d unfinished.\n\r", path.c_str(), (int)m_flushes.size()); + std::printf("saved %s, took %.2f s\n\r", path.c_str(), dt_ms / 1000.f); } -void CacheWriter::flush() -{ - for(auto& fut : m_flushes) { - fut.get(); +void CacheWriter::flush() { + std::unique_lock lock(m_mtx); + while (!m_flushes.empty()) { + m_wait.wait(lock); } - m_flushes.clear(); } - From 25b969468a416eb0d01d2ffe1f38984771df7c7f Mon Sep 17 00:00:00 2001 From: Jarmo Tiitto Date: Mon, 14 Aug 2023 04:37:48 +0300 Subject: [PATCH 22/23] Remove include/cache.hpp src/cache.cpp The old cache code has been deprecated since CacheWriter arrived: Only user was in tests/src/test_cache.cpp so drop the test case because it doesn't have any impact on the main cubes anymore. - Delete include/cache.hpp src/cache.cpp source files. Hopefully they will not be missed. :-) --- cpp/CMakeLists.txt | 1 - cpp/include/cache.hpp | 35 -------- cpp/src/cache.cpp | 163 ----------------------------------- cpp/tests/src/test_cache.cpp | 10 --- 4 files changed, 209 deletions(-) delete mode 100644 cpp/include/cache.hpp delete mode 100644 cpp/src/cache.cpp delete mode 100644 cpp/tests/src/test_cache.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 05e50f0..78e91a2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -44,7 +44,6 @@ ConfigureTarget(mapped_file) # Source files add_library(CubeObjs OBJECT "src/cubes.cpp" - "src/cache.cpp" "src/rotations.cpp" "src/newCache.cpp" ) diff --git a/cpp/include/cache.hpp b/cpp/include/cache.hpp deleted file mode 100644 index 6c3480d..0000000 --- a/cpp/include/cache.hpp +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once -#ifndef OPENCUBES_CACHE_HPP -#define OPENCUBES_CACHE_HPP -#include - -#include "hashes.hpp" -#include "utils.hpp" - -struct Cache { - static constexpr uint32_t MAGIC = 0x42554350; - static constexpr uint32_t XYZ_SIZE = 3; - static constexpr uint32_t ALL_SHAPES = -1; - struct Header { - uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 - uint32_t n; // we will never need 32bit but it is nicely aligned - uint32_t numShapes; // defines length of the shapeTable - uint64_t numPolycubes; // total number of polycubes - }; - struct ShapeEntry { - uint8_t dim0; // offset by -1 - uint8_t dim1; // offset by -1 - uint8_t dim2; // offset by -1 - uint8_t reserved; // for alignment - uint64_t offset; // from beginning of file - uint64_t size; // in bytes should be multiple of XYZ_SIZE - }; - - static void save(std::string path, Hashy& hashes, uint8_t n); - static Hashy load(std::string path, uint32_t extractShape = ALL_SHAPES); - - int filedesc; - void* mmap_ptr; -}; - -#endif diff --git a/cpp/src/cache.cpp b/cpp/src/cache.cpp deleted file mode 100644 index 071ff0f..0000000 --- a/cpp/src/cache.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include "cache.hpp" - -#include -#include -#include -#include -#include - -#include "utils.hpp" - -/* -==================== -cache file header -==================== - -uint32_t magic = "PCUB" -uint32_t n = cache file for n cubes in a polycube -uint32_t numShapes = number of different shapes in cachefile -------- - -==================== -shapetable: -==================== 
-shapeEntry { - uint8_t dim0 // offset by -1 - uint8_t dim1 // offset by -1 - uint8_t dim2 // offset by -1 - uint8_t reserved - uint64_t offset in file -} -shapeEntry[numShapes] - - -==================== -XYZ data -==================== - -*/ - -void Cache::save(std::string path, Hashy &hashes, uint8_t n) { - if (hashes.size() == 0) return; - std::ofstream ofs(path, std::ios::binary); - Header header; - header.magic = MAGIC; - header.n = n; - header.numShapes = hashes.byshape.size(); - header.numPolycubes = hashes.size(); - ofs.write((const char *)&header, sizeof(header)); - - std::vector keys; - keys.reserve(header.numShapes); - for (auto &pair : hashes.byshape) keys.push_back(pair.first); - std::sort(keys.begin(), keys.end()); - uint64_t offset = sizeof(Header) + header.numShapes * sizeof(ShapeEntry); - for (auto &key : keys) { - ShapeEntry se; - se.dim0 = key.x(); - se.dim1 = key.y(); - se.dim2 = key.z(); - se.reserved = 0; - se.offset = offset; - se.size = hashes.byshape[key].size() * XYZ_SIZE * n; - offset += se.size; - ofs.write((const char *)&se, sizeof(ShapeEntry)); - } - // put XYZs - for (auto &key : keys) { - for (auto &subset : hashes.byshape[key].byhash) - for (const auto &c : subset.set) { - if constexpr (sizeof(XYZ) == XYZ_SIZE) { - ofs.write((const char *)c.data(), sizeof(XYZ) * c.size()); - } else { - for (const auto &p : c) { - ofs.write((const char *)p.data, XYZ_SIZE); - } - } - } - } - - std::printf("saved %s\n\r", path.c_str()); -} - -Hashy Cache::load(std::string path, uint32_t extractShape) { - Hashy cubes; - auto ifs = std::ifstream(path, std::ios::binary); - if (!ifs.is_open()) return cubes; - Header header; - if (!ifs.read((char *)&header, sizeof(header))) { - return cubes; - } - // check magic - if (header.magic != MAGIC) { - return cubes; - } -#ifdef CACHE_LOAD_HEADER_ONLY - std::printf("loading cache file \"%s\" for N = %u", path.c_str(), header.n); - std::printf(", %u shapes, %lu XYZs\n\r", header.numShapes, header.numPolycubes); -#endif - auto cubeSize = XYZ_SIZE * header.n; - DEBUG_PRINTF("cubeSize: %u\n\r", cubeSize); - - for (uint32_t i = 0; i < header.numShapes; ++i) { - ShapeEntry shapeEntry; - if (!ifs.read((char *)&shapeEntry, sizeof(shapeEntry))) { - std::printf("ERROR reading ShapeEntry %u\n\r", i); - exit(-1); - } - if (ALL_SHAPES != extractShape && i != extractShape) continue; -#ifdef CACHE_PRINT_SHAPEENTRIES - std::printf("ShapeEntry %3u: [%2d %2d %2d] offset: 0x%08lx size: 0x%08lx (%ld polycubes)\n\r", i, shapeEntry.dim0, shapeEntry.dim1, shapeEntry.dim2, - shapeEntry.offset, shapeEntry.size, shapeEntry.size / cubeSize); -#endif - if (shapeEntry.size % cubeSize != 0) { - std::printf("ERROR shape block is not divisible by cubeSize!\n\r"); - exit(-1); - } -#ifndef CACHE_LOAD_HEADER_ONLY - // remember pos in file - auto pos = ifs.tellg(); - - // read XYZ contents - ifs.seekg(shapeEntry.offset); - const uint32_t CHUNK_SIZE = 512 * XYZ_SIZE; - uint8_t buf[CHUNK_SIZE] = {0}; - uint64_t buf_offset = 0; - uint32_t numCubes = shapeEntry.size / cubeSize; - XYZ shape(shapeEntry.dim0, shapeEntry.dim1, shapeEntry.dim2); - uint64_t readsize = shapeEntry.size - buf_offset; - if (readsize > CHUNK_SIZE) readsize = CHUNK_SIZE; - if (!ifs.read((char *)&buf, readsize)) { - std::printf("ERROR reading XYZs for Shape %u\n\r", i); - exit(-1); - } - for (uint32_t j = 0; j < numCubes; ++j) { - Cube next(header.n); - for (uint32_t k = 0; k < header.n; ++k) { - // check if buf contains next XYZ - uint64_t curr_offset = j * cubeSize + k * XYZ_SIZE; - if (curr_offset >= 
-                    // std::printf("reload buffer\n\r");
-                    buf_offset += CHUNK_SIZE;
-                    readsize = shapeEntry.size - buf_offset;
-                    if (readsize > CHUNK_SIZE) readsize = CHUNK_SIZE;
-                    if (!ifs.read((char *)&buf, readsize)) {
-                        std::printf("ERROR reading XYZs for Shape %u\n\r", i);
-                        exit(-1);
-                    }
-                }
-
-                next.data()[k].data[0] = buf[curr_offset - buf_offset + 0];
-                next.data()[k].data[1] = buf[curr_offset - buf_offset + 1];
-                next.data()[k].data[2] = buf[curr_offset - buf_offset + 2];
-            }
-            cubes.insert(next, shape);
-        }
-
-        // restore pos
-        ifs.seekg(pos);
-#endif
-    }
-    std::printf(" loaded %lu cubes\n\r", cubes.size());
-    return cubes;
-}
diff --git a/cpp/tests/src/test_cache.cpp b/cpp/tests/src/test_cache.cpp
deleted file mode 100644
index ae10cbf..0000000
--- a/cpp/tests/src/test_cache.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-#include <gtest/gtest.h>
-
-#include "cache.hpp"
-
-TEST(CacheTests, TestCacheLoadDoesNotThrow) { EXPECT_NO_THROW(Cache::load("./test_data.bin")); }
-
-TEST(CacheTests, TestCacheSaveDoesNotThrow) {
-    auto data = Cache::load("./test_data.bin");
-    EXPECT_NO_THROW(Cache::save("./temp.bin", data, 255));
-}
\ No newline at end of file

From 7c68374e8d325a6cdabe2b94fcb0a3b4dd51c2c8 Mon Sep 17 00:00:00 2001
From: Jarmo Tiitto
Date: Mon, 14 Aug 2023 09:56:50 +0300
Subject: [PATCH 23/23] CacheWriter: Fix-up synchronization

CacheWriter didn't properly wait for queued job(s) to complete:
a job is popped off its deque before it runs, so waiting for the
deque to become empty can return while a task is still executing.
Fix this with a counter that is incremented when a job is queued
and decremented *after* the task has run.
---
 cpp/include/newCache.hpp | 2 ++
 cpp/src/newCache.cpp     | 9 +++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp
index 888ff14..c24a06b 100644
--- a/cpp/include/newCache.hpp
+++ b/cpp/include/newCache.hpp
@@ -192,9 +192,11 @@ class CacheWriter {
     bool m_active = true;
 
     // Jobs that flush and finalize the written file.
+    size_t m_num_flushes = 0;
     std::deque<std::packaged_task<void(void)>> m_flushes;
 
     // Temporary copy jobs into the memory mapped file.
+    size_t m_num_copys = 0;
     std::deque<std::packaged_task<void(void)>> m_copy;
 
     // thread pool executing the jobs.
diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp
index 4d95792..93f15b3 100644
--- a/cpp/src/newCache.cpp
+++ b/cpp/src/newCache.cpp
@@ -131,6 +131,7 @@ void CacheWriter::run() {
             task();
 
             lock.lock();
+            --m_num_copys;
             continue;
         }
         // file flushes:
@@ -142,6 +143,7 @@ void CacheWriter::run() {
             task();
 
             lock.lock();
+            --m_num_flushes;
             continue;
         }
         // notify that we are done here.
@@ -232,12 +234,14 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) {
 
             std::lock_guard lock(m_mtx);
             m_copy.emplace_back(std::bind(copyrange, start, itr, dest));
+            ++m_num_copys;
             m_run.notify_all();
         }
         // copy remainder, if any.
         if (dist) {
             std::lock_guard lock(m_mtx);
             m_copy.emplace_back(std::bind(copyrange, itr, subset.set.end(), put));
+            ++m_num_copys;
             m_run.notify_all();
 
             put += n * dist;
@@ -253,7 +257,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) {
     // sync up.
     std::unique_lock lock(m_mtx);
-    while (!m_copy.empty()) {
+    while (m_num_copys) {
         m_wait.wait(lock);
     }
 
@@ -273,6 +277,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) {
             header.reset();
         },
         std::move(file_), std::move(header), std::move(shapeEntry), std::move(xyz)));
+    ++m_num_flushes;
     m_run.notify_all();
 
     auto time_end = std::chrono::steady_clock::now();
@@ -283,7 +288,7 @@ void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) {
 
 void CacheWriter::flush() {
     std::unique_lock lock(m_mtx);
-    while (!m_flushes.empty()) {
+    while (m_num_flushes) {
         m_wait.wait(lock);
     }
 }
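A note on the pattern PATCH 23 settles on, since it is easy to get wrong:
run() pops a job off its deque *before* executing it, so a waiter that only
checks deque emptiness can return while a task is still in flight. The fix is
a pending-job counter that is incremented under the queue mutex when a job is
enqueued and decremented, under the same mutex, only after the job has
finished, with a condition variable to wake waiters once it reaches zero.
Below is a minimal, self-contained sketch of that idea. It is illustrative
only, not CacheWriter itself: the names (JobQueue, enqueue, drain,
m_num_pending) are invented for the example, there is a single worker instead
of a pool, and it uses copyable std::function jobs where CacheWriter's tasks
bind move-only resources.

#include <condition_variable>
#include <cstddef>
#include <cstdio>
#include <deque>
#include <functional>
#include <mutex>
#include <thread>

class JobQueue {
   public:
    JobQueue() : m_worker([this] { run(); }) {}

    ~JobQueue() {
        {
            std::lock_guard lock(m_mtx);
            m_active = false;
            m_run.notify_all();
        }
        m_worker.join();
    }

    // Queue a job: the pending counter is bumped under the same lock
    // that guards the deque.
    void enqueue(std::function<void()> job) {
        std::lock_guard lock(m_mtx);
        m_jobs.emplace_back(std::move(job));
        ++m_num_pending;
        m_run.notify_all();
    }

    // Block until every queued job has *finished running*, not merely
    // been popped off the deque.
    void drain() {
        std::unique_lock lock(m_mtx);
        m_wait.wait(lock, [this] { return m_num_pending == 0; });
    }

   private:
    void run() {
        std::unique_lock lock(m_mtx);
        while (m_active || !m_jobs.empty()) {
            if (!m_jobs.empty()) {
                auto task = std::move(m_jobs.front());
                m_jobs.pop_front();
                lock.unlock();  // run the job without holding the lock
                task();
                lock.lock();
                --m_num_pending;  // decremented *after* the task has run
                m_wait.notify_all();
                continue;
            }
            if (m_active) m_run.wait(lock);
        }
    }

    std::mutex m_mtx;
    std::condition_variable m_run, m_wait;
    std::deque<std::function<void()>> m_jobs;
    std::size_t m_num_pending = 0;
    bool m_active = true;
    std::thread m_worker;  // declared last: starts after the state above is ready
};

int main() {
    JobQueue q;
    for (int i = 0; i < 4; ++i) {
        q.enqueue([i] { std::printf("job %d done\n", i); });
    }
    q.drain();  // returns only after all four jobs have executed
    return 0;
}

Because the decrement and the emptiness check are serialized by the same
mutex, drain() can only observe m_num_pending == 0 after every task() call
has returned, which is exactly the guarantee the pre-patch
while (!m_flushes.empty()) loop failed to provide.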