diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6151054..78e91a2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -38,10 +38,12 @@ macro(ConfigureTarget Target) ) endmacro() +add_library(mapped_file STATIC "libraries/mapped_file.cpp") +ConfigureTarget(mapped_file) + # Source files add_library(CubeObjs OBJECT "src/cubes.cpp" - "src/cache.cpp" "src/rotations.cpp" "src/newCache.cpp" ) @@ -50,6 +52,7 @@ ConfigureTarget(CubeObjs) # Build main program add_executable(${PROJECT_NAME} "program.cpp" $) target_link_libraries(${PROJECT_NAME} pthread) +target_link_libraries(${PROJECT_NAME} mapped_file) ConfigureTarget(${PROJECT_NAME}) # Optionally build tests diff --git a/cpp/include/cache.hpp b/cpp/include/cache.hpp deleted file mode 100644 index 6c3480d..0000000 --- a/cpp/include/cache.hpp +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once -#ifndef OPENCUBES_CACHE_HPP -#define OPENCUBES_CACHE_HPP -#include - -#include "hashes.hpp" -#include "utils.hpp" - -struct Cache { - static constexpr uint32_t MAGIC = 0x42554350; - static constexpr uint32_t XYZ_SIZE = 3; - static constexpr uint32_t ALL_SHAPES = -1; - struct Header { - uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 - uint32_t n; // we will never need 32bit but it is nicely aligned - uint32_t numShapes; // defines length of the shapeTable - uint64_t numPolycubes; // total number of polycubes - }; - struct ShapeEntry { - uint8_t dim0; // offset by -1 - uint8_t dim1; // offset by -1 - uint8_t dim2; // offset by -1 - uint8_t reserved; // for alignment - uint64_t offset; // from beginning of file - uint64_t size; // in bytes should be multiple of XYZ_SIZE - }; - - static void save(std::string path, Hashy& hashes, uint8_t n); - static Hashy load(std::string path, uint32_t extractShape = ALL_SHAPES); - - int filedesc; - void* mmap_ptr; -}; - -#endif diff --git a/cpp/include/cube.hpp b/cpp/include/cube.hpp index 83feaa7..e92e570 100644 --- a/cpp/include/cube.hpp +++ b/cpp/include/cube.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "utils.hpp" @@ -45,20 +46,47 @@ using XYZSet = std::unordered_set>; struct Cube { private: - struct { - uint8_t is_shared : 1; - uint8_t size : 7; // MAX 127 - } bits; - XYZ *array = nullptr; - - static_assert(sizeof(bits) == sizeof(uint8_t)); + // cube memory is stored two ways: + // normal, new'd buffer: is_shared == false + // shared, external memory: is_shared == true + + struct bits_t { + uint64_t is_shared : 1; + uint64_t size : 7; // MAX 127 + uint64_t addr : 56; // low 56-bits of memory address. + }; + // fields + bits_t fields; + + static_assert(sizeof(bits_t) == sizeof(void*)); + // extract the pointer from bits_t + static XYZ *get(bits_t key) { + // pointer bit-hacking: + uint64_t addr = key.addr; + return reinterpret_cast(addr); + } + static bits_t put(bool is_shared, int size, XYZ *addr) { + // mask off top byte from the memory address to fit it into bits_t::addr + // on x86-64 it is not used by the hardware (yet). + // This hack actually saves 8 bytes because previously + // the uint8_t caused padding to 16 bytes. + // @note if we get segfaults dereferencing get(fields) + // then this is the problem and this hack must be undone. 
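+        // Illustration (editor's sketch, hypothetical address): with 4-level paging,
+        // x86-64 user-space pointers occupy only the low 48 bits, e.g.
+        //   0x00007f1234567890 & 0x00ffffffffffffff == 0x00007f1234567890
+        // so the address round-trips through bits_t::addr unchanged.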
+        uint64_t tmp = reinterpret_cast<uint64_t>((void *)addr);
+        tmp &= 0xffffffffffffff;
+        bits_t bits;
+        bits.addr = tmp;
+        bits.is_shared = is_shared;
+        bits.size = size;
+        return bits;
+    }

    public:
     // Empty cube
-    Cube() : bits{0, 0} {}
+    Cube() : fields{put(0, 0, nullptr)} {}

     // Cube with N capacity
-    explicit Cube(uint8_t N) : bits{0, N}, array(new XYZ[bits.size]) {}
+    explicit Cube(uint8_t N) : fields{put(0, N, new XYZ[N])} {}

     // Construct from pieces
     Cube(std::initializer_list<XYZ> il) : Cube(il.size()) { std::copy(il.begin(), il.end(), begin()); }
@@ -69,20 +97,23 @@
     // Construct from external source.
     // Cube shares the memory until modified.
     // Caller guarantees the memory given will live longer than *this
-    Cube(XYZ *start, uint8_t n) : bits{1, n}, array(start) {}
+    Cube(const XYZ *start, uint8_t n) : fields{put(1, n, const_cast<XYZ *>(start))} {}

     // Copy ctor.
     Cube(const Cube &copy) : Cube(copy.size()) { std::copy(copy.begin(), copy.end(), begin()); }

     ~Cube() {
+        bits_t bits = fields;
         if (!bits.is_shared) {
-            delete[] array;
+            delete[] get(bits);
         }
     }

     friend void swap(Cube &a, Cube &b) {
         using std::swap;
-        swap(a.array, b.array);
-        swap(a.bits, b.bits);
+        bits_t abits = a.fields;
+        bits_t bbits = b.fields;
+        a.fields = bbits;
+        b.fields = abits;
     }

     Cube(Cube &&mv) : Cube() { swap(*this, mv); }
@@ -98,19 +129,15 @@
         return *this;
     }

-    size_t size() const { return bits.size; }
+    size_t size() const { return fields.size; }

     XYZ *data() {
-        if (bits.is_shared) {
-            // lift to RAM: this should never happen really.
-            Cube tmp(array, bits.size);
-            swap(*this, tmp);
-            std::printf("Bad use of Cube\n");
-        }
-        return array;
-    }
+        return get(fields);
+    }

-    const XYZ *data() const { return array; }
+    const XYZ *data() const {
+        return get(fields);
+    }

     XYZ *begin() { return data(); }
@@ -138,8 +165,16 @@
     void print() const {
         for (auto &p : *this) std::printf(" (%2d %2d %2d)\n\r", p.x(), p.y(), p.z());
     }
+
+    /**
+     * Copy cube data into destination buffer.
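+     * Example (illustrative only; dest must have room for num entries):
+     *   XYZ buf[32];
+     *   c.copyout(c.size(), buf);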
+     */
+    void copyout(int num, XYZ* dest) const {
+        std::copy_n(begin(), num, dest);
+    }
 };

+static_assert(sizeof(Cube) == 8, "Unexpected sizeof(Cube) for Cube");
 static_assert(std::is_move_assignable_v<Cube>, "Cube must be moveable");
 static_assert(std::is_swappable_v<Cube>, "Cube must be swappable");
diff --git a/cpp/include/hashes.hpp b/cpp/include/hashes.hpp
index 7999d5c..49462d2 100644
--- a/cpp/include/hashes.hpp
+++ b/cpp/include/hashes.hpp
@@ -28,7 +28,7 @@ using CubeSet = std::unordered_set>;
 struct Hashy {
     struct Subsubhashy {
         CubeSet set;
-        std::shared_mutex set_mutex;
+        mutable std::shared_mutex set_mutex;

         template <typename CubeT>
         void insert(CubeT &&c) {
@@ -36,12 +36,16 @@
             set.emplace(std::forward<CubeT>(c));
         }

-        bool contains(const Cube &c) {
+        bool contains(const Cube &c) const {
             std::shared_lock lock(set_mutex);
-            return set.count(c);
+            auto itr = set.find(c);
+            if (itr != set.end()) {
+                return true;
+            }
+            return false;
         }

-        auto size() {
+        auto size() const {
             std::shared_lock lock(set_mutex);
             return set.size();
         }
@@ -59,7 +63,7 @@
             // printf("new size %ld\n\r", byshape[shape].size());
         }

-        auto size() {
+        auto size() const {
             size_t sum = 0;
             for (auto &set : byhash) {
                 auto part = set.size();
@@ -95,12 +99,12 @@
         set.insert(std::forward<CubeT>(c));
     }

-    auto size() {
+    auto size() const {
         size_t sum = 0;
-        DEBUG_PRINTF("%ld maps by shape\n\r", byshape.size());
+        DEBUG1_PRINTF("%ld maps by shape\n\r", byshape.size());
         for (auto &set : byshape) {
             auto part = set.second.size();
-            DEBUG_PRINTF("bucket [%2d %2d %2d]: %ld\n", set.first.x(), set.first.y(), set.first.z(), part);
+            DEBUG1_PRINTF("bucket [%2d %2d %2d]: %ld\n", set.first.x(), set.first.y(), set.first.z(), part);
             sum += part;
         }
         return sum;
diff --git a/cpp/include/newCache.hpp b/cpp/include/newCache.hpp
index 29e622e..c24a06b 100644
--- a/cpp/include/newCache.hpp
+++ b/cpp/include/newCache.hpp
@@ -1,13 +1,38 @@
 #pragma once
 #ifndef OPENCUBES_NEWCACHE_HPP
 #define OPENCUBES_NEWCACHE_HPP
+#include
 #include
+#include
+#include
+#include
 #include
+#include

 #include "cube.hpp"
 #include "hashes.hpp"
-
-class Workset;
+#include "mapped_file.hpp"
+
+namespace cacheformat {
+static constexpr uint32_t MAGIC = 0x42554350;
+static constexpr uint32_t XYZ_SIZE = 3;
+static constexpr uint32_t ALL_SHAPES = -1;
+
+struct Header {
+    uint32_t magic = MAGIC;  // should be "PCUB" = 0x42554350
+    uint32_t n;              // we will never need 32 bits, but it is nicely aligned
+    uint32_t numShapes;      // defines length of the shapeTable
+    uint64_t numPolycubes;   // total number of polycubes
+};
+struct ShapeEntry {
+    uint8_t dim0;      // offset by -1
+    uint8_t dim1;      // offset by -1
+    uint8_t dim2;      // offset by -1
+    uint8_t reserved;  // for alignment
+    uint64_t offset;   // from beginning of file
+    uint64_t size;     // in bytes, should be a multiple of XYZ_SIZE
+};
+};  // namespace cacheformat

 class CubeIterator {
    public:
    using value_type = Cube;
    using reference = Cube&;  // or also value_type&

    // constructor
-    CubeIterator(uint32_t _n, XYZ* ptr) : n(_n), m_ptr(ptr) {}
+    CubeIterator(uint32_t _n, const XYZ* ptr) : n(_n), m_ptr(ptr) {}

    // invalid iterator (can't dereference)
    explicit CubeIterator() : n(0), m_ptr(nullptr) {}
@@ -27,6 +52,8 @@
    const value_type operator*() const { return Cube(m_ptr, n); }
    // pointer operator->() { return (pointer)m_ptr; }

+    const XYZ* data() const { return m_ptr; }
+
    // Prefix increment
    CubeIterator& operator++() {
        m_ptr += n;
@@ -49,17 +76,16 @@
    friend bool operator<(const CubeIterator& a, const
CubeIterator& b) { return a.m_ptr < b.m_ptr; }; friend bool operator>(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr > b.m_ptr; }; friend bool operator!=(const CubeIterator& a, const CubeIterator& b) { return a.m_ptr != b.m_ptr; }; - friend class Workset; private: uint32_t n; - XYZ* m_ptr; + const XYZ* m_ptr; }; class ShapeRange { public: - ShapeRange(XYZ* start, XYZ* stop, uint64_t _cubeLen, XYZ _shape) - : b(_cubeLen, start), e(_cubeLen, stop), size_(((uint64_t)stop - (uint64_t)start) / (_cubeLen * sizeof(XYZ))), shape_(_shape) {} + ShapeRange(const XYZ* start, const XYZ* stop, uint64_t _cubeLen, XYZ _shape) + : b(_cubeLen, start), e(_cubeLen, stop), size_(std::distance(start, stop) / _cubeLen), shape_(_shape) {} CubeIterator begin() { return b; } CubeIterator end() { return e; } @@ -98,46 +124,33 @@ class CacheReader : public ICache { uint32_t numShapes() override { return header->numShapes; }; operator bool() { return fileLoaded_; } - static constexpr uint32_t MAGIC = 0x42554350; - static constexpr uint32_t XYZ_SIZE = 3; - static constexpr uint32_t ALL_SHAPES = -1; - - struct Header { - uint32_t magic = MAGIC; // shoud be "PCUB" = 0x42554350 - uint32_t n; // we will never need 32bit but it is nicely aligned - uint32_t numShapes; // defines length of the shapeTable - uint64_t numPolycubes; // total number of polycubes - }; - struct ShapeEntry { - uint8_t dim0; // offset by -1 - uint8_t dim1; // offset by -1 - uint8_t dim2; // offset by -1 - uint8_t reserved; // for alignment - uint64_t offset; // from beginning of file - uint64_t size; // in bytes should be multiple of XYZ_SIZE - }; - - CubeIterator begin() { - uint8_t* start = filePointer + shapes[0].offset; - return CubeIterator(header->n, (XYZ*)start); + // Do begin() and end() make sense for CacheReader + // If the cache file provides data for more than single shape? + // The data might not even be mapped contiguously to save memory. + /*CubeIterator begin() { + const uint8_t* start = filePointer + shapes[0].offset; + return CubeIterator(header->n, (const XYZ*)start); } CubeIterator end() { - uint8_t* stop = filePointer + shapes[0].offset + header->numPolycubes * header->n * XYZ_SIZE; - return CubeIterator(header->n, (XYZ*)stop); - } + const uint8_t* stop = filePointer + shapes[0].offset + header->numPolycubes * header->n * XYZ_SIZE; + return CubeIterator(header->n, (const XYZ*)stop); + }*/ + // get shapes at index [0, numShapes()[ ShapeRange getCubesByShape(uint32_t i) override; private: - uint8_t* filePointer; + std::shared_ptr file_; + std::unique_ptr> header_; + std::unique_ptr> shapes_; + std::unique_ptr> xyz_; + std::string path_; - int fileDescriptor_; - uint64_t fileSize_; bool fileLoaded_; - Header dummyHeader; - Header* header; - ShapeEntry* shapes; + const cacheformat::Header dummyHeader; + const cacheformat::Header* header; + const cacheformat::ShapeEntry* shapes; }; class FlatCache : public ICache { @@ -171,4 +184,40 @@ class FlatCache : public ICache { size_t size() override { return allXYZs.size() / n / sizeof(XYZ); } }; +class CacheWriter { + protected: + std::mutex m_mtx; + std::condition_variable m_run; + std::condition_variable m_wait; + bool m_active = true; + + // Jobs that flush and finalize the written file. + size_t m_num_flushes = 0; + std::deque> m_flushes; + + // Temporary copy jobs into the memory mapped file. + size_t m_num_copys = 0; + std::deque> m_copy; + + // thread pool executing the jobs. 
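+    // Each thread runs CacheWriter::run(), which drains m_copy before m_flushes;
+    // m_run wakes idle threads and m_wait signals completion back to save()/flush().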
+ std::deque m_flushers; + + void run(); + + public: + CacheWriter(int num_threads = 8); + ~CacheWriter(); + + /** + * Capture snapshot of the Hashy and write cache file. + * The data may not be entirely flushed before save() returns. + */ + void save(std::string path, Hashy& hashes, uint8_t n); + + /** + * Complete all flushes immediately. + */ + void flush(); +}; + #endif diff --git a/cpp/include/utils.hpp b/cpp/include/utils.hpp index 4cd23e3..f895877 100644 --- a/cpp/include/utils.hpp +++ b/cpp/include/utils.hpp @@ -3,12 +3,39 @@ #define OPENCUBES_UTILS_HPP #include + +// Debug print level: all prints enabled +// below DEBUG_LEVEL. +// DEBUG_LEVEL -> 0 all prints disabled. +// DEBUG_LEVEL -> 1 enable DEBUG_PRINTF() statements +// DEBUG_LEVEL -> 2 enable DEBUG1_PRINTF() statements and earlier +// DEBUG_LEVEL -> 3 all prints enabled. +#define DEBUG_LEVEL 1 + #ifdef DEBUG + +#if DEBUG_LEVEL >= 1 #define DEBUG_PRINTF(...) std::printf(__VA_ARGS__) -#else -#define DEBUG_PRINTF(...) \ - do { \ - } while (0) #endif +#if DEBUG_LEVEL >= 2 +#define DEBUG1_PRINTF(...) std::printf(__VA_ARGS__) +#endif + +#if DEBUG_LEVEL >= 3 +#define DEBUG2_PRINTF(...) std::printf(__VA_ARGS__) +#endif + +#endif + +#ifndef DEBUG_PRINTF +#define DEBUG_PRINTF(...) do {} while (0) #endif +#ifndef DEBUG1_PRINTF +#define DEBUG1_PRINTF(...) do {} while (0) +#endif +#ifndef DEBUG2_PRINTF +#define DEBUG2_PRINTF(...) do {} while (0) +#endif + +#endif \ No newline at end of file diff --git a/cpp/libraries/mapped_file.cpp b/cpp/libraries/mapped_file.cpp new file mode 100644 index 0000000..e7261dc --- /dev/null +++ b/cpp/libraries/mapped_file.cpp @@ -0,0 +1,449 @@ +/** + * Copyright 2023 Jarmo A Tiitto + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the “Software”), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "mapped_file.hpp" + +#include +#include +#include +#include +#include + +// POSIX/Linux APIs +#include +#include +#include +#include + +#include +#include + +#ifndef MAP_HUGE_2MB +#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) +#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT) +#endif + +namespace mapped { + +/** + * Mapped file POSIX/Linux compatible implementation + */ +file::file() : fd(-1), fd_size(0) {} + +file::~file() { close(); } + +void file::close() { + if (fd >= 0) { + ::fsync(fd); + ::close(fd); + fd = -1; + fd_size = 0; + } +} + +int file::open(const char* fname) { + close(); + + fd = ::open64(fname, O_RDONLY); + if (fd == -1) { + // std::fprintf(stderr, "Error opening file for reading\n"); + return -1; + } + + struct stat64 finfo; + if (fstat64(fd, &finfo)) { + std::fprintf(stderr, "Error getting file size: %s\n", std::strerror(errno)); + return -1; + } + fd_size = finfo.st_size; + fd_rw = false; + return 0; +} + +int file::openrw(const char* fname, size_t maxsize, int flags) { + // create new files with "normal" permissions: "-rw-r--r--" + const mode_t fperms = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH; + + close(); + + maxsize = roundUp(maxsize); + + if (!flags) { + fd = ::open64(fname, O_RDWR | O_CLOEXEC); + if (fd == -1) { + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + return -1; + } + + fd_rw = true; + + struct stat64 finfo; + if (fstat64(fd, &finfo)) { + std::fprintf(stderr, "Error getting file size:%s\n", std::strerror(errno)); + return -1; + } + return truncate(finfo.st_size); + + } else if ((flags & (CREATE | RESIZE)) == (CREATE | RESIZE)) { + fd = ::open64(fname, O_CREAT | O_RDWR | O_TRUNC | O_CLOEXEC, fperms); + if (fd == -1) { + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + return -1; + } + fd_rw = true; + + if(flags & FSTUNE) { + int flags = 0; + ioctl(fd, FS_IOC_GETFLAGS, &flags); + flags |= FS_NOATIME_FL | FS_NOCOW_FL; + ioctl(fd, FS_IOC_SETFLAGS, &flags); + } + return truncate(maxsize); + + } else if ((flags & RESIZE) != 0) { + fd = ::open64(fname, O_RDWR | O_CLOEXEC, fperms); + if (fd == -1) { + // std::fprintf(stderr, "Error opening file:%s\n", std::strerror(errno)); + return -1; + } + fd_rw = true; + return truncate(maxsize); + } else { + std::fprintf(stderr, "Invalid open flags:%s\n", std::strerror(errno)); + return -1; + } +} + +bool file::is_rw() const { return fd_rw; } + +seekoff_t file::size() const { return fd_size; } + +int file::truncate(seekoff_t newsize) { + // resize the backing file + if (newsize != fd_size && ftruncate64(fd, newsize)) { + std::fprintf(stderr, "Error resizing backing file:%s\n", std::strerror(errno)); + return -1; + } + fd_size = newsize; + return 0; +} + +/** + * Mapped region POSIX/Linux compatible implementation. + */ + +region::region(std::shared_ptr src, seekoff_t fpos, len_t size, len_t window) : mfile(src) { + std::lock_guard lock(mtx); + remap(fpos, size, window); +} + +region::region(std::shared_ptr src) : mfile(src) { + std::lock_guard lock(mtx); + auto sz = mfile->size(); + remap(0, sz, sz); +} + +region::~region() { + // destructor is not thread-safe. + std::lock_guard lock(mtx); + map_fseek = 0; + remap(0, 0, 0); +} + +/** + * This is the core implementation of mapped_file: + * remap(0,0) releases the mapping. 
+ * remap(0, n) mmap roundUp(n) bytes at offset 0 + * remap(0, k) mremap roundUp(n) bytes at offset 0 (grows the existing mapping) + * remap(n, j) munmap old region, mmap new at offset roundDown(n) + * + * In read-write mode the backing file is grown to fit the mapping. + * + * @warn this->mtx must be held when this function is called. + */ +void region::remap(const seekoff_t fpos, const len_t size, const len_t window) { + if (fpos == usr_fseek && size == usr_size) return; // No-op + // check if [fpos, fpos+size] fits into the existing + // mmap() window and only adjust the user region. + if (size && map_ptr && (map_fseek <= fpos && fpos + size <= map_fseek + map_size)) { + usr_fseek = fpos; + usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek); + usr_size = size; + return; + } + + // if size == 0 or the usr_fseek != fpos, + // we have to unmap the old region first, if any. + if (!!map_ptr && (size == 0 || usr_fseek != fpos)) { + if (::munmap(map_ptr, map_size) == -1) { + std::fprintf(stderr, "Error mapping file memory\n"); + return; + } + map_ptr = nullptr; + map_size = 0; + usr_ptr = nullptr; + usr_size = 0; + if (size == 0) return; + } + // keep what user tried to ask: + usr_fseek = fpos; + usr_size = size; + + if (map_ptr && map_fseek == fpos) { + // this mapping exists already at same map_fseek + // remap it to grow the region. + auto newsize = roundUp(std::max(size, window)); + void* newptr = mremap(map_ptr, map_size, newsize, MREMAP_MAYMOVE); + if (newptr == MAP_FAILED) { + std::fprintf(stderr, "Error resizing memory-map of file:%s\n", std::strerror(errno)); + std::abort(); + return; + } + map_ptr = newptr; + map_size = newsize; + return; + } + + // create new mapping + if (mfile->is_rw()) { + // RW mapping + auto newsize = roundUp(std::max(size, window)); + + // take file lock so size() check --> truncate is atomic. + std::unique_lock trunclock(mfile->mut); + if (mfile->size() < fpos + newsize && mfile->truncate(fpos + newsize)) { + // failed. Disk full? + std::abort(); + return; + } + trunclock.unlock(); + + // mmap requires fpos && size to be multiple of PAGE_SIZE + map_fseek = roundDown(fpos); + if (map_fseek < fpos) { + // adjust size to cover. + newsize += PAGE_SIZE; + } + map_size = newsize; + map_ptr = mmap(0, map_size, PROT_READ | PROT_WRITE, MAP_SHARED, mfile->fd, map_fseek); + if (map_ptr == MAP_FAILED) { + // If this gets triggered we are in deep trouble + std::fprintf(stderr, "Error memory-mapping file:%s %lu %d %lu\n", std::strerror(errno), size, mfile->fd, fpos); + std::fprintf(stderr, "Dumping /proc/self/maps:\n"); + // for debugging information try print /proc/self/mmaps contents + // as this explains why we hit some limit of the system. + std::ifstream fmaps("/proc/self/maps"); + std::string buf; + int count = 0; + while(std::getline(fmaps, buf)) { + std::fprintf(stderr, "%s\n", buf.c_str()); + ++count; + } + std::fprintf(stderr, "counted %d memory-maps in process.\n", count); + return; + } + } else { + // RO mapping + if (mfile->size() <= fpos) { + // can't: the backing file is too small. + std::fprintf(stderr, "Error seeking past end of file.\n"); + std::abort(); + return; + } + map_size = roundUp(std::max(size, window)); + map_fseek = roundDown(fpos); + // Map the region. 
(use huge pages, don't reserve backing store) + map_ptr = mmap(0, map_size, PROT_READ, MAP_SHARED | MAP_NORESERVE, mfile->fd, map_fseek); + + if (!map_ptr || map_ptr == MAP_FAILED) { + std::fprintf(stderr, "Error mapping file\n"); + std::abort(); + return; + } + } + + // hint that this memory is accessed in random order. + if(madvise(map_ptr, map_size, MADV_RANDOM)) { + std::fprintf(stderr, "warn: madvice(MADV_RANDOM) failed: %s\n", std::strerror(errno)); + } + // adjust the usr_ptr to fix + // any page misalignment. + usr_ptr = (uint8_t*)map_ptr + (fpos - map_fseek); +} + +void region::window(len_t window) { + std::lock_guard lock(mtx); + auto usize = usr_size; + // note: remap() does nothing if window == usr_size + remap(usr_fseek, window, window); + usr_size = usize; +} + +void region::jump(seekoff_t fpos) { + std::lock_guard lock(mtx); + remap(fpos, usr_size, map_size); + is_dirty = false; +} + +void region::flushJump(seekoff_t fpos) { + flush(); + std::lock_guard lock(mtx); + remap(fpos, usr_size, map_size); +} + +void region::flush() { + // only flush if dirty and RW mapped. + std::lock_guard lock(mtx); + if (is_dirty && mfile->is_rw()) { + is_dirty = false; + auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); + auto flush_len = roundUp(usr_size); + if (flush_begin < usr_ptr) flush_len += PAGE_SIZE; + if (msync(flush_begin, flush_len, MS_ASYNC)) { + std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); + } + } +} + +void region::sync() { + // only flush if dirty and RW mapped. + std::lock_guard lock(mtx); + if (is_dirty && mfile->is_rw()) { + is_dirty = false; + auto flush_begin = (void*)roundDown((uintptr_t)usr_ptr); + auto flush_len = roundUp(usr_size); + if (flush_begin < usr_ptr) flush_len += PAGE_SIZE; + if (msync(flush_begin, flush_len, MS_SYNC)) { + std::fprintf(stderr, "Error flushing memory-map:%s\n", std::strerror(errno)); + } + } +} + +void region::writeAt(seekoff_t fpos, len_t datasize, const void* data) { + auto srcmem = (const char*)data; + + // take file lock so that file size check --> truncate is atomic. + std::unique_lock trunclock(mfile->mut); + if(mfile->size() < fpos+datasize && mfile->truncate(fpos+datasize)) { + return; + } + trunclock.unlock(); + + // does write fall out the mapped area begin? + if (fpos < map_fseek) { + // max size that can be written before map_fseek + ssize_t wr = std::min(map_fseek - fpos, datasize); + if (pwrite(mfile->fd, srcmem, wr, fpos) != wr) { + std::fprintf(stderr, "Error writing file:%s\n", std::strerror(errno)); + } + srcmem += wr; + fpos += wr; + datasize -= wr; + } + + if (fpos >= map_fseek && fpos < map_fseek + map_size && datasize) { + // max size that can be copied into this mapping: + ssize_t wr = std::min(map_size - (fpos - map_fseek), datasize); + std::memcpy((char*)map_ptr + (fpos - map_fseek), srcmem, wr); + srcmem += wr; + fpos += wr; + datasize -= wr; + } + + // does write fall out the mapped area end? + if (datasize) { + // write into backing file after the mapped area: + if (pwrite(mfile->fd, srcmem, datasize, fpos) != ssize_t(datasize)) { + std::fprintf(stderr, "Error writing file:%s\n", std::strerror(errno)); + } + } +} + +void region::readAt(seekoff_t fpos, len_t datasize, void* data) const { + auto dstmem = (char*)data; + + // does read fall out the mapped area begin? 
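+    // (Mirrors writeAt(): up to three spans are handled, pread() for bytes
+    // before the mapping, memcpy for bytes inside it, pread() for bytes after it.)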
+ if (fpos < map_fseek) { + // max size that can be written before map_fseek + ssize_t rd = std::min(map_fseek - fpos, datasize); + if (pread(mfile->fd, dstmem, rd, fpos) != rd) { + std::fprintf(stderr, "Error reading file:%s\n", std::strerror(errno)); + } + dstmem += rd; + fpos += rd; + datasize -= rd; + } + + if (fpos >= map_fseek && fpos < map_fseek + map_size && datasize) { + // max size that can be copied from this mapping: + ssize_t rd = std::min(map_size - (fpos - map_fseek), datasize); + std::memcpy(dstmem, (char*)map_ptr + (fpos - map_fseek), rd); + dstmem += rd; + fpos += rd; + datasize -= rd; + } + + // does read fall out the mapped area end? + if (datasize) { + // read from backing file after the mapped area: + if (pread(mfile->fd, dstmem, datasize, fpos) != ssize_t(datasize)) { + std::fprintf(stderr, "Error reading file:%s\n", std::strerror(errno)); + } + } +} + + +void region::resident(bool resident) { + std::lock_guard lock(mtx); + if(madvise(map_ptr, map_size, resident ? MADV_WILLNEED : MADV_DONTNEED)) { + std::fprintf(stderr,"Error setting memory-map residency:%s\n",std::strerror(errno)); + } +} + + +void region::discard(seekoff_t fpos, len_t datasize) { + + auto cur = usr_fseek + fpos; + + if (cur < map_fseek + map_size) { + // max size that can discarded from this mapping: + ssize_t dm = std::min(map_size - (cur - map_fseek), datasize); + + // Have to be careful here: if we delete too much + // caller will not have an good time. + // align size down to page size. + dm = roundDown(dm); + // align file offset up + auto _first = roundUp(cur - map_fseek); + if(_first > cur - map_fseek) + dm -= PAGE_SIZE; + + if(dm >= (signed)PAGE_SIZE) { + if(madvise((char*)map_ptr + _first, dm, MADV_REMOVE)) { + std::fprintf(stderr,"Error discarding memory-map region:%s\n",std::strerror(errno)); + } + } + } +} + +}; // namespace mapped diff --git a/cpp/libraries/mapped_file.hpp b/cpp/libraries/mapped_file.hpp new file mode 100644 index 0000000..b86657a --- /dev/null +++ b/cpp/libraries/mapped_file.hpp @@ -0,0 +1,552 @@ +/** + * Copyright 2023 Jarmo A Tiitto + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the “Software”), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef MAPPEDFILE_HPP_INCLUDED +#define MAPPEDFILE_HPP_INCLUDED + +#include +#include +#include +#include +#include +#include +#include + +/** + * Memory mapped file I/O utilities + * - mapped::file class for opening an file + * - mapped::region class for RW/RO memory mapping part the file instance. 
+ * - mapped::struct_region template for RW/RO accessing part the file as specified type. + * - mapped::array_region template for RW/RO accessing part of the file as array of T elements. + * + * @note + * When doing read-only mapping the region instance + * should be const qualified as this restricts + * the region class API to read-only operations and prevents + * accidental modification of the file. + * Use std::make_unique() in this case. + * + * @note + * When using the read-write features the backing file is resized + * in multiple PAGE_SIZE blocks even if the actually mapped size is + * something else. + * openrw(...,size,RESIZE) always truncates the file to roundUp(size). + * You should do file->truncate(< sizeInBytes>) to make the file + * size exactly what you want before the file is closed. + * + * Modified regions should flush() or sync() before they are destroyed + * or the modified data may not end up in the file. + * + * TODO: + * - Two region instances should not overlap, + * i.e same portion of the file should not be mapped twice. + * (Not sure if this is actually broken now, but you have been warned) + * - Multi-threading support not tested/written. + * Currently the same mapped region can be used by multiple threads, + * but cannot it be modified. + * - Better error handling. (exceptions?, error codes?) + * Currently critical errors are printed and std::abort() is called. + * How do we handle system errors that happen in constructors? + */ +namespace mapped { + +const size_t PAGE_SIZE = 4096; + +static inline size_t roundToPage(ptrdiff_t x) { return (std::max(0, x - 1) & ~(PAGE_SIZE - 1)) + PAGE_SIZE; } + +constexpr inline size_t roundUp(uintptr_t x) { return (x + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); } + +constexpr inline size_t roundDown(uintptr_t x) { return (x & ~(PAGE_SIZE - 1)); } + +/** + * seekoff_t: Position of the file cursor + */ +using seekoff_t = uint64_t; +/** + * len_t: length of file data + */ +using len_t = size_t; + +class file; + +/** + * Memory-mapped region + * @brief + * the base class memory-maps an raw range of bytes from the backing file. + */ +class region { + protected: + std::mutex mtx; + // actually mapped region: + void* map_ptr = nullptr; + size_t map_size = 0; + seekoff_t map_fseek = 0; + // what constructor asked: + void* usr_ptr = nullptr; + size_t usr_size = 0; + seekoff_t usr_fseek = 0; + // todo: maybe use std::weak_ptr? + // that would allow file to be released and + // any any existing region(s) would still work. + // (but only if remap() is not called) + std::shared_ptr mfile; + // non-const data access sets is_dirty. + bool is_dirty = false; + + void remap(const seekoff_t fpos, const len_t size, const len_t window); + + region() {} + + public: + /** + * Open memory mapped region into a file. + * @brief + * Seeks at fpos in file and map size bytes + * starting from that position in file. + * @param window + * over-extend mapping up to max(size,window) bytes. + * Setting window bigger than size allows more efficient operation: + * [fpos, fpos + window] area is memory mapped + * but region will only operate on the + * [roundDown(fpos), roundup(fpos+size)] + * sub-portion of the memory. + * @note + * - Seeking past the EOF in file that is read-only will fail. + * The mapped size may extend past EOF but accessing past EOF + * either returns undefined data or program is terminated by OS. + * (EOF is at file->size()) + * - Seeking past the EOF that is read-write + * grows the backing file to fit the mapping. 
+ * The backing file is always extended in multiple of PAGE_SIZE bytes. + * @note + * If size and/or fpos are not aligned to multiple of PAGE_SIZE + * they are forcibly aligned internally. This results in + * regionSize() and regionSeek() that may differ compared to + * size() and getSeek(). + * Side-effect is that backing file may grow more than expected. + */ + region(std::shared_ptr src, seekoff_t fpos, len_t size, len_t window = 0); + + /** + * Open memory mapped region into the file + * @brief + * same as region(myfile, 0, myfile.size()) + * and memory maps the entire file. + */ + explicit region(std::shared_ptr src); + + /** + * Note: even if region was modified, + * destructor will not flush()/sync() before tearing down the mapping. + */ + virtual ~region(); + + // region is not copyable + region(const region&) =delete; + region& operator=(const region&) =delete; + + // region is moveable + friend void swap(region& a, region& b) { + using std::swap; + //std::lock(a.mtx,b.mtx); + //std::lock_guard l0(a.mtx, std::adopt_lock); + //std::lock_guard l1(b.mtx, std::adopt_lock); + swap(a.map_ptr,b.map_ptr); + swap(a.map_size,b.map_size); + swap(a.map_fseek,b.map_fseek); + swap(a.usr_ptr,b.usr_ptr); + swap(a.usr_size,b.usr_size); + swap(a.usr_fseek,b.usr_fseek); + swap(a.mfile,b.mfile); + swap(a.is_dirty,b.is_dirty); + } + region(region&& mv) : region() { + swap(*this, mv); + } + region& operator=(region&& mv) { + swap(*this, mv); + return *this; + } + + /** + * Get data pointer. + */ + const void* data() const { return usr_ptr; } + void* data() { + is_dirty = true; + return usr_ptr; + } + + std::shared_ptr getFile() { return mfile; } + + /** + * Get the seek used to init this region. + */ + seekoff_t getSeek() const { return usr_fseek; } + /** + * Get the size used to init this region. + */ + len_t size() const { return usr_size; } + + /** + * Get page aligned seek <= getSeek() + */ + seekoff_t regionSeek() const { return map_fseek; } + /** + * Get page aligned size >= size() + */ + len_t regionSize() const { return map_size; } + + /** + * Resize the mapped region. + * @note the mapped memory address may move, + * but current contents are preserved. + * @warn all pointers or references into + * the mapping are invalidated. + */ + void resize(len_t newsize); + + /** + * @brief over-extend mapping up to max(size(),window) bytes. + * Setting window bigger than size() allows more efficient operation: + * [regionSeek(), regionSeek() + window] area is memory mapped + * but region will only operate on the + * [roundDown(getSeek()), roundUp(getSeek()+size())] + * sub-portion of the memory. + */ + void window(len_t window = 0); + + /** + * Flush mapped memory region into the file. + * @brief this is an hint to operating system that + * memory region shall be synchronized to disk. + * It may not wait for this to have completed before returning. + * @note only the page aligned region + * [roundDown(data()), roundUp(data()+size())] + * is flushed. + * @note Use sync() instead if you must guarantee the data has + * reached persistent storage. + */ + void flush(); + + /** + * Synchronize modified memory region onto disk. + */ + void sync(); + + /** + * Write data into the backing file. + * @brief + * writeAt() stores range of bytes into the backing file. + * @note + * The region doesn't need to have this area to be memory-mapped: + * The data that falls into the memory-mapped + * [regionSeek(), regionSeek()+regionSize()] area is simply memcpy'ed. 
+ * Any data that falls out this window is written directly + * into the backing file. + * The backing file is grown to fit the data when needed. + */ + void writeAt(seekoff_t fpos, len_t datasize, const void* data); + + /** + * Read data from the backing file. + * @brief + * readAt() reads [fpos, fpos+datasize] range of bytes from the backing file + * @note + * The region doesn't need to have this area memory-mapped + * The read out area that falls into the memory-mapped + * [regionSeek(), regionSeek()+regionSize()] area is simply memcpy'ed. + * Any data that falls out this window is read directly + * from the backing file. + */ + void readAt(seekoff_t fpos, len_t datasize, void* data) const; + + /** + * Set memory region to resident/or released. + * @brief setting memory range to non-resident state + * causes system to drop the data from system memory. + * Reading non-resident memory region again causes system to + * fetch data from the disk again. + * @warn if memory region is not flushed before setting + * resident(false) any writes may be discarded to backing file. + */ + void resident(bool state); + + /** + * Discard memory region. + * @brief discarding memory range causes system + * to reclaim the memory *and* the on-disk area. + * This means the data is lost in the mapped memory region, + * and any data within will not be written onto disk by sync() + * Subsequent reads after discard() return zero filled data. + * @note + * The discarded area shall be within the mapped area. + * @param fpos + * file offset from begin of this mapping. (getSeek() + fpos) + * @param datasize + * length of the data area to discard. + */ + void discard(seekoff_t fpos, len_t datasize); + + /** + * Seek in the file to fpos position and + * remap the memory region there. + * @warn all pointers or references into + * the mapping are invalidated. + */ + void jump(seekoff_t fpos); + + /** + * Flush the current region and + * Seek in the file to fpos position and + * remap the memory region there. + * @warn all pointers or references into + * the mapping are invalidated. + */ + void flushJump(seekoff_t fpos); +}; + +static_assert(std::is_move_constructible_v); +static_assert(std::is_move_assignable_v); +static_assert(std::is_swappable_v); + +/** + * Typed region. + * struct_region allows directly accessing an on-disk structure. + * The region size is implicit from the type. + */ +template +class struct_region : protected region { + public: + using type = typename std::decay::type; + static_assert(std::is_standard_layout_v, "T must be plain-old-data type"); + + /** + * Memory map struct_region at fpos in file. + */ + struct_region(std::shared_ptr f, seekoff_t fpos, len_t window = 0) : region(f, fpos, sizeof(type), window) {} + + type* get() { return static_cast(data()); } + const type* get() const { return static_cast(data()); } + + type* operator->() { return get(); } + const type* operator->() const { return get(); } + + type& operator*() { return *get(); } + const type& operator*() const { return *get(); } + + using region::flush; + using region::getFile; + using region::getSeek; + using region::readAt; + using region::sync; + using region::writeAt; + using region::resident; + using region::window; + using region::discard; + + // note: size means the sizeof(T) + using region::size; + + /** + * Get the file seek position just after *this. + */ + seekoff_t getEndSeek() const { return getSeek() + sizeof(T); } + + /** + * Seek to fpos in file and remap the region. 
+ * @return the pointer into the new position + */ + type* jump(seekoff_t fpos) { + region::jump(fpos); + return get(); + } + + type* flushJump(seekoff_t fpos) { + region::flushJump(fpos); + return get(); + } +}; + +static_assert(std::is_move_constructible_v>); +static_assert(std::is_move_assignable_v>); +static_assert(std::is_swappable_v>); + +/** + * Typed array region. + * @brief + * array_region allows directly accessing an on-disk array of structures + * The element size is implicit from the type and length of the array + * is provided by the constructor. + * @provides resize(), operator[], begin(), end() + */ +template +class array_region : protected region { + protected: + size_t num_elements = 0; + + public: + using type = typename std::decay::type; + static_assert(std::is_standard_layout_v, "T must be plain-old-data type"); + + /** + * Memory map array_region at fpos in file and map array_size elements. + */ + array_region(std::shared_ptr f, seekoff_t fpos, size_t array_size) : region(f, fpos, sizeof(type) * array_size), num_elements(array_size) {} + + /** + * Get pointer to first mapped element. + */ + type* get() { return static_cast(data()); } + const type* get() const { return static_cast(data()); } + + using region::flush; + using region::getFile; + using region::getSeek; + using region::readAt; + using region::sync; + using region::writeAt; + + /** + * Resize the mapped array region. + */ + void resize(size_t elements) { + region::resize(sizeof(T) * elements); + num_elements = elements; + } + + /** + * Get number of mapped *elements* + */ + size_t size() const { return num_elements; } + + /** + * Access the array elements + */ + T& operator[](size_t index) { + assert(index < num_elements); + return get()[index]; + } + const T& operator[](size_t index) const { + assert(index < num_elements); + return get()[index]; + } + /** + * Iterators + */ + T* begin() { return get(); } + T* end() { return get() + num_elements; } + const T* begin() const { return get(); } + const T* end() const { return get() + num_elements; } + + /** + * Get the file seek position just after *this. + */ + seekoff_t getEndSeek() const { return getSeek() + sizeof(T) * num_elements; } + + /** + * Seek to fpos in file and remap the region. + * @return the pointer into the first element in the array + */ + type* jump(seekoff_t fpos) { + region::jump(fpos); + return get(); + } + + type* flushJump(seekoff_t fpos) { + region::flushJump(fpos); + return get(); + } +}; + +/** + * Memory-mapped file I/O class. + * @note + * file should be created with std::make_shared() + * as mapped region(s) take shared ownership of the file. + */ +class file : public std::enable_shared_from_this { + private: + std::mutex mut; + int fd; + seekoff_t fd_size; + bool fd_rw; + // the file and region classes are inherently coupled, + // and we don't want to expose the internals. + friend class region; + + public: + enum : int { + CREATE = 0x1, //!< Create new file, if doesn't exist. + RESIZE = 0x2, //!< Resize file. + FSTUNE = 0x4 //!< When creating new file attempt to set + //!< file system attributes to improve performance. + }; + + file(); + ~file(); + + /** + * Open file in read-only mode. + * @return non-zero if error occurred. + */ + int open(const char* file); + + /** + * Create/Open file in read-write mode. + * @param flags + * - CREATE|RESIZE creates or replaces existing file + * that is truncated to maxsize. + * - RESIZE opens existing file and truncates it to + * maxsize. The file must exist already. 
+ * - flags == 0 ignores the maxsize argument and opens + * existing file. + * @warn default open mode discards any previous file contents! + * @return non-zero if error occurred. + */ + int openrw(const char* file, len_t maxsize, int flags = CREATE | RESIZE); + + /** + * Check if file open R/W or RO + */ + bool is_rw() const; + + /** + * Resize the open file to newsize bytes. + * (file must be open in R/W mode) + * @return non-zero if error occurred. + */ + int truncate(seekoff_t newsize); + + /** + * Current length of the file + * The file EOF (end-of-file) is at this position. + */ + seekoff_t size() const; + + // Close the file. + void close(); +}; + +}; // namespace mapped +#endif diff --git a/cpp/src/cache.cpp b/cpp/src/cache.cpp deleted file mode 100644 index 071ff0f..0000000 --- a/cpp/src/cache.cpp +++ /dev/null @@ -1,163 +0,0 @@ -#include "cache.hpp" - -#include -#include -#include -#include -#include - -#include "utils.hpp" - -/* -==================== -cache file header -==================== - -uint32_t magic = "PCUB" -uint32_t n = cache file for n cubes in a polycube -uint32_t numShapes = number of different shapes in cachefile -------- - -==================== -shapetable: -==================== -shapeEntry { - uint8_t dim0 // offset by -1 - uint8_t dim1 // offset by -1 - uint8_t dim2 // offset by -1 - uint8_t reserved - uint64_t offset in file -} -shapeEntry[numShapes] - - -==================== -XYZ data -==================== - -*/ - -void Cache::save(std::string path, Hashy &hashes, uint8_t n) { - if (hashes.size() == 0) return; - std::ofstream ofs(path, std::ios::binary); - Header header; - header.magic = MAGIC; - header.n = n; - header.numShapes = hashes.byshape.size(); - header.numPolycubes = hashes.size(); - ofs.write((const char *)&header, sizeof(header)); - - std::vector keys; - keys.reserve(header.numShapes); - for (auto &pair : hashes.byshape) keys.push_back(pair.first); - std::sort(keys.begin(), keys.end()); - uint64_t offset = sizeof(Header) + header.numShapes * sizeof(ShapeEntry); - for (auto &key : keys) { - ShapeEntry se; - se.dim0 = key.x(); - se.dim1 = key.y(); - se.dim2 = key.z(); - se.reserved = 0; - se.offset = offset; - se.size = hashes.byshape[key].size() * XYZ_SIZE * n; - offset += se.size; - ofs.write((const char *)&se, sizeof(ShapeEntry)); - } - // put XYZs - for (auto &key : keys) { - for (auto &subset : hashes.byshape[key].byhash) - for (const auto &c : subset.set) { - if constexpr (sizeof(XYZ) == XYZ_SIZE) { - ofs.write((const char *)c.data(), sizeof(XYZ) * c.size()); - } else { - for (const auto &p : c) { - ofs.write((const char *)p.data, XYZ_SIZE); - } - } - } - } - - std::printf("saved %s\n\r", path.c_str()); -} - -Hashy Cache::load(std::string path, uint32_t extractShape) { - Hashy cubes; - auto ifs = std::ifstream(path, std::ios::binary); - if (!ifs.is_open()) return cubes; - Header header; - if (!ifs.read((char *)&header, sizeof(header))) { - return cubes; - } - // check magic - if (header.magic != MAGIC) { - return cubes; - } -#ifdef CACHE_LOAD_HEADER_ONLY - std::printf("loading cache file \"%s\" for N = %u", path.c_str(), header.n); - std::printf(", %u shapes, %lu XYZs\n\r", header.numShapes, header.numPolycubes); -#endif - auto cubeSize = XYZ_SIZE * header.n; - DEBUG_PRINTF("cubeSize: %u\n\r", cubeSize); - - for (uint32_t i = 0; i < header.numShapes; ++i) { - ShapeEntry shapeEntry; - if (!ifs.read((char *)&shapeEntry, sizeof(shapeEntry))) { - std::printf("ERROR reading ShapeEntry %u\n\r", i); - exit(-1); - } - if (ALL_SHAPES != extractShape && 
i != extractShape) continue; -#ifdef CACHE_PRINT_SHAPEENTRIES - std::printf("ShapeEntry %3u: [%2d %2d %2d] offset: 0x%08lx size: 0x%08lx (%ld polycubes)\n\r", i, shapeEntry.dim0, shapeEntry.dim1, shapeEntry.dim2, - shapeEntry.offset, shapeEntry.size, shapeEntry.size / cubeSize); -#endif - if (shapeEntry.size % cubeSize != 0) { - std::printf("ERROR shape block is not divisible by cubeSize!\n\r"); - exit(-1); - } -#ifndef CACHE_LOAD_HEADER_ONLY - // remember pos in file - auto pos = ifs.tellg(); - - // read XYZ contents - ifs.seekg(shapeEntry.offset); - const uint32_t CHUNK_SIZE = 512 * XYZ_SIZE; - uint8_t buf[CHUNK_SIZE] = {0}; - uint64_t buf_offset = 0; - uint32_t numCubes = shapeEntry.size / cubeSize; - XYZ shape(shapeEntry.dim0, shapeEntry.dim1, shapeEntry.dim2); - uint64_t readsize = shapeEntry.size - buf_offset; - if (readsize > CHUNK_SIZE) readsize = CHUNK_SIZE; - if (!ifs.read((char *)&buf, readsize)) { - std::printf("ERROR reading XYZs for Shape %u\n\r", i); - exit(-1); - } - for (uint32_t j = 0; j < numCubes; ++j) { - Cube next(header.n); - for (uint32_t k = 0; k < header.n; ++k) { - // check if buf contains next XYZ - uint64_t curr_offset = j * cubeSize + k * XYZ_SIZE; - if (curr_offset >= buf_offset + CHUNK_SIZE) { - // std::printf("reload buffer\n\r"); - buf_offset += CHUNK_SIZE; - readsize = shapeEntry.size - buf_offset; - if (readsize > CHUNK_SIZE) readsize = CHUNK_SIZE; - if (!ifs.read((char *)&buf, readsize)) { - std::printf("ERROR reading XYZs for Shape %u\n\r", i); - exit(-1); - } - } - - next.data()[k].data[0] = buf[curr_offset - buf_offset + 0]; - next.data()[k].data[1] = buf[curr_offset - buf_offset + 1]; - next.data()[k].data[2] = buf[curr_offset - buf_offset + 2]; - } - cubes.insert(next, shape); - } - - // restore pos - ifs.seekg(pos); -#endif - } - std::printf(" loaded %lu cubes\n\r", cubes.size()); - return cubes; -} diff --git a/cpp/src/cubes.cpp b/cpp/src/cubes.cpp index cdcc5b4..1630bb6 100644 --- a/cpp/src/cubes.cpp +++ b/cpp/src/cubes.cpp @@ -7,8 +7,9 @@ #include #include #include +#include +#include -#include "cache.hpp" #include "cube.hpp" #include "hashes.hpp" #include "newCache.hpp" @@ -19,22 +20,27 @@ const int PERF_STEP = 500; struct Workset { std::mutex mu; + + CacheReader cr; CubeIterator _begin_total; CubeIterator _begin; CubeIterator _end; Hashy &hashes; XYZ targetShape, shape, expandDim; bool notSameShape; - Workset(ShapeRange &data, Hashy &hashes, XYZ targetShape, XYZ shape, XYZ expandDim, bool notSameShape) - : _begin_total(data.begin()) - , _begin(data.begin()) - , _end(data.end()) - , hashes(hashes) + Workset(Hashy &hashes, XYZ targetShape, XYZ shape, XYZ expandDim, bool notSameShape) + : hashes(hashes) , targetShape(targetShape) , shape(shape) , expandDim(expandDim) , notSameShape(notSameShape) {} + void setRange(ShapeRange &data) { + _begin_total = data.begin(); + _begin = data.begin(); + _end = data.end(); + } + struct Subset { CubeIterator _begin, _end; bool valid; @@ -48,7 +54,7 @@ struct Workset { auto a = _begin; _begin += 500; if (_begin > _end) _begin = _end; - return {a, _begin, a < _end, 100 * (float)((uint64_t)a.m_ptr - (uint64_t)_begin_total.m_ptr) / ((uint64_t)_end.m_ptr - (uint64_t)_begin_total.m_ptr)}; + return {a, _begin, a < _end, 100 * float(std::distance(_begin_total.data(), a.data())) / std::distance(_begin_total.data(), _end.data())}; } void expand(const Cube &c) { @@ -87,14 +93,14 @@ struct Workset { std::set_difference(candidates.begin(), end, c.begin(), c.end(), std::back_inserter(tmp)); candidates = std::move(tmp); - 
DEBUG_PRINTF("candidates: %lu\n\r", candidates.size()); + DEBUG1_PRINTF("candidates: %lu\n\r", candidates.size()); Cube newCube(c.size() + 1); Cube lowestHashCube(newCube.size()); Cube rotatedCube(newCube.size()); for (const auto &p : candidates) { - DEBUG_PRINTF("(%2d %2d %2d)\n\r", p.x(), p.y(), p.z()); + DEBUG2_PRINTF("(%2d %2d %2d)\n\r", p.x(), p.y(), p.z()); int ax = (p.x() < 0) ? 1 : 0; int ay = (p.y() < 0) ? 1 : 0; int az = (p.z() < 0) ? 1 : 0; @@ -131,26 +137,69 @@ struct Workset { }; struct Worker { - Workset &ws; + std::shared_ptr ws; int id; - Worker(Workset &ws_, int id_) : ws(ws_), id(id_) {} + int state = 3; // 1 == completed/waiting for job, 2 == processing, 3 == job assigned. + std::mutex mtx; + std::condition_variable cond; + std::condition_variable cond2; + std::thread thr; + + Worker(int id_) : id(id_), thr(&Worker::run, this) {} + ~Worker() { + std::unique_lock lock(mtx); + state = 0; + cond.notify_one(); + lock.unlock(); + thr.join(); + } + + void launch(std::shared_ptr ws_) { + std::unique_lock lock(mtx); + while(state > 1) { + cond2.wait(lock); + } + ws = ws_; + state = 3; + cond.notify_one(); + } + + void sync() { + std::unique_lock lock(mtx); + while(state > 1) { + cond2.wait(lock); + } + ws.reset(); + } + void run() { - // std::printf("start %d\n", id); - auto subset = ws.getPart(); - while (subset.valid) { - if (id == 0) { - std::printf(" %5.2f%%\r", subset.percent); - std::flush(std::cout); - } - // std::cout << id << " next subset " << &*subset.begin() << " to " << &*subset.end() << "\n"; - for (auto &c : subset) { - // std::printf("%p\n", (void *)&c); - // c.print(); - ws.expand(c); + std::unique_lock lock(mtx); + std::printf("thread nro. %d started.\n", id); + while(state) { + state = 1; + cond2.notify_one(); + while(state == 1) + cond.wait(lock); + if(!state) + return; + state = 2; + // std::printf("start %d\n", id); + auto subset = ws->getPart(); + while (subset.valid) { + if (id == 0) { + std::printf(" %5.2f%%\r", subset.percent); + std::flush(std::cout); + } + // std::cout << id << " next subset " << &*subset.begin() << " to " << &*subset.end() << "\n"; + for (auto &c : subset) { + // std::printf("%p\n", (void *)&c); + // c.print(); + ws->expand(c); + } + subset = ws->getPart(); } - subset = ws.getPart(); + // std::printf("finished %d\n", id); } - // std::printf("finished %d\n", id); } }; @@ -166,7 +215,8 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c hashes.insert(Cube{{XYZ(0, 0, 0)}}, XYZ(0, 0, 0)); std::printf("%ld elements for %d\n\r", hashes.size(), n); if (write_cache) { - Cache::save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); + CacheWriter cw(1); + cw.save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n); } return FlatCache(hashes, n); } @@ -185,10 +235,20 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c } std::printf("N = %d || generating new cubes from %lu base cubes.\n\r", n, base->size()); hashes.init(n); + + // Start worker threads. 
+ std::deque workers; + for (int i = 0; i < threads; ++i) { + workers.emplace_back(i); + } + + CacheWriter cw(threads); + uint64_t totalSum = 0; auto start = std::chrono::steady_clock::now(); uint32_t totalOutputShapes = hashes.byshape.size(); uint32_t outShapeCount = 0; + auto prevShapes = Hashy::generateShapes(n - 1); for (auto &tup : hashes.byshape) { outShapeCount++; @@ -210,13 +270,14 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c if (diffy == 1) if (shape.y() == shape.x()) diffx = 1; - std::printf(" shape %d %d %d\n\r", shape.x(), shape.y(), shape.z()); + auto ws = std::make_shared(hashes, targetShape, shape, XYZ(diffx, diffy, diffz), abssum); if (use_split_cache) { // load cache file only for this shape std::string cachefile = base_path + "cubes_" + std::to_string(n - 1) + "_" + std::to_string(prevShapes[sid].x()) + "-" + std::to_string(prevShapes[sid].y()) + "-" + std::to_string(prevShapes[sid].z()) + ".bin"; - cr.loadFile(cachefile); + ws->cr.loadFile(cachefile); + base = &ws->cr; // cr.printHeader(); } auto s = base->getCubesByShape(sid); @@ -224,24 +285,30 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c std::printf("ERROR caches shape does not match expected shape!\n"); exit(-1); } - // std::printf("starting %d threads\n\r", threads); - std::vector ts; - Workset ws(s, hashes, targetShape, shape, XYZ(diffx, diffy, diffz), abssum); - std::vector workers; - ts.reserve(threads); - workers.reserve(threads); - for (int i = 0; i < threads; ++i) { - workers.emplace_back(ws, i); - ts.emplace_back(&Worker::run, std::ref(workers[i])); + + ws->setRange(s); + + // Wait for jobs to complete. + for (auto& thr : workers) { + thr.sync(); } - for (int i = 0; i < threads; ++i) { - ts[i].join(); + std::printf(" shape %d %d %d\n\r", shape.x(), shape.y(), shape.z()); + // launch the new jobs. + // Because the workset is held by shared_ptr + // main thread can do above preparation work in parallel + // while the jobs are running. + for (auto& thr : workers) { + thr.launch(ws); } } + // Wait for jobs to complete. + for (auto& thr : workers) { + thr.sync(); + } std::printf(" num: %lu\n\r", hashes.byshape[targetShape].size()); totalSum += hashes.byshape[targetShape].size(); if (write_cache && split_cache) { - Cache::save(base_path + "cubes_" + std::to_string(n) + "_" + std::to_string(targetShape.x()) + "-" + std::to_string(targetShape.y()) + "-" + + cw.save(base_path + "cubes_" + std::to_string(n) + "_" + std::to_string(targetShape.x()) + "-" + std::to_string(targetShape.y()) + "-" + std::to_string(targetShape.z()) + ".bin", hashes, n); } @@ -252,9 +319,16 @@ FlatCache gen(int n, int threads, bool use_cache, bool write_cache, bool split_c } } } + + // Stop the workers. 
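+    // (~Worker sets state = 0 and joins its thread.)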
+    workers.clear();
+
     if (write_cache && !split_cache) {
-        Cache::save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n);
+        cw.save(base_path + "cubes_" + std::to_string(n) + ".bin", hashes, n);
     }
+
+    cw.flush();
+
     auto end = std::chrono::steady_clock::now();
     auto dt_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
     std::printf("took %.2f s\033[0K\n\r", dt_ms / 1000.f);
diff --git a/cpp/src/newCache.cpp b/cpp/src/newCache.cpp
index ef925dc..93f15b3 100644
--- a/cpp/src/newCache.cpp
+++ b/cpp/src/newCache.cpp
@@ -1,13 +1,8 @@
-#include "../include/newCache.hpp"
-
-#include
-#include
-#include
+#include "newCache.hpp"

 #include

-CacheReader::CacheReader()
-    : filePointer(nullptr), path_(""), fileDescriptor_(-1), fileSize_(0), fileLoaded_(false), dummyHeader{0, 0, 0, 0}, header(&dummyHeader), shapes(nullptr) {}
+CacheReader::CacheReader() : path_(""), fileLoaded_(false), dummyHeader{0, 0, 0, 0}, header(&dummyHeader), shapes(nullptr) {}

 void CacheReader::printHeader() {
     if (fileLoaded_) {
@@ -33,28 +28,37 @@ int CacheReader::printShapes(void) {
 int CacheReader::loadFile(const std::string path) {
     unload();
     path_ = path;

-    fileDescriptor_ = open(path.c_str(), O_RDONLY);
-    if (fileDescriptor_ == -1) {
+    // open read-only backing file:
+    file_ = std::make_shared<mapped::file>();
+    if (file_->open(path.c_str())) {
         std::printf("error opening file\n");
         return 1;
     }

-    // get filesize
-    fileSize_ = lseek(fileDescriptor_, 0, SEEK_END);
-    lseek(fileDescriptor_, 0, SEEK_SET);
+    // map the header struct
+    header_ = std::make_unique<mapped::struct_region<cacheformat::Header>>(file_, 0);
+    header = header_->get();

-    // memory map file
-    filePointer = (uint8_t*)mmap(NULL, fileSize_, PROT_READ, MAP_SHARED, fileDescriptor_, 0);
-    if (filePointer == MAP_FAILED) {
-        // error handling
-        std::printf("errorm mapping file memory");
-        close(fileDescriptor_);
-        return 2;
+    if (header->magic != cacheformat::MAGIC) {
+        std::printf("error opening file: file not recognized\n");
+        return 1;
     }

-    header = (Header*)(filePointer);
-    shapes = (ShapeEntry*)(filePointer + sizeof(Header));
+    // map the ShapeEntry array:
+    shapes_ = std::make_unique<mapped::array_region<cacheformat::ShapeEntry>>(file_, header_->getEndSeek(), (*header_)->numShapes);
+    shapes = shapes_->get();
+
+    size_t datasize = 0;
+    for (unsigned int i = 0; i < header->numShapes; ++i) {
+        datasize += shapes[i].size;
+    }
+
+    // map rest of the file as XYZ data:
+    if (file_->size() != shapes_->getEndSeek() + datasize) {
+        std::printf("warn: file size does not match expected value\n");
+    }
+    xyz_ = std::make_unique<mapped::array_region<XYZ>>(file_, shapes_->getEndSeek(), datasize);

     fileLoaded_ = true;
@@ -65,27 +69,226 @@
 ShapeRange CacheReader::getCubesByShape(uint32_t i) {
     if (i >= header->numShapes) {
         return ShapeRange{nullptr, nullptr, 0, XYZ(0, 0, 0)};
     }
-    XYZ* start = reinterpret_cast<XYZ*>(filePointer + shapes[i].offset);
-    XYZ* end = reinterpret_cast<XYZ*>(filePointer + shapes[i].offset + shapes[i].size);
-    return ShapeRange(start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2));
+    if (shapes[i].size <= 0) {
+        return ShapeRange{nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)};
+    }
+    // get section start
+    // note: shapes[i].offset may hold a bogus value if any earlier
+    // shape table entry was empty, so we ignore the offset here.
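+    // Instead, recompute the real offset by accumulating the byte sizes of all
+    // preceding shape entries (empty entries contribute nothing).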
@@ -65,27 +69,226 @@ ShapeRange CacheReader::getCubesByShape(uint32_t i) {
     if (i >= header->numShapes) {
         return ShapeRange{nullptr, nullptr, 0, XYZ(0, 0, 0)};
     }
-    XYZ* start = reinterpret_cast<XYZ*>(filePointer + shapes[i].offset);
-    XYZ* end = reinterpret_cast<XYZ*>(filePointer + shapes[i].offset + shapes[i].size);
-    return ShapeRange(start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2));
+    if (shapes[i].size <= 0) {
+        return ShapeRange{nullptr, nullptr, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)};
+    }
+    // get section start
+    // note: shapes[i].offset may hold a bogus offset
+    // if any earlier shape table entry before i was empty,
+    // so we ignore the offset here.
+    size_t offset = 0;
+    for (unsigned int k = 0; k < i; ++k) {
+        offset += shapes[k].size;
+    }
+    auto index = offset / cacheformat::XYZ_SIZE;
+    auto num_xyz = shapes[i].size / cacheformat::XYZ_SIZE;
+    // pointers to Cube data:
+    auto start = xyz_->get() + index;
+    auto end = xyz_->get() + index + num_xyz;
+    return ShapeRange{start, end, header->n, XYZ(shapes[i].dim0, shapes[i].dim1, shapes[i].dim2)};
 }
 
 void CacheReader::unload() {
-    // unmap file from memory
+    // unload file from memory
     if (fileLoaded_) {
-        if (munmap(filePointer, fileSize_) == -1) {
-            // error handling
-            std::printf("error unmapping file\n");
-        }
-
-        // close file descriptor
-        close(fileDescriptor_);
+        xyz_.reset();
+        shapes_.reset();
+        header_.reset();
+        file_.reset();
         fileLoaded_ = false;
     }
-    fileDescriptor_ = -1;
-    filePointer = nullptr;
     header = &dummyHeader;
     shapes = nullptr;
 }
 
 CacheReader::~CacheReader() { unload(); }
+
+CacheWriter::CacheWriter(int num_threads) {
+    for (int i = 0; i < num_threads; ++i) {
+        m_flushers.emplace_back(&CacheWriter::run, this);
+    }
+}
+
+CacheWriter::~CacheWriter() {
+    flush();
+    // stop the threads.
+    std::unique_lock lock(m_mtx);
+    m_active = false;
+    m_run.notify_all();
+    lock.unlock();
+    for (auto &thr : m_flushers) thr.join();
+}
+
+void CacheWriter::run() {
+    std::unique_lock lock(m_mtx);
+    while (m_active) {
+        // do copy jobs:
+        if (!m_copy.empty()) {
+            auto task = std::move(m_copy.front());
+            m_copy.pop_front();
+            lock.unlock();
+
+            task();
+
+            lock.lock();
+            --m_num_copys;
+            continue;
+        }
+        // file flushes:
+        if (!m_flushes.empty()) {
+            auto task = std::move(m_flushes.front());
+            m_flushes.pop_front();
+            lock.unlock();
+
+            task();
+
+            lock.lock();
+            --m_num_flushes;
+            continue;
+        }
+        // notify that we are done here.
+        m_wait.notify_one();
+        // wait for jobs.
+        m_run.wait(lock);
+    }
+    m_wait.notify_one();
+}
+
+void CacheWriter::save(std::string path, Hashy &hashes, uint8_t n) {
+    if (hashes.size() == 0) return;
+
+    using namespace mapped;
+    using namespace cacheformat;
+
+    auto file_ = std::make_shared<mapped::file>();
+    if (file_->openrw(path.c_str(), 0)) {
+        std::printf("error opening file\n");
+        return;
+    }
+
+    auto header = std::make_shared<mapped::struct_<Header>>(file_, 0);
+    (*header)->magic = cacheformat::MAGIC;
+    (*header)->n = n;
+    (*header)->numShapes = hashes.byshape.size();
+    (*header)->numPolycubes = hashes.size();
+
+    std::vector<XYZ> keys;
+    keys.reserve((*header)->numShapes);
+    for (auto &pair : hashes.byshape) keys.push_back(pair.first);
+    std::sort(keys.begin(), keys.end());
+
+    auto shapeEntry = std::make_shared<mapped::array<ShapeEntry>>(file_, header->getEndSeek(), (*header)->numShapes);
+
+    uint64_t offset = shapeEntry->getEndSeek();
+    size_t num_cubes = 0;
+    int i = 0;
+    for (auto &key : keys) {
+        auto &se = (*shapeEntry)[i++];
+        se.dim0 = key.x();
+        se.dim1 = key.y();
+        se.dim2 = key.z();
+        se.reserved = 0;
+        se.offset = offset;
+        auto count = hashes.byshape[key].size();
+        num_cubes += count;
+        se.size = count * XYZ_SIZE * n;
+        offset += se.size;
+    }
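The table-filling loop above fixes the on-disk layout as [Header][ShapeEntry x numShapes][packed XYZ data], with ShapeEntry::size a byte count (count * XYZ_SIZE * n); the file-size warning in loadFile() checks exactly this invariant. A sketch of the same computation (assuming the mapped:: helpers add no alignment padding between sections):

#include <cstddef>
#include <cstdint>

#include "newCache.hpp"

// Sketch: the total size a cache file should have, per the layout above.
static size_t expectedFileSize(const cacheformat::Header &hdr, const cacheformat::ShapeEntry *entries) {
    size_t total = sizeof(cacheformat::Header) + size_t(hdr.numShapes) * sizeof(cacheformat::ShapeEntry);
    for (uint32_t i = 0; i < hdr.numShapes; ++i) {
        total += entries[i].size;  // bytes of packed XYZ data for shape i
    }
    return total;
}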
+    // put XYZs
+    // Serialize large CubeSet(s) in parallel.
+    auto xyz = std::make_shared<mapped::array<XYZ>>(file_, (*shapeEntry)[0].offset, num_cubes * n);
+    auto put = xyz->get();
+
+    auto copyrange = [n](CubeSet::iterator itr, CubeSet::iterator end, XYZ *dest) -> void {
+        while (itr != end) {
+            static_assert(sizeof(XYZ) == XYZ_SIZE);
+            assert(itr->size() == n);
+            itr->copyout(n, dest);
+            dest += n;
+            ++itr;
+        }
+    };
+
+    auto time_start = std::chrono::steady_clock::now();
+    for (auto &key : keys) {
+        for (auto &subset : hashes.byshape[key].byhash) {
+            auto itr = subset.set.begin();
+
+            ptrdiff_t dist = subset.set.size();
+            // distribute if the range is large enough.
+            auto skip = std::max(4096L, std::max(1L, dist / (signed)m_flushers.size()));
+            while (dist > skip) {
+                auto start = itr;
+                auto dest = put;
+
+                auto inc = std::min(dist, skip);
+                std::advance(itr, inc);
+                put += n * inc;
+                dist = std::distance(itr, subset.set.end());
+
+                auto done = 100.0f * (std::distance(xyz->get(), put) / float(num_cubes * n));
+                std::printf("writing data %5.2f%% ... \r", done);
+                std::flush(std::cout);
+
+                std::lock_guard lock(m_mtx);
+                m_copy.emplace_back(std::bind(copyrange, start, itr, dest));
+                ++m_num_copys;
+                m_run.notify_all();
+            }
+            // copy the remainder, if any.
+            if (dist) {
+                std::lock_guard lock(m_mtx);
+                m_copy.emplace_back(std::bind(copyrange, itr, subset.set.end(), put));
+                ++m_num_copys;
+                m_run.notify_all();
+                put += n * dist;
+
+                auto done = 100.0f * (std::distance(xyz->get(), put) / float(num_cubes * n));
+                std::printf("writing data %5.2f%% ... \r", done);
+                std::flush(std::cout);
+            }
+        }
+    }
+
+    // sanity check:
+    assert(put == (*xyz).get() + num_cubes * n);
+
+    // sync up.
+    std::unique_lock lock(m_mtx);
+    while (m_num_copys) {
+        m_wait.wait(lock);
+    }
+
+    // move the resources into the flush job.
+    m_flushes.emplace_back(std::bind(
+        [](auto &&file, auto &&header, auto &&shapeEntry, auto &&xyz) -> void {
+            // flush.
+            header->flush();
+            shapeEntry->flush();
+            xyz->flush();
+            // Truncate the file to its proper size.
+            file->truncate(xyz->getEndSeek());
+            file->close();
+            file.reset();
+            xyz.reset();
+            shapeEntry.reset();
+            header.reset();
+        },
+        std::move(file_), std::move(header), std::move(shapeEntry), std::move(xyz)));
+    ++m_num_flushes;
+    m_run.notify_all();
+
+    auto time_end = std::chrono::steady_clock::now();
+    auto dt_ms = std::chrono::duration_cast<std::chrono::milliseconds>(time_end - time_start).count();
+
+    std::printf("saved %s, took %.2f s\n\r", path.c_str(), dt_ms / 1000.f);
+}
+
+void CacheWriter::flush() {
+    std::unique_lock lock(m_mtx);
+    while (m_num_flushes) {
+        m_wait.wait(lock);
+    }
+}
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index b30d160..42e0014 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -19,4 +19,5 @@ add_executable(${PROJECT_NAME} $<TARGET_OBJECTS:CubeObjs> ${TESTS})
 target_link_libraries(GTest::GTest INTERFACE gtest_main)
 target_link_libraries(${PROJECT_NAME} pthread GTest::GTest)
+target_link_libraries(${PROJECT_NAME} mapped_file)
 ConfigureTarget(${PROJECT_NAME})
diff --git a/cpp/tests/src/test_cache.cpp b/cpp/tests/src/test_cache.cpp
deleted file mode 100644
index ae10cbf..0000000
--- a/cpp/tests/src/test_cache.cpp
+++ /dev/null
@@ -1,10 +0,0 @@
-#include <gtest/gtest.h>
-
-#include "cache.hpp"
-
-TEST(CacheTests, TestCacheLoadDoesNotThrow) { EXPECT_NO_THROW(Cache::load("./test_data.bin")); }
-
-TEST(CacheTests, TestCacheSaveDoesNotThrow) {
-    auto data = Cache::load("./test_data.bin");
-    EXPECT_NO_THROW(Cache::save("./temp.bin", data, 255));
-}
\ No newline at end of file
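The old round-trip tests are removed together with cache.hpp and not replaced in this diff. A smoke test against the new reader could look like the sketch below (only the error path is exercised; a full round trip through CacheWriter::save would first need a populated Hashy):

#include <gtest/gtest.h>

#include "newCache.hpp"

TEST(CacheReaderTests, LoadMissingFileFails) {
    CacheReader cr;
    // loadFile() returns non-zero when the backing file cannot be opened.
    EXPECT_NE(0, cr.loadFile("./does_not_exist.bin"));
}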