Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 76 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,28 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Optional build components; both default to OFF.
option(POMAI_BUILD_TESTS "Build tests" OFF)
option(POMAI_BUILD_BENCH "Build benchmarks" OFF)

# Prefer integer (SQ8/FP16) distance paths where data is quantized; reduces float use on embedded.
# Defaults to ON.
option(POMAI_PREFER_INTEGER_MATH "Prefer integer/SQ8/FP16 paths for distance (embedded)" ON)

# Edge-oriented build profile for constrained devices.
# When enabled, we bias towards smaller binaries and lower memory/debug overhead.
option(POMAI_EDGE_BUILD "Optimize PomaiDB build for edge devices (size/footprint)" OFF)

# Strict build mode: treat project warnings as errors (useful for CI/hardening).
# We keep a couple of warning categories non-fatal to tolerate intentional diagnostics
# from some vendored headers included by the core.
option(POMAI_STRICT "Treat warnings as errors for PomaiDB code" OFF)

# Ensure POMAI_PREFER_INTEGER_MATH is always defined for source code.
# $<BOOL:...> evaluates to 1 when the option is true and 0 otherwise, so sources can
# test the macro with `#if` rather than `#ifdef`.
# add_compile_definitions() is directory-scoped: the definition applies to targets
# created in this directory and its subdirectories.
add_compile_definitions(POMAI_PREFER_INTEGER_MATH=$<BOOL:${POMAI_PREFER_INTEGER_MATH}>)

# Edge build profile: expose POMAI_EDGE_BUILD=1 to the sources and favor size over speed.
if (POMAI_EDGE_BUILD)
  add_compile_definitions(POMAI_EDGE_BUILD=1)
  if (MSVC)
    # cl.exe already interpreted the previous `-Os` as `/Os` (favor small code), so keep
    # that behavior; `-g0` has no cl.exe equivalent and was reported as an unknown option.
    add_compile_options(/Os)
  else()
    # GCC/Clang: optimize for size and strip debug info.
    add_compile_options(-Os -g0)
  endif()
endif()


# =========================
# Native HNSW (Replaces FAISS HNSW)
Expand Down Expand Up @@ -79,6 +95,13 @@ target_include_directories(pomai
${CMAKE_CURRENT_SOURCE_DIR}/third_party
)

# The vendored HNSW translation unit is not held to PomaiDB's strict warning policy:
# under POMAI_STRICT (GCC/Clang only) it is compiled with -Werror switched off and a few
# noisy warning categories silenced, so third_party code cannot break a strict build.
if (POMAI_STRICT AND NOT MSVC)
  set_property(
    SOURCE third_party/pomaidb_hnsw/hnsw.cc
    PROPERTY COMPILE_OPTIONS
      -Wno-error
      -Wno-shadow
      -Wno-unused-parameter
      -Wno-unused-but-set-variable
  )
endif()

# OpenMP for parallel builds if needed
find_package(OpenMP QUIET)
if (OpenMP_CXX_FOUND)
Expand All @@ -88,11 +111,29 @@ endif()
# Baseline warning levels for the `pomai` target.
if (MSVC)
target_compile_options(pomai PRIVATE /W4 /permissive-)
else()
# Strict warnings for PomaiDB code, followed by suppressions for diagnostics that
# originate in vendored third_party (simd) headers.
# -Werror is not enabled unconditionally because some third_party headers
# intentionally emit #warning; it is only added when POMAI_STRICT is ON.
# Flag order matters: a later -Wno-<x> overrides an earlier -W<x>.
# NOTE(review): -Wconversion is enabled on the first line and then turned off again by
# -Wno-conversion on the last line, so conversion warnings are effectively disabled —
# confirm this is intended.
target_compile_options(pomai PRIVATE
-Wall -Wextra -Wpedantic -Wconversion -Wshadow
-Wunused-parameter -Wunused-variable
-Wno-cpp -Wno-unknown-pragmas -Wno-conversion -Wno-float-conversion -Wno-unused-function
)
endif()

# Strict mode for the `pomai` target: promote warnings to errors.
if (POMAI_STRICT)
  if (MSVC)
    target_compile_options(pomai PRIVATE /WX)
  else()
    target_compile_options(pomai PRIVATE
      -Werror
      # Some vendored headers use extensions that trigger pedantic/cpp warnings;
      # keep those categories as warnings instead of errors.
      -Wno-error=pedantic
      -Wno-error=cpp
    )
    # third_party/simd uses #warning; pre-C++23 this triggers -Wc++23-extensions on
    # compilers that have that warning group, so we disable the group entirely.
    # Compilers that do not know the group must not receive the flag: an unknown
    # warning option is itself diagnosed and, combined with -Werror, fails the build.
    # The positive form is probed because GCC silently accepts unknown -Wno-* options.
    include(CheckCXXCompilerFlag)
    check_cxx_compiler_flag(-Wc++23-extensions POMAI_HAS_WCXX23_EXTENSIONS)
    if (POMAI_HAS_WCXX23_EXTENSIONS)
      target_compile_options(pomai PRIVATE -Wno-c++23-extensions)
    endif()
  endif()
endif()


Expand Down Expand Up @@ -133,8 +174,24 @@ if (MSVC)
target_compile_options(pomai_c PRIVATE /W4 /permissive-)
target_compile_options(pomai_c_static PRIVATE /W4 /permissive-)
else()
target_compile_options(pomai_c PRIVATE -Wall -Wextra -Wpedantic -Wconversion -Wshadow)
target_compile_options(pomai_c_static PRIVATE -Wall -Wextra -Wpedantic -Wconversion -Wshadow)
target_compile_options(pomai_c PRIVATE
-Wall -Wextra -Wpedantic -Wconversion -Wshadow
-Wunused-parameter -Wunused-variable
)
target_compile_options(pomai_c_static PRIVATE
-Wall -Wextra -Wpedantic -Wconversion -Wshadow
-Wunused-parameter -Wunused-variable
)
endif()

# Strict mode for the C API targets (`pomai_c`, `pomai_c_static`): promote warnings to errors.
if (POMAI_STRICT)
  if (MSVC)
    target_compile_options(pomai_c PRIVATE /WX)
    target_compile_options(pomai_c_static PRIVATE /WX)
  else()
    # pedantic and #warning (cpp) diagnostics stay non-fatal.
    set(pomai_c_strict_flags -Werror -Wno-error=pedantic -Wno-error=cpp)
    # Only pass -Wno-c++23-extensions to compilers that know the warning group: an
    # unknown warning option is itself diagnosed and, combined with -Werror, fails the
    # build. The positive form is probed because GCC silently accepts unknown -Wno-* options.
    include(CheckCXXCompilerFlag)
    check_cxx_compiler_flag(-Wc++23-extensions POMAI_HAS_WCXX23_EXTENSIONS)
    if (POMAI_HAS_WCXX23_EXTENSIONS)
      list(APPEND pomai_c_strict_flags -Wno-c++23-extensions)
    endif()
    target_compile_options(pomai_c PRIVATE ${pomai_c_strict_flags})
    target_compile_options(pomai_c_static PRIVATE ${pomai_c_strict_flags})
  endif()
endif()

# =========================
Expand Down Expand Up @@ -269,6 +326,18 @@ if (POMAI_BUILD_TESTS)
pomai_setup_test(db_partial_search_test)
pomai_add_labeled_test(db_partial_search_test "integ")

# Integration tests: each one is built from tests/integ/<name>.cc, configured through
# pomai_setup_test(), and registered under the "integ" label.
foreach(integ_test_name
    db_backpressure_test
    db_edge_workload_test
    db_error_paths_test
    routing_engine_test)
  add_executable(${integ_test_name} tests/integ/${integ_test_name}.cc)
  pomai_setup_test(${integ_test_name})
  pomai_add_labeled_test(${integ_test_name} "integ")
endforeach()
Expand Down Expand Up @@ -356,6 +425,10 @@ if (POMAI_BUILD_TESTS)
pomai_setup_test(shard_runtime_tsan_test)
pomai_add_labeled_test(shard_runtime_tsan_test "tsan")

# TSAN tests: each one is built from tests/tsan/<name>.cc, configured through
# pomai_setup_test(), and registered under the "tsan" label.
foreach(tsan_test_name
    backpressure_tsan_test
    basic_workload_tsan_test)
  add_executable(${tsan_test_name} tests/tsan/${tsan_test_name}.cc)
  pomai_setup_test(${tsan_test_name})
  pomai_add_labeled_test(${tsan_test_name} "tsan")
endforeach()
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,13 @@ For the smallest footprint on embedded devices:
2. **Slim palloc submodule** (saves ~6MB): after clone, run `./scripts/slim_palloc_submodule.sh` so `third_party/palloc` omits `media/`, `test/`, `bench/`, and `contrib/`.
3. **Optional sparse checkout of pomaidb**: for a production embedded build you can exclude `benchmarks/`, `examples/`, or `tools/` via your own sparse-checkout if you do not need them at build time.

### Edge deployments & failure semantics

For recommended settings on real edge devices (build flags, durability policies, backpressure, and how PomaiDB behaves on power loss), see:

- `docs/EDGE_DEPLOYMENT.md` — **edge-device configuration & failure behavior**
- `docs/FAILURE_SEMANTICS.md` — low-level WAL / manifest crash semantics

### Docker: run benchmarks

Build the image, then run benchmarks in constrained (IoT/Edge) or server-style containers:
Expand Down
25 changes: 24 additions & 1 deletion benchmarks/palloc_env_stress.cc
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,9 @@ void RunEnvB(EnvReport* report) {
size_t total_verified = 0;
bool all_ok = true;
int failed_cycle = -1;
double first_cycle_throughput = 0.0;
double last_cycle_throughput = 0.0;
double min_cycle_throughput = 0.0;

try {
for (int cycle = 0; cycle < num_cycles; ++cycle) {
Expand All @@ -245,13 +248,17 @@ void RunEnvB(EnvReport* report) {
IngestResult r = IngestAndVerify(path, per_cycle);
total_ingested += r.ingested;
total_verified += r.verified;
if (cycle == 0) first_cycle_throughput = r.throughput_vec_per_sec;
last_cycle_throughput = r.throughput_vec_per_sec;
if (r.throughput_vec_per_sec > 0.0 && (min_cycle_throughput == 0.0 || r.throughput_vec_per_sec < min_cycle_throughput))
min_cycle_throughput = r.throughput_vec_per_sec;
if (r.verified != per_cycle || r.ingested != per_cycle) {
all_ok = false;
if (failed_cycle < 0) failed_cycle = cycle;
}
if (cycle == 0) rss_after_first = GetPeakRssBytes();
rss_after_last = GetPeakRssBytes();
printf("ingested=%zu verified=%zu\n", r.ingested, r.verified);
printf("ingested=%zu verified=%zu %.1f Vec/s\n", r.ingested, r.verified, r.throughput_vec_per_sec);
fflush(stdout);
}
} catch (const std::exception& e) {
Expand Down Expand Up @@ -281,6 +288,18 @@ void RunEnvB(EnvReport* report) {
if (total_ingested > 0 && elapsed_ns > 0)
report->throughput_vec_per_sec = static_cast<double>(total_ingested) * 1e9 / static_cast<double>(elapsed_ns);

// Report per-cycle ingestion rate so we can check it does not degrade over time (constant vector size).
if (num_cycles > 0 && first_cycle_throughput > 0.0) {
printf(" Per-cycle ingestion (Vec/s): first=%.1f last=%.1f min=%.1f",
first_cycle_throughput, last_cycle_throughput, min_cycle_throughput);
const double ratio = (first_cycle_throughput > 0.0) ? (last_cycle_throughput / first_cycle_throughput) : 0.0;
if (ratio < 0.75)
printf(" [WARN: last cycle %.0f%% of first — ingestion rate reduced over time]\n", ratio * 100.0);
else
printf(" [OK: rate stable]\n");
fflush(stdout);
}

if (!all_ok) {
report->passed = 0;
static std::string fail_msg;
Expand All @@ -299,6 +318,10 @@ void RunEnvB(EnvReport* report) {
if (growth <= 0.15) {
report->passed = 1;
report->message = "Peak RSS stable (no leak); all cycles verified";
if (first_cycle_throughput > 0.0 && last_cycle_throughput >= 0.75 * first_cycle_throughput)
report->message = "Peak RSS stable (no leak); all cycles verified; ingestion rate stable";
else if (first_cycle_throughput > 0.0 && last_cycle_throughput < 0.75 * first_cycle_throughput)
report->message = "Peak RSS stable (no leak); all cycles verified; WARN: ingestion rate degraded over cycles";
} else {
report->passed = 0;
report->message = "FAIL: RSS growth suggests leak";
Expand Down
184 changes: 184 additions & 0 deletions docs/EDGE_DEPLOYMENT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
## PomaiDB on Edge Devices: Recommended Settings & Failure Semantics

PomaiDB is designed first for embedded / edge workloads: single-process, local storage, constrained memory, and frequent power loss. This guide summarizes **recommended configuration presets** and **what happens on failure** so you can reason about behavior on devices like Raspberry Pi, Jetson, or custom ARM boards.

This document focuses on the **embedded `pomai::Database` API** (single-instance engine) and the **sharded `pomai::DB` API** where relevant.

---

### 1. Build profile and compiler settings

- **Edge build profile (size-optimized):**
- Configure CMake with:
- `-DPOMAI_EDGE_BUILD=ON` (adds `-Os -g0` to the compile options and defines `POMAI_EDGE_BUILD=1` for the source code)
- `-DCMAKE_BUILD_TYPE=Release`
- Recommended for production firmware images and containers where binary size and cold-start latency matter more than debug info.

- **Strict warnings for development and CI:**
- Enable:
- `-DPOMAI_STRICT=ON`
- This turns compiler warnings into errors for PomaiDB’s own code (`-Werror`, or `/WX` on MSVC) while keeping vendored dependencies (HNSW, SIMD kernels) lenient: pedantic and `#warning` diagnostics stay non-fatal.
- Safe to combine with `POMAI_EDGE_BUILD` once your toolchain is stable; it helps surface misconfigurations early.

---

### 2. Storage and durability settings

PomaiDB stores all data under a **single directory** on local storage (e.g., SD card, eMMC, SSD).

- **Filesystem & mount:**
- Prefer **ext4** or another journaling filesystem with barriers enabled.
- Avoid network filesystems for embedded use; PomaiDB assumes low-latency local I/O.

- **Durability via `FsyncPolicy`:**
- For the sharded `pomai::DB` API (`pomai::DBOptions`):
- `FsyncPolicy::kNever`:
- Best for **cache-like or reconstructible** data.
- Power loss may drop recent writes still in OS buffers, but on-disk data remains self-consistent.
- `FsyncPolicy::kAlways`:
- Recommended when **data must survive power loss** and write rates are modest.
- Every WAL / manifest commit is fsynced; expect higher latency but strong durability.
- For the embedded `pomai::Database` API (`pomai::EmbeddedOptions`):
- Use `EmbeddedOptions::fsync` in the same way.
- On intermittently powered devices, prefer `kAlways` for critical logs and `kNever` where data can be rebuilt.

- **Flush vs. Freeze:**
- `Flush()` ensures the **WAL is pushed to disk** according to `FsyncPolicy`.
- `Freeze()` moves the current memtable into an on-disk **segment** and updates manifests.
- On edge devices, a common pattern from an event loop or watchdog is:
- Periodically call `Flush()` and `Freeze()` on a timer (e.g., every N seconds) or after M ingests.
- On clean shutdown, issue `Flush()` and `Freeze()` before `Close()`.

For the detailed atomic commit protocol and WAL / manifest guarantees, see `docs/FAILURE_SEMANTICS.md`.

---

### 3. Memory limits and backpressure (embedded `pomai::Database`)

`pomai::Database` exposes **explicit backpressure controls** in `EmbeddedOptions`:

- **Key fields:**
- `max_memtable_mb`:
- Hard cap for the memtable (in MiB). `0` = use the `POMAI_MAX_MEMTABLE_MB` environment variable if it is set, otherwise the built-in default:
- Default is tuned for edge and may differ between low-memory and normal builds.
- `pressure_threshold_percent`:
- Soft threshold (percent of `max_memtable_mb`) where pressure handling kicks in. `0` = default (typically 80%).
- `auto_freeze_on_pressure`:
- If `true`, when the memtable exceeds the pressure threshold, the engine will **call `Freeze()` internally** rather than returning an error.
- `memtable_flush_threshold_mb`:
- Absolute size in MiB where `auto_freeze_on_pressure` triggers, overriding the percentage. `0` = derive from `pressure_threshold_percent`.

- **Recommended presets for edge:**
- **Tiny devices (≤ 256 MiB RAM):**
- `max_memtable_mb = 32`–`64`
- `pressure_threshold_percent = 70`–`80`
- `auto_freeze_on_pressure = true`
- `memtable_flush_threshold_mb = 32` (optional override)
- **Moderate devices (512 MiB – 1 GiB RAM):**
- `max_memtable_mb = 128`–`256`
- `pressure_threshold_percent = 80`
- `auto_freeze_on_pressure = true` (recommended) or `false` if you want manual control via `TryFreezeIfPressured()`.

- **Environment overrides:**
- The embedded engine also honors:
- `POMAI_MAX_MEMTABLE_MB` – caps memtable size if `max_memtable_mb` is `0`.
- `POMAI_MEMTABLE_PRESSURE_THRESHOLD` – overrides `pressure_threshold_percent` for defaults.
- `POMAI_BENCH_LOW_MEMORY` – switches to lower default memtable sizes for benchmarks / tests.

- **Operational pattern:**
- In a single-threaded event loop, the typical pattern is:
- Call `AddVector()` / `AddVectorBatch()` for ingestion.
- Periodically call `TryFreezeIfPressured()` to keep memory use bounded.
- Inspect `GetMemTableBytesUsed()` for metrics / logging.

---

### 4. Index and quantization presets for low memory

PomaiDB’s `IndexParams` exposes presets tuned for edge workloads:

- **Use `IndexParams::ForEdge()` wherever possible:**
- In `EmbeddedOptions`:
- `opt.index_params = pomai::IndexParams::ForEdge();`
- This preset reduces:
- IVF list count (`nlist`), probes (`nprobe`),
- HNSW degree / ef parameters,
- and other memory-heavy knobs.
- The goal is to keep index RAM usage predictable while still providing reasonable recall.

- **Distance metric:**
- For most embedding-style workloads on edge devices:
- Use `MetricType::kL2` (squared L2) with SQ8 or FP16 quantization for compact storage.
- `MetricType::kInnerProduct` is also supported but may be more sensitive to quantization.

- **Quantization knobs (when applicable):**
- Prefer SQ8 or FP16 quantization where your model tolerates some loss, especially for:
- Large corpora on devices with ≤ 512 MiB RAM.
- Scenarios where on-disk size is heavily constrained (e.g., SD cards with many tenants).

---

### 5. Failure semantics on edge devices

PomaiDB is built to **fail closed** rather than risking silent corruption. High-level behaviors (see `docs/FAILURE_SEMANTICS.md` for details):

- **On `Open()` (embedded `Database::Open` / sharded `DB::Open`):**
- Invalid configuration (e.g., `dim == 0`, empty `path`) returns:
- `Status::InvalidArgument`.
- Filesystem errors (permissions, missing dirs that cannot be created) return:
- `Status::IOError`.
- WAL or manifest corruption:
- The engine attempts to **replay or recover**.
- If recovery is not possible, `Open()` returns a non-OK `Status` (e.g., `Corruption`, `Aborted`, or `Internal` depending on context) and **does not start** the engine.

- **During ingestion / search:**
- **Backpressure (embedded engine):**
- If the memtable exceeds `max_memtable_mb` and `auto_freeze_on_pressure` is `false`:
- `AddVector` / `AddVectorBatch` will return `Status::ResourceExhausted` with a message instructing callers to `Freeze()` or `TryFreezeIfPressured()`.
- If `auto_freeze_on_pressure` is `true`:
- The engine attempts to `Freeze()` internally once pressure is detected.
- If freeze fails (e.g., I/O error), the operation returns the corresponding failure `Status`.
- **I/O failures (ENOSPC, EIO, etc.):**
- Write failures on WAL / segments propagate as:
- `Status::IOError` or `Status::Aborted` / `Status::Internal`, depending on the layer.
- After a serious I/O error, affected shards / the embedded engine will refuse further operations until reopened, to avoid compounding corruption.

- **Crash and restart behavior:**
- On restart, both APIs:
- Re-open WALs and attempt **replay up to the last valid record**.
- Validate manifests and segment files; fall back from `manifest.current` to `manifest.prev` if needed.
- Tests such as `recovery_test`, `manifest_corruption_test`, and WAL corruption scenarios validate the following guarantees:
- No silent acceptance of corrupted manifests or WAL segments.
- Either **recover to a consistent state** (possibly losing a tail of recent writes) or **fail to open** with a non-OK `Status`.

---

### 6. Operational recommendations for real devices

- **Choose a failure policy per device class:**
- For sensor nodes with upstream replicas:
- Prefer `FsyncPolicy::kNever`, small `max_memtable_mb`, and `auto_freeze_on_pressure = true`.
- Rely on upstream for long-term durability.
- For gateway / aggregation devices:
- Prefer `FsyncPolicy::kAlways` for critical data.
- Use `IndexParams::ForEdge()` and conservative `max_memtable_mb` to bound RAM.

- **Integrate health checks:**
- Treat **any non-OK `Status` from `Open()`** as a signal to:
- Log and raise an alert.
- Potentially rotate to a new storage path or device.
- Monitor:
- `GetMemTableBytesUsed()`
- Open / ingest / search status codes (e.g., `ResourceExhausted`, `IOError`, `Corruption`).

- **Test on your actual target:**
- Run the existing integration, TSAN, and crash tests on:
- Your device type, filesystem, and kernel.
- Perform your own chaos test:
- Ingest + `Flush()` / `Freeze()` loop.
- Physically cut power or kill the process.
- Verify that:
- `Open()` either succeeds with intact historical data or fails with a clear error code.

These guidelines are intentionally conservative: they aim to keep your edge deployments safe even under frequent power loss and tight memory budgets.

Loading
Loading