diff --git a/Cargo.lock b/Cargo.lock index 69bc163b..90a95fe1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -205,13 +205,14 @@ checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" [[package]] name = "bitdex-v2" -version = "1.0.71" +version = "1.0.93" dependencies = [ "arc-swap", "axum", "bytes", "chrono", "clap", + "crc32fast", "criterion", "crossbeam-channel", "dashmap", @@ -229,10 +230,13 @@ dependencies = [ "rpmalloc", "serde", "serde_json", + "serde_yaml", "sqlx", "tar", "tempfile", "thiserror 2.0.18", + "tikv-jemalloc-ctl", + "tikv-jemallocator", "tokio", "tokio-util", "toml", @@ -1513,6 +1517,12 @@ dependencies = [ "windows-link", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "pem-rfc7468" version = "0.7.0" @@ -2241,6 +2251,19 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_yaml" +version = "0.9.34+deprecated" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", + "unsafe-libyaml", +] + [[package]] name = "sha1" version = "0.10.6" @@ -2675,6 +2698,37 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "tikv-jemalloc-ctl" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "661f1f6a57b3a36dc9174a2c10f19513b4866816e13425d3e418b11cc37bc24c" +dependencies = [ + "libc", + "paste", + "tikv-jemalloc-sys", +] + +[[package]] +name = "tikv-jemalloc-sys" +version = "0.6.1+5.3.0-1-ge13ca993e8ccb9ba9847cc330696e02839f328f7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd8aa5b2ab86a2cefa406d889139c162cbb230092f7d1d7cbc1716405d852a3b" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "tikv-jemallocator" +version = "0.6.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0359b4327f954e0567e69fb191cf1436617748813819c94b8cd4a431422d053a" +dependencies = [ + "libc", + "tikv-jemalloc-sys", +] + [[package]] name = "tinystr" version = "0.8.2" @@ -2971,6 +3025,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unsafe-libyaml" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" + [[package]] name = "untrusted" version = "0.9.0" diff --git a/Cargo.toml b/Cargo.toml index 818ce2b7..37f3f389 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,9 +15,10 @@ default = [] server = ["dep:axum", "dep:tower-http", "dep:tokio", "dep:tokio-util", "dep:prometheus"] loadtest = ["ureq"] replay = ["ureq"] -pg-sync = ["dep:sqlx", "dep:clap", "dep:reqwest", "dep:chrono", "dep:tokio", "dep:axum", "dep:tower-http", "dep:futures-core", "dep:futures-util", "dep:bytes"] +pg-sync = ["dep:sqlx", "dep:clap", "dep:reqwest", "dep:chrono", "dep:tokio", "dep:axum", "dep:tower-http", "dep:futures-core", "dep:futures-util", "dep:bytes", "dep:serde_yaml"] simd = ["roaring/simd"] heap-prof = ["dep:tikv-jemallocator", "dep:tikv-jemalloc-ctl"] +serde_yaml = ["dep:serde_yaml"] [dependencies] # Bitmap indexes @@ -85,6 +86,7 @@ thiserror = "2" # Logging tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["env-filter"] } +serde_yaml = { version = "0.9.34", optional = true } [dev-dependencies] # Property-based testing diff --git a/docs/design/pg-sync-v2-final.md b/docs/design/pg-sync-v2-final.md new file mode 100644 index 00000000..58ebb286 --- /dev/null +++ b/docs/design/pg-sync-v2-final.md @@ -0,0 +1,494 @@ +# BitDex Sync V2 — Final Design + +> Distilled from the [working design doc](pg-sync-v2.md) (Justin + Adam, 2026-03-25). 
+ +## Problem + +The V1 outbox poller is 80M rows behind and can never catch up. Each cycle polls 5,000 rows from `BitdexOutbox`, then runs 5 enrichment queries per batch (images + tags + tools + techniques + resources) to assemble full JSON documents before PATCHing to BitDex. At ~2,500 changes/s with enrichment as the bottleneck, steady-state write volume exceeds processing capacity. + +## Solution + +Replace the "notify then re-fetch" pattern with **self-contained ops**. PG triggers encode the exact delta (old value, new value, field name) directly into a JSONB ops array. No enrichment queries, no full-document assembly. BitDex applies ops as direct bitmap mutations. + +--- + +## Architecture + +``` +PG trigger fires + → INSERT one row into BitdexOps (entity_id, JSONB ops array) + → pg-sync polls BitdexOps, deduplicates, POSTs batch to BitDex + → BitDex /ops endpoint appends to local WAL file, returns 200 + → WAL reader thread processes ops → bitmap mutations via coalescer +``` + +### BitdexOps Table + +```sql +CREATE TABLE IF NOT EXISTS "BitdexOps" ( + id BIGSERIAL PRIMARY KEY, + entity_id BIGINT NOT NULL, + ops JSONB NOT NULL, + created_at TIMESTAMPTZ DEFAULT now() +); +CREATE INDEX idx_bitdex_ops_id ON "BitdexOps" (id); +``` + +Each row contains a JSONB array of ops. Triggers include both old and new values so BitDex can update bitmaps without reading the docstore. 
+ +### Op Types + +| Op | Example | Bitmap Action | +|----|---------|---------------| +| `set` | `{"op":"set","field":"nsfwLevel","value":16}` | Set bit in value bitmap | +| `remove` | `{"op":"remove","field":"nsfwLevel","value":8}` | Clear bit from value bitmap | +| `add` | `{"op":"add","field":"tagIds","value":42}` | Set bit in multi-value bitmap | +| `delete` | `{"op":"delete"}` | Clear all filter/sort bits + alive bit | +| `queryOpSet` | See [Fan-Out](#fan-out-via-queryopset) | Query-resolved bulk bitmap ops | + +**No `full` op type.** INSERTs emit individual `set` ops for each field (all additive, no `remove` since there's no prior state). One format for everything. + +### Op Examples + +**Image UPDATE** (nsfwLevel 8→16, type stays same): +```json +[ + {"op": "remove", "field": "nsfwLevel", "value": 8}, + {"op": "set", "field": "nsfwLevel", "value": 16} +] +``` + +**Image INSERT** (new image): +```json +[ + {"op": "set", "field": "nsfwLevel", "value": 1}, + {"op": "set", "field": "type", "value": "image"}, + {"op": "set", "field": "userId", "value": 12345}, + {"op": "set", "field": "sortAt", "value": 1711234567} +] +``` + +**Tag added:** +```json +[{"op": "add", "field": "tagIds", "value": 42}] +``` + +**Image deleted:** +```json +[{"op": "delete"}] +``` + +--- + +## Fan-Out via queryOpSet + +Fan-out tables (ModelVersion, Post, Model) don't produce per-image ops in the trigger. Instead, they emit a single `queryOpSet` op that tells BitDex to resolve affected slots from its own bitmaps. + +**ModelVersion baseModel change:** +```json +[{"op": "queryOpSet", "query": "modelVersionIds eq 456", "ops": [ + {"op": "remove", "field": "baseModel", "value": "SD 1.5"}, + {"op": "set", "field": "baseModel", "value": "SDXL"} +]}] +``` + +BitDex looks up the `modelVersionIds=456` bitmap, gets all affected slots, applies two bulk bitmap operations (`andnot` old + `or` new). A 15M-image fan-out completes in microseconds — no per-image ops, no PG queries. 
+ +**Model POI change** (needs MV ids from PG first): +```json +[{"op": "queryOpSet", "query": "modelVersionIds in [101, 102, 103]", "ops": [ + {"op": "set", "field": "poi", "value": true} +]}] +``` + +The trigger uses `jsonb_agg` to collect MV ids: `SELECT jsonb_agg(id) FROM ModelVersion WHERE modelId = NEW.id`. BitDex ORs the MV bitmaps together, then applies the ops. + +**Post publishedAt change:** +```json +[{"op": "queryOpSet", "query": "postId eq 789", "ops": [ + {"op": "remove", "field": "publishedAt", "value": 1711000000}, + {"op": "set", "field": "publishedAt", "value": 1711234567} +]}] +``` + +### Fan-Out Scale (measured 2026-03-25) + +| Metric | Value | +|--------|-------| +| ImageResourceNew rows | ~375M | +| Top ModelVersion (290640) | ~15.1M images | +| Top 5 ModelVersions | 18.6% of all rows | +| p50 images/MV | 1 | +| p90 images/MV | 5 | +| p99 images/MV | 53 | + +The distribution is extremely heavy-tailed. 99% of fan-outs are trivial. The queryOpSet approach handles even the 15M-image worst case as two bitmap operations. + +--- + +## Trigger Configuration (YAML) + +pg-sync generates trigger SQL from a declarative YAML config. 
Two table types: + +### Direct Tables (slot = PG column) + +```yaml +sync_sources: + - table: Image + slot_field: id + track_fields: [nsfwLevel, type, userId, postId, minor, poi, hideMeta, meta, blockedFor] + on_delete: delete_slot + + - table: TagsOnImageNew + slot_field: imageId + field: tagIds + value_field: tagId + + - table: ImageTool + slot_field: imageId + field: toolIds + value_field: toolId + + - table: ImageTechnique + slot_field: imageId + field: techniqueIds + value_field: techniqueId + + - table: CollectionItem + slot_field: imageId + field: collectionIds + value_field: collectionId + filter: "status = 'ACCEPTED' AND \"imageId\" IS NOT NULL" +``` + +- `slot_field`: PG column that maps to the BitDex slot ID +- `track_fields`: Scalar columns — trigger emits `remove`/`set` pairs using `IS DISTINCT FROM` +- `field` + `value_field`: Multi-value join tables — INSERT = `add`, DELETE = `remove` +- `on_delete`: `delete_slot` emits a `{"op":"delete"}` op + +### Fan-Out Tables (slots resolved by BitDex query) + +```yaml + - table: ModelVersion + query: "modelVersionIds eq {id}" + track_fields: [baseModel] + + - table: Post + query: "postId eq {id}" + track_fields: [publishedAt, availability] + + - table: Model + query: "modelVersionIds in {modelVersionIds}" + query_source: "SELECT jsonb_agg(id) as \"modelVersionIds\" FROM \"ModelVersion\" WHERE \"modelId\" = {id}" + track_fields: [poi] +``` + +- `query`: BitDex query template. `{column}` placeholders are substituted from `NEW` columns. +- `query_source`: Optional PG subquery for values not on the triggering table. Returns named columns that feed into `query` placeholders. +- No `slot_field` — slots come from the BitDex query result. + +### Trigger Reconciliation + +Trigger naming: `bitdex_{table}_{hash8}` where `hash8` is the first 8 chars of SHA256 of the function body. On startup, pg-sync: + +1. Generates trigger SQL from config +2. Queries `pg_trigger WHERE tgname LIKE 'bitdex_%'` +3. Hash matches → skip. 
Hash differs → `CREATE OR REPLACE`. Table not in config → `DROP TRIGGER`. + +Config is the source of truth. pg-sync reconciles PG state to match. + +--- + +## WAL-Backed Ops Endpoint + +### Ingestion + +`POST /api/indexes/{name}/ops` receives ops from pg-sync, appends to a local WAL file, returns 200. Zero processing on the HTTP path — just fsync and acknowledge. + +```json +{ + "ops": [ + {"entity_id": 123, "ops": [{"op": "add", "field": "tagIds", "value": 42}]}, + {"entity_id": 456, "ops": [{"op": "set", "field": "nsfwLevel", "value": 16}]} + ], + "meta": { + "source": "pg-sync-default", + "cursor": 420000000, + "max_id": 500000000, + "lag_rows": 80000000 + } +} +``` + +No cursor management — pg-sync owns its cursor in PG (`bitdex_cursors` table). The `meta` field carries lag metrics for Prometheus exposition. + +### WAL Processing + +A dedicated reader thread tails the WAL file, reads batches, deduplicates, and submits mutations to the coalescer. + +- Append-only files, one per generation: `ops_000001.wal`, `ops_000002.wal`, ... +- Reader maintains a persisted byte-offset cursor +- Size-based rotation (e.g., 100MB), old generations deleted after processing +- Format: `[4-byte len][entity_id: i64][ops: JSONB bytes][CRC32]` — same pattern as ShardStore/BucketDiffLog +- Crash recovery: resume from persisted cursor in current generation + +### Op Deduplication + +Two-layer dedup using a shared `dedup_ops()` helper: + +1. **pg-sync side**: LIFO dedup per `(entity_id, field)` + add/remove cancellation. Reduces batch before sending. +2. **WAL reader side**: Same dedup on WAL batch. Catches cross-poll duplicates. + +`full` ops are decomposed into individual `set` ops by pg-sync before dedup — `full` is not a special case in the processing pipeline. + +BitDex skips ops for fields not in its index config. Stale triggers that emit ops for removed fields are harmless. 
+ +--- + +## Observability + +### Prometheus Metrics + +Unified `bitdex_sync_*` namespace with `source` label: + +``` +bitdex_sync_cursor_position{source="pg-sync-default"} 420000000 +bitdex_sync_max_id{source="pg-sync-default"} 500000000 +bitdex_sync_lag_rows{source="pg-sync-default"} 80000000 +bitdex_sync_cycle_duration_seconds{source="pg-sync-default"} 0.05 +bitdex_sync_cycle_rows{source="pg-sync-default"} 4850 +bitdex_sync_wal_pending_bytes 1048576 +bitdex_sync_wal_generation 3 +``` + +### Lag Endpoint + +`GET /api/internal/sync-lag` — returns latest `meta` from each sync source. + +Metrics are bundled with the ops payload — no separate reporting call. + +--- + +## Deployment + +### Binary + +Rename `bitdex-pg-sync` → `bitdex-sync` with subcommands: +- `bitdex-sync pg --config sync.toml` — PG ops poller +- `bitdex-sync ch --config sync.toml` — ClickHouse metrics poller +- `bitdex-sync all --config sync.toml` — both (default for K8s sidecar) + +Single sidecar container, concurrent tokio tasks. + +### ClickHouse + +Stays separate and simple. Polls CH for aggregate counts (reactionCount, commentCount, collectedCount), pushes to BitDex ops endpoint. Not config-driven — the CH query is domain-specific. + +### Migration Plan + +1. Build V2: BitdexOps table, YAML-driven triggers, ops poller, WAL endpoint, queryOpSet, dump pipeline +2. Boot pod — pg-sync auto-detects empty BitDex, runs table dumps, transitions to steady-state +3. Done. No manual intervention. V1 code stays in repo, unused. + +No incremental migration, no shadow mode, no V1 fixes. No manual pod teardown/reload dance. 
+ +--- + +## Unified Load Pipeline + +### Responsibility Split + +**pg-sync (sidecar)** is a thin data mover: +- `COPY FROM` PG → write CSV to shared volume +- Signal BitDex that a CSV is ready (`POST /dumps/{name}/loaded`) +- Poll BitdexOps outbox → `POST /ops` batches (steady-state) +- Manage cursor in PG (`bitdex_cursors` table) + +**BitDex (server)** owns all processing: +- On dump signal: read CSV → parse → ops → AccumSink → bitmap accumulation (direct path, ~367K images/s) +- On `/ops` POST: append to WAL → WAL reader → CoalescerSink → coalescer channel (steady-state) +- YAML sync config awareness: field mapping, value conversion, bit decomposition +- All indexing logic: BitmapSink trait, FieldMeta, value_to_bitmap_key, value_to_sort_u32 + +pg-sync never generates ops, never touches bitmaps, never writes WAL. The sync config (`sync.yaml`) is read by both: pg-sync uses it for `COPY` column selection and trigger generation, BitDex uses it for CSV→ops field mapping. + +### Boot Sequence + +``` +K8s starts pod (BitDex server + bitdex-sync sidecar) + → bitdex-sync waits for BitDex health check + → Capture max(BitdexOps.id) as pre_dump_cursor + → GET /api/indexes/{name}/dumps — check dump history + → For each sync_source not yet dumped: + 1. PUT /api/indexes/{name}/dumps — register dump + 2. COPY table from PG → write CSV to shared volume + 3. POST /api/indexes/{name}/dumps/{name}/loaded — "CSV is ready" + 4. BitDex reads CSV directly, parses → AccumSink → bitmaps + 5. BitDex saves bitmaps to ShardStore, unloads from memory + → Seed cursor at pre_dump_cursor (not current max — catches dump-window ops) + → Transition to steady-state ops polling + → K8s readiness probe flips to 200, traffic starts routing +``` + +No manual intervention. No WAL for dumps. No serialization overhead. Just boot and it works. 
+ +### Dump Endpoints + +``` +GET /api/indexes/{name}/dumps — list dump history +PUT /api/indexes/{name}/dumps — register new dump → task ID +POST /api/indexes/{name}/dumps/{name}/loaded — signal dump file complete +DELETE /api/indexes/{name}/dumps/{name} — remove from history +DELETE /api/indexes/{name}/dumps — clear all history +GET /api/tasks/{task_id} — poll dump processing status (existing) +``` + +### Dump Identity and Change Detection + +Dump names include a config hash: `Image-a1b2c3d4`. pg-sync constructs the name from the table name + hash of that table's YAML config. If the config changes (add a field to `track_fields`), the hash changes, the name doesn't match existing dumps, and pg-sync auto-re-dumps. BitDex treats dump names as opaque strings. + +### Table Ordering + +No JOINs on large tables. Each table dumps flat. + +1. **Image** — flat COPY. Produces `existedAt` via `GREATEST(scannedAt, createdAt)` expression in `track_fields`. +2. **TagsOnImageNew, ImageTool, ImageTechnique, CollectionItem, ImageResourceNew** — flat COPYs, can run in parallel. +3. **Post** — flat COPY (id, publishedAt, availability). Depends on Image being loaded first. Uses `queryOpSet "postId eq {id}"` to set fields on image slots. +4. **ModelVersion** — flat COPY (small table, <1M rows, JOINs fine). Sets baseModel via `queryOpSet`. +5. **ClickHouse metrics** — separate dump via ch-sync. + +### Dump Processing Mode + +Dump processing bypasses the WAL, coalescer, and flush thread entirely. BitDex reads the CSV directly and processes via `AccumSink` → `BitmapAccum` → `apply_accum()`: + +1. CSV rows parsed in-process (`parse_image_row`, `parse_tag_row`, etc.) +2. Each row → ops → `BitmapSink::filter_insert()` / `sort_set()` / `alive_insert()` +3. `AccumSink` inserts directly into `BitmapAccum` (HashMap-backed bitmap accumulator) +4. After all rows: `engine.apply_accum(&accum)` merges bitmaps into staging via OR +5. Save bitmaps to ShardStore, unload from memory +6. 
Lazy load on first query (existing `ensure_fields_loaded()` path) + +This matches the single-pass loader's throughput: **367K images/s at 1M scale** (vs 345K/s single-pass baseline). No serialization, no WAL I/O, no channel overhead. + +The `creates_slot` flag on `EntityOps` controls alive bit management: +- Image table CSVs: `creates_slot: true` → sets alive bit +- Join table CSVs (tags, tools): `creates_slot: false` → only adds filter bitmaps + +Peak memory: one table's bitmaps at a time. K8s readiness probe returns 503 during dumps (health probe stays 200). Traffic routes only after all dumps complete. + +### Prerequisite: Computed Sort Fields + +`sortAt = GREATEST(existedAt, publishedAt)` requires BitDex to compute sort values from multiple source fields. `existedAt` comes from Image dumps, `publishedAt` comes from Post dumps — they arrive at different times. BitDex must recompute `sortAt` whenever either source changes. + +This is a separate feature tracked in [computed-sort-fields.md](computed-sort-fields.md). + +--- + +## Throughput + +| | V1 | V2 (measured) | +|---|---|---| +| Enrichment queries | 5 per batch | 0 | +| Dump throughput (images) | ~70K/s (single-pass) | **367K/s** (direct AccumSink) | +| Dump throughput (tags) | — | **2.6M/s** (direct AccumSink) | +| Steady-state throughput | ~2,500 changes/s | 2,700 ops/s (CoalescerSink) | +| Fan-out cost (15M images) | 15M enrichment queries | 2 bitmap ops | +| WAL-backed dump (if needed) | — | 41K ops/s | + +Dump mode at 367K images/s processes 107M images in ~4.9 minutes (image table only). +Steady-state 2,700 ops/s provides 1.1x headroom over peak traffic (~2,500 changes/s). +The WAL path exists for steady-state durability; dumps skip it entirely for throughput. + +--- + +## Design Review Findings (2026-03-25) + +Architectural review identified 17 issues. Resolutions agreed with Justin: + +### Cursor Gap (Critical — C1) + +PG triggers fire into `BitdexOps` while dumps run. 
If we seed the cursor at `max(BitdexOps.id)` AFTER dumps, ops generated during the dump window are skipped. + +**Resolution:** Capture `max(BitdexOps.id)` BEFORE starting dumps. Seed cursor at that pre-dump value. pg-sync re-processes some overlapping ops (idempotent — set/remove are self-correcting). Updated boot sequence: + +``` +→ Capture max(BitdexOps.id) as pre_dump_cursor +→ Run dumps... +→ Seed cursor at pre_dump_cursor (not current max) +→ Start steady-state polling — catches all ops from dump window +``` + +### queryOpSet Race (Critical — C2) + +Between bitmap lookup and op application, concurrent mutations could change the resolved slot set. A new image gaining MV 456 during a baseModel cascade could be missed. + +**Resolution:** Snapshot-level isolation is acceptable. The next steady-state trigger on the missed image corrects the state. The consistency window is bounded by the poll interval (~2s). Document this as eventual consistency, not serializability. + +### Delete Ops + Docstore Read (High — H1) + +Delete ops carry no old values, so BitDex must read the docstore to know which bitmaps to clear (clean delete principle). + +**Resolution:** Deletes are infrequent — docstore read is acceptable for this case. This is the one op type that requires a docstore read. Doc cache makes it <1μs in the common case. The trigger can't easily emit all field values from `OLD` because multi-value fields (tags, tools) come from join tables, not the Image row. + +### WAL Partial Records (High — H3) + +Crash mid-write leaves truncated WAL record. + +**Resolution:** `POST /ops` returns 200 only after all records are written and fsynced. If crash happens before response, pg-sync doesn't advance its cursor and resends the batch. LIFO dedup on the WAL reader handles re-delivered ops. For dump WAL files, same approach: pg-sync only calls `/loaded` after the full file is written. 
+ +### Alive Bit Management (Medium — M1) + +No op type explicitly sets the alive bit for new slots. + +**Resolution:** The Image table config gets a new property: `sets_alive: true`. Only the table marked `sets_alive` triggers alive bit setting on first `set` op for a non-alive slot. This prevents tags/tools from accidentally creating alive entries for non-existent images. Other tables' ops on non-alive slots are silently dropped. + +```yaml +- table: Image + slot_field: id + sets_alive: true # only this table can create new alive slots + track_fields: [...] +``` + +### Dump Ordering Dependency (Medium — M4) + +ImageResourceNew must complete before ModelVersion dump starts (MV queryOpSet needs `modelVersionIds` bitmaps). + +**Resolution:** Explicit dump phases: +1. Image +2. ImageResourceNew + tags + tools + techniques + collections (parallel) +3. Post + ModelVersion (parallel, both depend on step 2) +4. ClickHouse metrics + +### Docstore Writes for V2 Ops (Medium — M5) + +Each op must also write to the docstore (not just bitmaps) for document serving and computed field lookups. + +**Resolution:** Each op appends to the docstore via V2 tuple format: `DocSink.append(slot_id, field_idx, value)`. For `queryOpSet`, each affected slot gets a docstore write per field. Slot ID is always available from `entity_id` (direct ops) or from the query result set (queryOpSet). + +### `meta` Field Write Amplification (Low — L5) + +**Non-issue.** `hasMeta` and `onSite` are already precomputed as bit flags on the Image table (`flags` column — bit 13 = hasPrompt, bit 14 = madeOnSite, bit 2 = hideMeta). The COPY loader reads these directly via `CopyImageRow.has_meta()` and `.on_site()`. No raw `meta` JSONB tracking needed — `hasMeta` and `onSite` are plain boolean fields in `track_fields`, derived from flag bit changes. + +### queryOpSet entity_id Dedup (Low — L2) + +Multiple queryOpSets with `entity_id=0` would incorrectly deduplicate. 
+ +**Resolution:** Use the source entity's ID (ModelVersion ID, Post ID) as `entity_id`. Dedup logic treats `queryOpSet` ops separately — dedup by `(entity_id, query)` not `(entity_id, field)`. + +--- + +## Files That Change + +| File | Change | +|------|--------| +| `src/pg_sync/queries.rs` | BitdexOps table SQL, `poll_ops_from_cursor()` | +| `src/pg_sync/ops_poller.rs` | **New** — V2 poller with dedup | +| `src/pg_sync/op_dedup.rs` | **New** — shared dedup helper | +| `src/pg_sync/trigger_gen.rs` | **New** — YAML config → trigger SQL generator | +| `src/pg_sync/dump.rs` | **New** — table dump pipeline (COPY → WAL writer) | +| `src/pg_sync/config.rs` | V2 config fields, YAML sync_sources, dump config | +| `src/bin/pg_sync.rs` | Rename to bitdex-sync, add subcommands | +| `src/server.rs` | `POST /ops` (WAL-backed), `GET /sync-lag`, dump endpoints | +| `src/ops_wal.rs` | **New** — WAL writer + reader thread (ops + dumps) | +| `src/pg_sync/bitdex_client.rs` | `post_ops()`, dump registration | +| `src/metrics.rs` | `bitdex_sync_*` Prometheus gauges | diff --git a/examples/validate_ops_pipeline.rs b/examples/validate_ops_pipeline.rs new file mode 100644 index 00000000..a040d9b8 --- /dev/null +++ b/examples/validate_ops_pipeline.rs @@ -0,0 +1,297 @@ +//! Validation harness for the Sync V2 ops pipeline. +//! +//! Tests both processing modes: +//! - **Dump mode** (default): AccumSink → direct bitmap accumulation (bulk loading path) +//! - **Steady-state mode** (--steady-state): CoalescerSink → coalescer channel (online path) +//! +//! Usage: +//! cargo run --example validate_ops_pipeline --features pg-sync -- \ +//! --csv-dir C:\Dev\Repos\open-source\bitdex-v2\data\load_stage \ +//! --limit 100000 +//! +//! # Steady-state mode (slower, tests the online write path): +//! cargo run --example validate_ops_pipeline --features pg-sync -- \ +//! 
--csv-dir data/load_stage --limit 10000 --steady-state

+use std::path::PathBuf;
+use std::time::Instant;
+
+use bitdex_v2::concurrent_engine::ConcurrentEngine;
+use bitdex_v2::config::Config;
+use bitdex_v2::ops_processor::{apply_ops_batch, process_csv_dump_direct, process_wal_dump, FieldMeta};
+use bitdex_v2::ops_wal::WalReader;
+use bitdex_v2::pg_sync::csv_ops::run_csv_dump;
+
+fn main() {
+    let args: Vec<String> = std::env::args().collect();
+    let csv_dir = get_arg(&args, "--csv-dir").unwrap_or_else(|| "data/load_stage".into());
+    let limit: u64 = get_arg(&args, "--limit")
+        .map(|s| s.parse().unwrap_or(100_000))
+        .unwrap_or(100_000);
+    let steady_state = args.iter().any(|a| a == "--steady-state");
+    let direct = args.iter().any(|a| a == "--direct");
+
+    eprintln!("=== Sync V2 Pipeline Validation ===");
+    eprintln!("CSV dir: {csv_dir}");
+    eprintln!("Row limit: {limit}");
+    let mode_str = if direct { "direct (CSV → AccumSink, no WAL)" }
+        else if steady_state { "steady-state (CoalescerSink)" }
+        else { "dump (WAL → AccumSink)" };
+    eprintln!("Mode: {mode_str}");
+
+    // Create a temp directory for this validation run
+    let temp_dir = tempfile::TempDir::new().expect("Failed to create temp dir");
+    let data_dir = temp_dir.path();
+    let wal_path = data_dir.join("ops.wal");
+    let bitmap_dir = data_dir.join("bitmaps");
+    let docs_dir = data_dir.join("docs");
+    std::fs::create_dir_all(&bitmap_dir).ok();
+    std::fs::create_dir_all(&docs_dir).ok();
+
+    // Phase 1: CSV → WAL (skip for direct mode — it reads CSVs directly)
+    let csv_results: Vec<(String, bitdex_v2::pg_sync::csv_ops::CsvOpsStats)> = if direct {
+        eprintln!("\n--- Phase 1: Skipped (direct mode) ---");
+        Vec::new()
+    } else {
+        eprintln!("\n--- Phase 1: CSV → WAL ---");
+        let csv_start = Instant::now();
+        let results = run_csv_dump(
+            &PathBuf::from(&csv_dir),
+            &wal_path,
+            10_000,
+            Some(limit),
+        )
+        .expect("CSV dump failed");
+
+        let csv_elapsed = csv_start.elapsed();
+        let total_ops: u64 = 
results.iter().map(|(_, s)| s.ops_written).sum(); + let total_rows: u64 = results.iter().map(|(_, s)| s.rows_read).sum(); + eprintln!("\nCSV → WAL complete:"); + eprintln!(" Total rows: {total_rows}"); + eprintln!(" Total ops: {total_ops}"); + eprintln!(" Time: {:.2}s", csv_elapsed.as_secs_f64()); + eprintln!(" Throughput: {:.0} rows/s", total_rows as f64 / csv_elapsed.as_secs_f64().max(0.001)); + results + }; + let total_rows: u64 = csv_results.iter().map(|(_, s)| s.rows_read).sum(); + + // Phase 2: WAL → Engine + eprintln!("\n--- Phase 2: WAL → Engine ---"); + + // Load index config + let config_path = PathBuf::from(&csv_dir).parent().unwrap().join("indexes/civitai/config.json"); + let alt_config = PathBuf::from("data/indexes/civitai/config.json"); + let config_path = if config_path.exists() { + config_path + } else if alt_config.exists() { + alt_config + } else { + eprintln!("ERROR: Could not find config.json. Skipping engine validation."); + print_summary(&csv_results, None, None); + return; + }; + + let config_str = std::fs::read_to_string(&config_path).expect("Failed to read config.json"); + let index_def: serde_json::Value = serde_json::from_str(&config_str).expect("Failed to parse config.json"); + let config: Config = serde_json::from_value(index_def["config"].clone()).expect("Failed to parse engine config"); + + let meta = FieldMeta::from_config(&config); + + let mut engine_config = config.clone(); + engine_config.storage.bitmap_path = Some(bitmap_dir.clone()); + + if direct { + run_direct_mode(&engine_config, &docs_dir, &PathBuf::from(&csv_dir), limit, &csv_results); + } else if steady_state { + run_steady_state(&engine_config, &docs_dir, &wal_path, &meta, &csv_results, total_rows); + } else { + run_dump_mode(&engine_config, &config, &docs_dir, &wal_path, &meta, &csv_results, total_rows); + } +} + +fn run_direct_mode( + engine_config: &Config, + docs_dir: &std::path::Path, + csv_dir: &std::path::Path, + limit: u64, + csv_results: &[(String, 
bitdex_v2::pg_sync::csv_ops::CsvOpsStats)], +) { + let mut cfg = engine_config.clone(); + cfg.headless = true; + let engine = ConcurrentEngine::new_with_path(cfg, docs_dir) + .expect("Failed to create engine"); + + eprintln!(" Processing CSV directly (no WAL)..."); + let (total_applied, total_errors, elapsed) = + process_csv_dump_direct(&engine, csv_dir, 10_000, Some(limit)); + let alive = engine.alive_count(); + + eprintln!("\nDirect dump complete:"); + eprintln!(" Ops applied: {total_applied}"); + eprintln!(" Errors: {total_errors}"); + eprintln!(" Alive count: {alive}"); + eprintln!(" Time: {:.2}s", elapsed); + eprintln!(" Throughput: {:.0} ops/s", total_applied as f64 / elapsed.max(0.001)); + + validate_and_summarize(csv_results, alive, total_errors, 0); +} + +fn run_dump_mode( + engine_config: &Config, + _config: &Config, + docs_dir: &std::path::Path, + wal_path: &std::path::Path, + _meta: &FieldMeta, + csv_results: &[(String, bitdex_v2::pg_sync::csv_ops::CsvOpsStats)], + total_rows: u64, +) { + // Headless is fine for dump mode — we apply directly to staging, no flush thread needed + let mut cfg = engine_config.clone(); + cfg.headless = true; + let engine = ConcurrentEngine::new_with_path(cfg, docs_dir) + .expect("Failed to create engine"); + + eprintln!(" Processing WAL via dump mode (AccumSink)..."); + let (total_applied, total_errors, elapsed) = process_wal_dump(&engine, wal_path, 10_000); + let alive = engine.alive_count(); + + eprintln!("\nWAL → Engine complete (dump mode):"); + eprintln!(" Ops applied: {total_applied}"); + eprintln!(" Errors: {total_errors}"); + eprintln!(" Alive count: {alive}"); + eprintln!(" Time: {:.2}s", elapsed); + eprintln!(" Throughput: {:.0} ops/s", total_applied as f64 / elapsed.max(0.001)); + + validate_and_summarize(csv_results, alive, total_errors, total_rows); +} + +fn run_steady_state( + engine_config: &Config, + docs_dir: &std::path::Path, + wal_path: &std::path::Path, + meta: &FieldMeta, + csv_results: &[(String, 
bitdex_v2::pg_sync::csv_ops::CsvOpsStats)], + total_rows: u64, +) { + use bitdex_v2::ingester::CoalescerSink; + + // Non-headless — need flush thread to drain coalescer + let mut cfg = engine_config.clone(); + cfg.headless = false; + let engine = ConcurrentEngine::new_with_path(cfg, docs_dir) + .expect("Failed to create engine"); + + let wal_start = Instant::now(); + let mut reader = WalReader::new(wal_path, 0); + let mut total_applied = 0u64; + let mut total_errors = 0u64; + + loop { + let batch = reader.read_batch(10_000).expect("WAL read failed"); + if batch.entries.is_empty() { + break; + } + let sender = engine.mutation_sender(); + let mut sink = CoalescerSink::new(sender); + let mut entries = batch.entries; + let (applied, _skipped, errors) = apply_ops_batch( + &mut sink, meta, &mut entries, Some(&engine), + ); + total_applied += applied as u64; + total_errors += errors as u64; + } + + // Wait for flush thread to drain + eprintln!(" Waiting for flush thread to drain..."); + let drain_start = Instant::now(); + loop { + let pending = engine.flush_queue_depth(); + if pending == 0 { break; } + if drain_start.elapsed().as_secs() > 30 { + eprintln!(" WARN: flush thread still has {pending} pending ops after 30s"); + break; + } + std::thread::sleep(std::time::Duration::from_millis(50)); + } + eprintln!(" Flush drain: {:.1}s", drain_start.elapsed().as_secs_f64()); + + let wal_elapsed = wal_start.elapsed(); + let alive = engine.alive_count(); + + eprintln!("\nWAL → Engine complete (steady-state):"); + eprintln!(" Ops applied: {total_applied}"); + eprintln!(" Errors: {total_errors}"); + eprintln!(" Alive count: {alive}"); + eprintln!(" Time: {:.2}s", wal_elapsed.as_secs_f64()); + eprintln!(" Throughput: {:.0} ops/s", total_applied as f64 / wal_elapsed.as_secs_f64().max(0.001)); + + validate_and_summarize(csv_results, alive, total_errors, total_rows); +} + +fn validate_and_summarize( + csv_results: &[(String, bitdex_v2::pg_sync::csv_ops::CsvOpsStats)], + alive: u64, + 
total_errors: u64, + total_rows: u64, +) { + eprintln!("\n--- Phase 3: Validation ---"); + let mut pass = true; + + if alive == 0 { + eprintln!(" FAIL: alive count is 0 — no documents loaded"); + pass = false; + } else { + eprintln!(" PASS: alive count = {alive}"); + } + + if total_errors > 0 { + eprintln!(" WARN: {total_errors} errors during ops application"); + } else { + eprintln!(" PASS: zero errors"); + } + + // Images make up a fraction of total rows + let image_rows = csv_results.iter() + .find(|(name, _)| name == "images") + .map(|(_, s)| s.rows_read) + .unwrap_or(0); + let expected_min = (image_rows as f64 * 0.8) as u64; + if alive < expected_min { + eprintln!(" WARN: alive ({alive}) < 80% of image rows ({image_rows})"); + } + + print_summary(csv_results, Some(alive), Some(pass)); +} + +fn print_summary( + csv_results: &[(String, bitdex_v2::pg_sync::csv_ops::CsvOpsStats)], + alive: Option, + pass: Option, +) { + eprintln!("\n=== Summary ==="); + eprintln!("Table | Rows | Ops | Time | Rows/s"); + eprintln!("---------------|-----------|-----------|---------|--------"); + for (table, stats) in csv_results { + eprintln!( + "{:14} | {:>9} | {:>9} | {:>5.1}s | {:>7.0}", + table, + stats.rows_read, + stats.ops_written, + stats.elapsed_secs, + stats.rows_read as f64 / stats.elapsed_secs.max(0.001) + ); + } + if let Some(alive) = alive { + eprintln!("\nAlive count: {alive}"); + } + if let Some(pass) = pass { + eprintln!("Result: {}", if pass { "PASS" } else { "FAIL" }); + } +} + +fn get_arg(args: &[String], flag: &str) -> Option { + args.iter() + .position(|a| a == flag) + .and_then(|i| args.get(i + 1)) + .cloned() +} diff --git a/src/bin/pg_sync.rs b/src/bin/pg_sync.rs index 1107e2b8..b30f08cb 100644 --- a/src/bin/pg_sync.rs +++ b/src/bin/pg_sync.rs @@ -352,7 +352,6 @@ async fn main() { password: sync_config.clickhouse_password.clone(), }; let metrics_fut = metrics_poller::run_metrics_poller( - &pool, &ch_config, &bitdex_client, 
sync_config.metrics_poll_interval_secs, diff --git a/src/concurrent_engine.rs b/src/concurrent_engine.rs index 45cd0878..25fe5726 100644 --- a/src/concurrent_engine.rs +++ b/src/concurrent_engine.rs @@ -6067,6 +6067,12 @@ impl ConcurrentEngine { &self.config } + /// Get a cloneable MutationSender for submitting ops to the coalescer channel. + /// Used by the WAL reader thread to send ops via CoalescerSink. + pub fn mutation_sender(&self) -> MutationSender { + self.sender.clone() + } + /// Get a reference to the BitmapFs store, if configured. pub fn bitmap_store(&self) -> Option<&Arc> { self.bitmap_store.as_ref() @@ -6483,6 +6489,49 @@ impl ConcurrentEngine { total_count } + /// Apply a BitmapAccum's accumulated bitmaps directly to staging. + /// + /// Used by the dump pipeline (Sync V2) to apply ops-derived bitmaps + /// without going through the coalescer channel. + /// + /// **Caller must be in loading mode** (`enter_loading_mode()` before first call, + /// `exit_loading_mode()` after all accums are applied). This avoids the Arc clone + /// cascade — in loading mode, staging refcount=1 so clone is cheap. + /// + /// ORs filter bitmaps, sort layer bitmaps, and alive bitmap into staging. + pub fn apply_accum(&self, accum: &crate::loader::BitmapAccum) { + // In loading mode, the flush thread doesn't publish snapshots, so the + // ArcSwap holds the sole reference. Clone is O(num_fields) — just Arc + // pointer copies, no deep bitmap clones. 
+ let snap = self.inner.load_full(); + let mut staging = (*snap).clone(); + drop(snap); + + // Apply filter bitmaps + for (field_name, value_map) in &accum.filter_maps { + if let Some(field) = staging.filters.get_field_mut(field_name) { + for (&value, bitmap) in value_map { + field.or_bitmap(value, bitmap); + } + } + } + + // Apply sort layer bitmaps + for (field_name, layer_map) in &accum.sort_maps { + if let Some(field) = staging.sorts.get_field_mut(field_name) { + for (&bit_layer, bitmap) in layer_map { + field.or_layer(bit_layer, bitmap); + } + } + } + + // Apply alive bitmap (also updates slot counter) + staging.slots.alive_or_bitmap(&accum.alive); + + // Store back — in loading mode, no snapshot publish overhead + self.inner.store(Arc::new(staging)); + } + /// Build all bitmap indexes from the docstore. /// /// Designed for "build index" boot mode: starts from bare docs on disk, diff --git a/src/lib.rs b/src/lib.rs index 15657ead..66628268 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,6 +1,10 @@ pub mod bitmap_fs; pub mod bound_store; pub mod bucket_diff_log; +#[cfg(feature = "pg-sync")] +pub mod ops_processor; +#[cfg(feature = "pg-sync")] +pub mod ops_wal; pub mod cache; pub mod capture; pub mod concurrency; diff --git a/src/metrics.rs b/src/metrics.rs index 25491177..950b976d 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -116,6 +116,13 @@ pub struct Metrics { pub pgsync_cycle_seconds: HistogramVec, pub pgsync_rows_fetched_total: IntCounterVec, pub pgsync_cursor_position: IntGaugeVec, + + // V2 sync metrics (unified namespace with source label) + pub sync_cursor_position: IntGaugeVec, + pub sync_max_id: IntGaugeVec, + pub sync_lag_rows: IntGaugeVec, + pub sync_ops_total: IntCounterVec, + pub sync_wal_bytes: IntGaugeVec, } impl Metrics { @@ -570,6 +577,28 @@ impl Metrics { ) .unwrap(); + // V2 sync metrics (unified namespace) + let sync_cursor_position = IntGaugeVec::new( + Opts::new("bitdex_sync_cursor_position", "Current sync cursor position"), + 
&["source"], + ).unwrap(); + let sync_max_id = IntGaugeVec::new( + Opts::new("bitdex_sync_max_id", "Max ops table ID (for lag calculation)"), + &["source"], + ).unwrap(); + let sync_lag_rows = IntGaugeVec::new( + Opts::new("bitdex_sync_lag_rows", "Number of ops rows behind"), + &["source"], + ).unwrap(); + let sync_ops_total = IntCounterVec::new( + Opts::new("bitdex_sync_ops_total", "Total ops received from sync sources"), + &["source"], + ).unwrap(); + let sync_wal_bytes = IntGaugeVec::new( + Opts::new("bitdex_sync_wal_bytes", "Current WAL file size in bytes"), + &["source"], + ).unwrap(); + // Register all metrics registry.register(Box::new(alive_documents.clone())).unwrap(); registry.register(Box::new(slot_high_water.clone())).unwrap(); @@ -671,6 +700,11 @@ impl Metrics { registry.register(Box::new(pgsync_cycle_seconds.clone())).unwrap(); registry.register(Box::new(pgsync_rows_fetched_total.clone())).unwrap(); registry.register(Box::new(pgsync_cursor_position.clone())).unwrap(); + registry.register(Box::new(sync_cursor_position.clone())).unwrap(); + registry.register(Box::new(sync_max_id.clone())).unwrap(); + registry.register(Box::new(sync_lag_rows.clone())).unwrap(); + registry.register(Box::new(sync_ops_total.clone())).unwrap(); + registry.register(Box::new(sync_wal_bytes.clone())).unwrap(); Self { registry, @@ -746,6 +780,11 @@ impl Metrics { pgsync_cycle_seconds, pgsync_rows_fetched_total, pgsync_cursor_position, + sync_cursor_position, + sync_max_id, + sync_lag_rows, + sync_ops_total, + sync_wal_bytes, } } diff --git a/src/ops_processor.rs b/src/ops_processor.rs new file mode 100644 index 00000000..b7550eb8 --- /dev/null +++ b/src/ops_processor.rs @@ -0,0 +1,1227 @@ +//! WAL ops processor — translates ops from WAL files into bitmap mutations. +//! +//! Two processing modes per the Sync V2 design: +//! +//! - **Steady-state**: Ops → BitmapSink (CoalescerSink) → coalescer channel → flush thread. +//! Used by the WAL reader thread during normal operation. 
+//! +//! - **Dump mode**: Ops → BitmapSink (AccumSink) → direct bitmap accumulation. +//! Used during initial load. Bypasses coalescer, snapshot publishing, and cache. +//! +//! Both paths use the same `process_entity_ops()` core that translates Op variants +//! into BitmapSink calls using the engine Config for field awareness and +//! `value_to_bitmap_key()` / `value_to_sort_u32()` for value conversion. + +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use std::time::Duration; + +use serde_json::Value as JsonValue; + +use crate::concurrent_engine::ConcurrentEngine; +use crate::config::Config; +use crate::filter::FilterFieldType; +use crate::ingester::BitmapSink; +use crate::mutation::{value_to_bitmap_key, value_to_sort_u32, FieldRegistry}; +use crate::pg_sync::op_dedup::dedup_ops; +use crate::pg_sync::ops::{EntityOps, Op}; +use crate::query::{BitdexQuery, FilterClause, Value as QValue}; + +/// Convert a serde_json::Value to a query::Value for bitmap key conversion. +fn json_to_qvalue(v: &JsonValue) -> QValue { + match v { + JsonValue::Number(n) => { + if let Some(i) = n.as_i64() { + QValue::Integer(i) + } else if let Some(f) = n.as_f64() { + QValue::Float(f) + } else { + QValue::Integer(0) + } + } + JsonValue::Bool(b) => QValue::Bool(*b), + JsonValue::String(s) => QValue::String(s.clone()), + JsonValue::Null => QValue::Integer(0), + _ => QValue::String(v.to_string()), + } +} + +/// Configuration for the ops processor. 
+pub struct OpsProcessorConfig { + /// Max records to read per WAL batch + pub batch_size: usize, + /// How long to sleep when no new records are available + pub poll_interval: Duration, + /// Path to persist the cursor position + pub cursor_path: PathBuf, +} + +impl Default for OpsProcessorConfig { + fn default() -> Self { + Self { + batch_size: 10_000, + poll_interval: Duration::from_millis(50), + cursor_path: PathBuf::from("wal_cursor"), + } + } +} + +/// Precomputed field metadata from Config, used during ops processing. +/// Built once, reused across all batches. +pub struct FieldMeta { + /// Filter field name → (Arc, FilterFieldType) + filter_fields: HashMap, FilterFieldType)>, + /// Sort field name → (Arc, num_bits) + sort_fields: HashMap, usize)>, + /// Field registry for Arc interning (kept for future DocSink use) + #[allow(dead_code)] + registry: FieldRegistry, +} + +impl FieldMeta { + /// Build FieldMeta from engine config. + pub fn from_config(config: &Config) -> Self { + let registry = FieldRegistry::from_config(config); + let mut filter_fields = HashMap::new(); + for fc in &config.filter_fields { + filter_fields.insert( + fc.name.clone(), + (registry.get(&fc.name), fc.field_type.clone()), + ); + } + let mut sort_fields = HashMap::new(); + for sc in &config.sort_fields { + sort_fields.insert( + sc.name.clone(), + (registry.get(&sc.name), sc.bits as usize), + ); + } + Self { + filter_fields, + sort_fields, + registry, + } + } +} + +/// Process a batch of entity ops, translating them into BitmapSink calls. +/// +/// This is the core function used by both steady-state (CoalescerSink) and +/// dump (AccumSink) paths. The sink determines where mutations go. +/// +/// For queryOpSet resolution, an engine reference is needed to execute queries. +/// Pass `None` during dump mode (queryOpSets are only used in steady-state). +/// +/// Returns (applied, skipped, errors). 
+pub fn apply_ops_batch( + sink: &mut S, + meta: &FieldMeta, + batch: &mut Vec, + engine: Option<&ConcurrentEngine>, +) -> (usize, usize, usize) { + dedup_ops(batch); + + let mut applied = 0usize; + let mut skipped = 0usize; + let mut errors = 0usize; + + for entry in batch.iter() { + let entity_id = entry.entity_id; + if entity_id < 0 || entity_id > u32::MAX as i64 { + skipped += 1; + continue; + } + let slot = entity_id as u32; + + // Delete absorbs everything — clear all bitmaps for this slot. + if entry.ops.iter().any(|op| matches!(op, Op::Delete)) { + match process_delete(sink, meta, slot, engine) { + Ok(()) => applied += 1, + Err(e) => { + tracing::warn!("ops processor: delete slot {slot} failed: {e}"); + errors += 1; + } + } + continue; + } + + // Handle queryOpSets (steady-state only — needs engine for query resolution) + for op in &entry.ops { + if let Op::QueryOpSet { query, ops } = op { + if let Some(eng) = engine { + match apply_query_op_set(sink, meta, eng, query, ops) { + Ok(count) => applied += count, + Err(e) => { + tracing::warn!("ops processor: queryOpSet '{query}' failed: {e}"); + errors += 1; + } + } + } else { + tracing::warn!("ops processor: queryOpSet skipped (no engine in dump mode)"); + skipped += 1; + } + } + } + + // Process set/remove/add ops → direct bitmap mutations + let mut has_any_ops = false; + for op in &entry.ops { + match op { + Op::Set { field, value } => { + process_set_op(sink, meta, slot, field, value); + has_any_ops = true; + } + Op::Remove { field, value } => { + process_remove_op(sink, meta, slot, field, value); + has_any_ops = true; + } + Op::Add { field, value } => { + process_add_op(sink, meta, slot, field, value); + has_any_ops = true; + } + Op::Delete | Op::QueryOpSet { .. } => { + // Already handled above + } + } + } + + // Set alive only if creates_slot is true (primary entity table). + // Join tables (tags, tools) set creates_slot=false — they only + // add multi-value bitmaps to existing slots. 
+ if entry.creates_slot { + sink.alive_insert(slot); + } + + if has_any_ops { + applied += 1; + } + } + + // Flush buffered operations + if let Err(e) = sink.flush() { + tracing::error!("ops processor: sink flush failed: {e}"); + errors += 1; + } + + (applied, skipped, errors) +} + +/// Process a `set` op: set the new value's bitmap bit for this slot. +fn process_set_op( + sink: &mut S, + meta: &FieldMeta, + slot: u32, + field: &str, + value: &JsonValue, +) { + let qval = json_to_qvalue(value); + + // Check if this is a filter field + if let Some((arc_name, _field_type)) = meta.filter_fields.get(field) { + if let Some(key) = value_to_bitmap_key(&qval) { + sink.filter_insert(arc_name.clone(), key, slot); + } + } + + // Check if this is a sort field + if let Some((arc_name, num_bits)) = meta.sort_fields.get(field) { + if let Some(sort_val) = value_to_sort_u32(&qval) { + for bit in 0..*num_bits { + if (sort_val >> bit) & 1 == 1 { + sink.sort_set(arc_name.clone(), bit, slot); + } + } + } + } +} + +/// Process a `remove` op: clear the old value's bitmap bit for this slot. +fn process_remove_op( + sink: &mut S, + meta: &FieldMeta, + slot: u32, + field: &str, + value: &JsonValue, +) { + let qval = json_to_qvalue(value); + + // Check if this is a filter field + if let Some((arc_name, _field_type)) = meta.filter_fields.get(field) { + if let Some(key) = value_to_bitmap_key(&qval) { + sink.filter_remove(arc_name.clone(), key, slot); + } + } + + // Check if this is a sort field + if let Some((arc_name, num_bits)) = meta.sort_fields.get(field) { + if let Some(sort_val) = value_to_sort_u32(&qval) { + for bit in 0..*num_bits { + if (sort_val >> bit) & 1 == 1 { + sink.sort_clear(arc_name.clone(), bit, slot); + } + } + } + } +} + +/// Process an `add` op: set a multi-value bitmap bit. +/// Same as `set` for bitmap purposes — adds the value's bit. 
+fn process_add_op( + sink: &mut S, + meta: &FieldMeta, + slot: u32, + field: &str, + value: &JsonValue, +) { + let qval = json_to_qvalue(value); + + if let Some((arc_name, _field_type)) = meta.filter_fields.get(field) { + if let Some(key) = value_to_bitmap_key(&qval) { + sink.filter_insert(arc_name.clone(), key, slot); + } + } + // Multi-value fields don't have sort layers, but handle it generically + if let Some((arc_name, num_bits)) = meta.sort_fields.get(field) { + if let Some(sort_val) = value_to_sort_u32(&qval) { + for bit in 0..*num_bits { + if (sort_val >> bit) & 1 == 1 { + sink.sort_set(arc_name.clone(), bit, slot); + } + } + } + } +} + +/// Process a delete: read stored doc from engine to know which bitmaps to clear +/// (clean delete principle), then clear all filter/sort bits + alive bit. +/// +/// Per design doc H1: deletes are the one op type that requires a docstore read. +fn process_delete( + sink: &mut S, + _meta: &FieldMeta, + slot: u32, + engine: Option<&ConcurrentEngine>, +) -> std::result::Result<(), String> { + // If we have an engine, read stored doc to clear filter/sort bitmaps cleanly. + // Without engine (dump mode), we can only clear alive — filter bitmaps may be stale. + if let Some(eng) = engine { + // Use the engine's delete method which handles clean delete internally. + eng.delete(slot).map_err(|e| format!("engine delete failed: {e}"))?; + return Ok(()); + } + + // Dump mode fallback: just clear alive bit (no stored doc to read) + sink.alive_remove(slot); + Ok(()) +} + +/// Resolve a queryOpSet: execute the query to get matching slots, +/// then apply the nested ops to each matching slot via the BitmapSink. 
+fn apply_query_op_set( + sink: &mut S, + meta: &FieldMeta, + engine: &ConcurrentEngine, + query_str: &str, + ops: &[Op], +) -> std::result::Result { + let filters = parse_filter_from_query_str(query_str)?; + + let query = BitdexQuery { + filters, + sort: None, + limit: usize::MAX, + offset: None, + cursor: None, + skip_cache: true, + }; + + let result = engine + .execute_query(&query) + .map_err(|e| format!("queryOpSet query failed: {e}"))?; + + let slot_ids = &result.ids; + if slot_ids.is_empty() { + return Ok(0); + } + + // Apply nested ops to each matching slot + let mut applied = 0; + for &slot_id in slot_ids { + if slot_id < 0 || slot_id > u32::MAX as i64 { + continue; + } + let slot = slot_id as u32; + + for op in ops { + match op { + Op::Set { field, value } => { + process_set_op(sink, meta, slot, field, value); + } + Op::Remove { field, value } => { + process_remove_op(sink, meta, slot, field, value); + } + Op::Add { field, value } => { + process_add_op(sink, meta, slot, field, value); + } + Op::Delete => { + // Delete within queryOpSet clears alive for each matched slot + sink.alive_remove(slot); + } + Op::QueryOpSet { .. } => { + // Nested queryOpSets not supported + tracing::warn!("nested queryOpSet ignored"); + } + } + } + applied += 1; + } + + Ok(applied) +} + +/// Parse a simple filter string like "modelVersionIds eq 456" or "postId eq 789" +/// into filter clauses. 
+fn parse_filter_from_query_str(query_str: &str) -> std::result::Result, String> { + let clauses: Vec<&str> = query_str.split(" AND ").collect(); + let mut filters = Vec::new(); + + for clause in clauses { + let parts: Vec<&str> = clause.trim().splitn(3, ' ').collect(); + if parts.len() < 3 { + return Err(format!("Invalid filter clause: '{clause}'")); + } + + let field = parts[0].to_string(); + let op = parts[1].to_lowercase(); + let value_str = parts[2]; + + let filter = match op.as_str() { + "eq" => { + let value = parse_query_value(value_str)?; + FilterClause::Eq(field, value) + } + "in" => { + let values = parse_query_values_array(value_str)?; + FilterClause::In(field, values) + } + _ => { + return Err(format!("Unsupported filter op '{op}' in queryOpSet")); + } + }; + filters.push(filter); + } + + Ok(filters) +} + +/// Parse a single query value from a string. +fn parse_query_value(s: &str) -> std::result::Result { + if let Ok(n) = s.parse::() { + return Ok(QValue::Integer(n)); + } + if let Ok(f) = s.parse::() { + return Ok(QValue::Float(f)); + } + if s == "true" { + return Ok(QValue::Bool(true)); + } + if s == "false" { + return Ok(QValue::Bool(false)); + } + let stripped = s.trim_matches('"').trim_matches('\''); + Ok(QValue::String(stripped.to_string())) +} + +/// Parse an array of query values like "[101, 102, 103]". +fn parse_query_values_array(s: &str) -> std::result::Result, String> { + let trimmed = s.trim(); + if !trimmed.starts_with('[') || !trimmed.ends_with(']') { + return Err(format!("Expected array for 'in' filter, got: '{s}'")); + } + let inner = &trimmed[1..trimmed.len() - 1]; + let mut values = Vec::new(); + for part in inner.split(',') { + let part = part.trim(); + if !part.is_empty() { + values.push(parse_query_value(part)?); + } + } + Ok(values) +} + +/// Process a batch of entity ops in dump mode using AccumSink. +/// +/// This is the bulk-loading path that bypasses the coalescer entirely. 
+/// Ops are accumulated directly into bitmaps (like the single-pass loader). +/// +/// Returns (applied, skipped, errors). +pub(crate) fn apply_ops_batch_dump( + accum: &mut crate::loader::BitmapAccum, + meta: &FieldMeta, + batch: &mut Vec, +) -> (usize, usize, usize) { + let mut sink = crate::ingester::AccumSink::new(accum); + apply_ops_batch(&mut sink, meta, batch, None) +} + +/// Process all WAL entries in dump mode: reads WAL, accumulates bitmaps, applies to engine. +/// +/// This is the high-level dump pipeline entry point. It: +/// 1. Creates a BitmapAccum from the engine config +/// 2. Reads all WAL entries, processes via AccumSink +/// 3. Applies accumulated bitmaps directly to engine staging +/// +/// Returns (total_applied, total_errors, elapsed_secs). +pub fn process_wal_dump( + engine: &ConcurrentEngine, + wal_path: &Path, + batch_size: usize, +) -> (u64, u64, f64) { + use crate::loader::BitmapAccum; + use crate::ops_wal::WalReader; + use std::time::Instant; + + let config = engine.config(); + let meta = FieldMeta::from_config(config); + + let filter_names: Vec = config.filter_fields.iter().map(|f| f.name.clone()).collect(); + let sort_configs: Vec<(String, u8)> = config.sort_fields.iter().map(|s| (s.name.clone(), s.bits)).collect(); + let mut accum = BitmapAccum::new(&filter_names, &sort_configs); + + let start = Instant::now(); + let mut reader = WalReader::new(wal_path, 0); + let mut total_applied = 0u64; + let mut total_errors = 0u64; + + loop { + let batch = match reader.read_batch(batch_size) { + Ok(b) => b, + Err(e) => { + tracing::error!("WAL read error in dump mode: {e}"); + total_errors += 1; + break; + } + }; + if batch.entries.is_empty() { + break; + } + let mut entries = batch.entries; + let (applied, _skipped, errors) = apply_ops_batch_dump(&mut accum, &meta, &mut entries); + total_applied += applied as u64; + total_errors += errors as u64; + } + + // Apply accumulated bitmaps to engine staging + engine.apply_accum(&accum); + + 
(total_applied, total_errors, start.elapsed().as_secs_f64()) +} + +/// Direct dump pipeline: CSV → chunked reader → rayon parallel parse → BitmapAccum → apply. +/// +/// Bypasses WAL entirely. Uses a reader thread + rayon fold+reduce for parallel +/// CSV parsing, matching the single-pass loader's throughput pattern. Memory-safe +/// at any scale — reads in ~300MB blocks, never loads the full file. +/// +/// Returns (total_applied, total_errors, elapsed_secs). +pub fn process_csv_dump_direct( + engine: &ConcurrentEngine, + csv_dir: &Path, + _batch_size: usize, + limit: Option, +) -> (u64, u64, f64) { + use crate::loader::BitmapAccum; + use crate::pg_sync::copy_queries::{parse_image_row, parse_tag_row, parse_tool_row}; + use rayon::prelude::*; + use std::io::BufRead; + use std::time::Instant; + + let config = engine.config(); + let meta = FieldMeta::from_config(config); + + let filter_names: Vec = config.filter_fields.iter().map(|f| f.name.clone()).collect(); + let sort_configs: Vec<(String, u8)> = config.sort_fields.iter().map(|s| (s.name.clone(), s.bits)).collect(); + + let start = Instant::now(); + let mut total_applied = 0u64; + let mut total_errors = 0u64; + let record_limit = limit.map(|l| l as usize).unwrap_or(usize::MAX); + + // Enter loading mode ONCE for the entire dump — avoids Arc clone cascade. + engine.enter_loading_mode(); + + // Chunk size for reading CSV lines. 10M lines per chunk keeps memory bounded + // (~1-2GB per chunk) while giving rayon enough work for parallelism. + const CHUNK_SIZE: usize = 10_000_000; + + /// Helper: read up to `chunk` lines from a BufReader, returns lines read. 
+ fn read_chunk( + reader: &mut impl BufRead, + chunk: usize, + buf: &mut Vec>, + ) -> usize { + buf.clear(); + let mut count = 0; + let mut line_buf = Vec::new(); + while count < chunk { + line_buf.clear(); + match reader.read_until(b'\n', &mut line_buf) { + Ok(0) => break, // EOF + Ok(_) => { + // Trim trailing newline + if line_buf.last() == Some(&b'\n') { line_buf.pop(); } + if line_buf.last() == Some(&b'\r') { line_buf.pop(); } + if !line_buf.is_empty() { + buf.push(std::mem::take(&mut line_buf)); + line_buf = Vec::new(); + count += 1; + } + } + Err(_) => break, + } + } + count + } + + // Phase 1: Images (creates alive slots) — chunked + let images_csv = csv_dir.join("images.csv"); + if images_csv.exists() { + let img_start = Instant::now(); + let file = std::fs::File::open(&images_csv).expect("open images.csv"); + let mut reader = std::io::BufReader::with_capacity(8 * 1024 * 1024, file); + let mut phase_total = 0usize; + let mut phase_errors = 0u64; + let mut chunk_buf = Vec::with_capacity(CHUNK_SIZE); + + let f_names = &filter_names; + let s_configs = &sort_configs; + let meta_ref = &meta; + + loop { + let remaining = record_limit.saturating_sub(phase_total); + if remaining == 0 { break; } + let n = read_chunk(&mut reader, remaining.min(CHUNK_SIZE), &mut chunk_buf); + if n == 0 { break; } + + let accum = chunk_buf + .par_iter() + .fold( + || BitmapAccum::new(f_names, s_configs), + |mut acc, line| { + let row = match parse_image_row(line) { + Some(r) => r, + None => { acc.errors += 1; return acc; } + }; + let slot = row.id as u32; + acc.alive.insert(slot); + + let ops = crate::pg_sync::csv_ops::image_row_to_ops_pub(&row); + for op in &ops { + if let Op::Set { field, value } = op { + let qval = json_to_qvalue(value); + if let Some((_, _)) = meta_ref.filter_fields.get(field.as_str()) { + if let Some(key) = value_to_bitmap_key(&qval) { + if let Some(m) = acc.filter_maps.get_mut(field.as_str()) { + 
m.entry(key).or_insert_with(roaring::RoaringBitmap::new).insert(slot); + } + } + } + if let Some((_, num_bits)) = meta_ref.sort_fields.get(field.as_str()) { + if let Some(sort_val) = value_to_sort_u32(&qval) { + if let Some(m) = acc.sort_maps.get_mut(field.as_str()) { + for bit in 0..*num_bits { + if (sort_val >> bit) & 1 == 1 { + m.entry(bit).or_insert_with(roaring::RoaringBitmap::new).insert(slot); + } + } + } + } + } + } + } + acc.count += 1; + acc + }, + ) + .reduce( + || BitmapAccum::new(f_names, s_configs), + |a, b| a.merge(b), + ); + + phase_total += accum.count; + phase_errors += accum.errors; + engine.apply_accum(&accum); + + eprintln!(" images: chunk {}..{} ({:.0}/s)", + phase_total - accum.count, phase_total, + accum.count as f64 / img_start.elapsed().as_secs_f64().max(0.001)); + } + total_applied += phase_total as u64; + total_errors += phase_errors; + + eprintln!(" images: {} rows total, {:.1}s ({:.0}/s)", + phase_total, + img_start.elapsed().as_secs_f64(), + phase_total as f64 / img_start.elapsed().as_secs_f64().max(0.001)); + } + + // Phase 2: Tags (chunked rayon) + let tags_csv = csv_dir.join("tags.csv"); + if tags_csv.exists() { + let tag_start = Instant::now(); + let file = std::fs::File::open(&tags_csv).expect("open tags.csv"); + let mut reader = std::io::BufReader::with_capacity(8 * 1024 * 1024, file); + let mut phase_total = 0usize; + let mut phase_errors = 0u64; + let mut chunk_buf = Vec::with_capacity(CHUNK_SIZE); + + let f_names = &filter_names; + let s_configs = &sort_configs; + + loop { + let remaining = record_limit.saturating_sub(phase_total); + if remaining == 0 { break; } + let n = read_chunk(&mut reader, remaining.min(CHUNK_SIZE), &mut chunk_buf); + if n == 0 { break; } + + let accum = chunk_buf + .par_iter() + .fold( + || BitmapAccum::new(f_names, s_configs), + |mut acc, line| { + let (tag_id, image_id) = match parse_tag_row(line) { + Some(pair) => pair, + None => { acc.errors += 1; return acc; } + }; + let slot = image_id as u32; + 
if let Some(m) = acc.filter_maps.get_mut("tagIds") { + m.entry(tag_id as u64) + .or_insert_with(roaring::RoaringBitmap::new) + .insert(slot); + } + acc.count += 1; + acc + }, + ) + .reduce( + || BitmapAccum::new(f_names, s_configs), + |a, b| a.merge(b), + ); + + phase_total += accum.count; + phase_errors += accum.errors; + engine.apply_accum(&accum); + } + total_applied += phase_total as u64; + total_errors += phase_errors; + + eprintln!(" tags: {} rows, {:.1}s ({:.0}/s)", + phase_total, + tag_start.elapsed().as_secs_f64(), + phase_total as f64 / tag_start.elapsed().as_secs_f64().max(0.001)); + } + + // Phase 3: Tools (chunked rayon) + let tools_csv = csv_dir.join("tools.csv"); + if tools_csv.exists() { + let tool_start = Instant::now(); + let file = std::fs::File::open(&tools_csv).expect("open tools.csv"); + let mut reader = std::io::BufReader::with_capacity(8 * 1024 * 1024, file); + let mut phase_total = 0usize; + let mut phase_errors = 0u64; + let mut chunk_buf = Vec::with_capacity(CHUNK_SIZE); + + let f_names = &filter_names; + let s_configs = &sort_configs; + + loop { + let remaining = record_limit.saturating_sub(phase_total); + if remaining == 0 { break; } + let n = read_chunk(&mut reader, remaining.min(CHUNK_SIZE), &mut chunk_buf); + if n == 0 { break; } + + let accum = chunk_buf + .par_iter() + .fold( + || BitmapAccum::new(f_names, s_configs), + |mut acc, line| { + let (tool_id, image_id) = match parse_tool_row(line) { + Some(pair) => pair, + None => { acc.errors += 1; return acc; } + }; + let slot = image_id as u32; + if let Some(m) = acc.filter_maps.get_mut("toolIds") { + m.entry(tool_id as u64) + .or_insert_with(roaring::RoaringBitmap::new) + .insert(slot); + } + acc.count += 1; + acc + }, + ) + .reduce( + || BitmapAccum::new(f_names, s_configs), + |a, b| a.merge(b), + ); + + phase_total += accum.count; + phase_errors += accum.errors; + engine.apply_accum(&accum); + } + total_applied += phase_total as u64; + total_errors += phase_errors; + + eprintln!(" 
tools: {} rows, {:.1}s ({:.0}/s)", + phase_total, + tool_start.elapsed().as_secs_f64(), + phase_total as f64 / tool_start.elapsed().as_secs_f64().max(0.001)); + } + + // Exit loading mode. On headless, this will timeout (no flush thread) but + // that's OK — the warning is harmless and the flag gets cleared. + engine.exit_loading_mode(); + + eprintln!(" Total: {total_applied} ops in {:.1}s ({:.0}/s)", + start.elapsed().as_secs_f64(), + total_applied as f64 / start.elapsed().as_secs_f64().max(0.001)); + + (total_applied, total_errors, start.elapsed().as_secs_f64()) +} + +/// Persist cursor position to disk. +pub fn save_cursor(path: &Path, cursor: u64) -> std::io::Result<()> { + std::fs::write(path, cursor.to_string()) +} + +/// Load cursor position from disk. Returns 0 if file doesn't exist. +pub fn load_cursor(path: &Path) -> u64 { + std::fs::read_to_string(path) + .ok() + .and_then(|s| s.trim().parse().ok()) + .unwrap_or(0) +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + use crate::config::{Config, FilterFieldConfig, SortFieldConfig}; + use crate::filter::FilterFieldType; + use crate::ingester::BitmapSink; + + /// A test sink that records all operations for verification. 
+ struct RecordingSink { + filter_inserts: Vec<(String, u64, u32)>, + filter_removes: Vec<(String, u64, u32)>, + sort_sets: Vec<(String, usize, u32)>, + sort_clears: Vec<(String, usize, u32)>, + alive_inserts: Vec, + alive_removes: Vec, + } + + impl RecordingSink { + fn new() -> Self { + Self { + filter_inserts: Vec::new(), + filter_removes: Vec::new(), + sort_sets: Vec::new(), + sort_clears: Vec::new(), + alive_inserts: Vec::new(), + alive_removes: Vec::new(), + } + } + } + + impl BitmapSink for RecordingSink { + fn filter_insert(&mut self, field: Arc, value: u64, slot: u32) { + self.filter_inserts.push((field.to_string(), value, slot)); + } + fn filter_remove(&mut self, field: Arc, value: u64, slot: u32) { + self.filter_removes.push((field.to_string(), value, slot)); + } + fn sort_set(&mut self, field: Arc, bit_layer: usize, slot: u32) { + self.sort_sets.push((field.to_string(), bit_layer, slot)); + } + fn sort_clear(&mut self, field: Arc, bit_layer: usize, slot: u32) { + self.sort_clears.push((field.to_string(), bit_layer, slot)); + } + fn alive_insert(&mut self, slot: u32) { + self.alive_inserts.push(slot); + } + fn alive_remove(&mut self, slot: u32) { + self.alive_removes.push(slot); + } + fn flush(&mut self) -> crate::error::Result<()> { + Ok(()) + } + } + + fn test_config() -> Config { + let mut config = Config::default(); + config.filter_fields = vec![ + FilterFieldConfig { + name: "nsfwLevel".into(), + field_type: FilterFieldType::SingleValue, + behaviors: None, + eviction: None, + eager_load: false, + }, + FilterFieldConfig { + name: "type".into(), + field_type: FilterFieldType::SingleValue, + behaviors: None, + eviction: None, + eager_load: false, + }, + FilterFieldConfig { + name: "tagIds".into(), + field_type: FilterFieldType::MultiValue, + behaviors: None, + eviction: None, + eager_load: false, + }, + FilterFieldConfig { + name: "hasMeta".into(), + field_type: FilterFieldType::Boolean, + behaviors: None, + eviction: None, + eager_load: false, + }, + 
]; + config.sort_fields = vec![SortFieldConfig { + name: "existedAt".into(), + source_type: "uint32".into(), + encoding: "linear".into(), + bits: 32, + eager_load: false, + computed: None, + }]; + config + } + + #[test] + fn test_set_op_filter_insert() { + let config = test_config(); + let meta = FieldMeta::from_config(&config); + let mut sink = RecordingSink::new(); + + let mut batch = vec![EntityOps { + entity_id: 42, + creates_slot: true, + ops: vec![Op::Set { + field: "nsfwLevel".into(), + value: json!(16), + }], + }]; + + let (applied, skipped, errors) = apply_ops_batch(&mut sink, &meta, &mut batch, None); + assert_eq!(applied, 1); + assert_eq!(skipped, 0); + assert_eq!(errors, 0); + + assert_eq!(sink.filter_inserts.len(), 1); + assert_eq!(sink.filter_inserts[0], ("nsfwLevel".to_string(), 16, 42)); + assert_eq!(sink.alive_inserts, vec![42]); + } + + #[test] + fn test_remove_then_set_scalar_update() { + let config = test_config(); + let meta = FieldMeta::from_config(&config); + let mut sink = RecordingSink::new(); + + let mut batch = vec![EntityOps { + entity_id: 42, + creates_slot: true, + ops: vec![ + Op::Remove { + field: "nsfwLevel".into(), + value: json!(8), + }, + Op::Set { + field: "nsfwLevel".into(), + value: json!(16), + }, + ], + }]; + + let (applied, _, errors) = apply_ops_batch(&mut sink, &meta, &mut batch, None); + assert_eq!(applied, 1); + assert_eq!(errors, 0); + + // Should have one remove (old value 8) and one insert (new value 16) + assert_eq!(sink.filter_removes.len(), 1); + assert_eq!(sink.filter_removes[0], ("nsfwLevel".to_string(), 8, 42)); + assert_eq!(sink.filter_inserts.len(), 1); + assert_eq!(sink.filter_inserts[0], ("nsfwLevel".to_string(), 16, 42)); + } + + #[test] + fn test_add_multi_value() { + let config = test_config(); + let meta = FieldMeta::from_config(&config); + let mut sink = RecordingSink::new(); + + let mut batch = vec![EntityOps { + entity_id: 100, + creates_slot: false, + ops: vec![ + Op::Add { + field: "tagIds".into(), 
+ value: json!(42), + }, + Op::Add { + field: "tagIds".into(), + value: json!(99), + }, + ], + }]; + + let (applied, _, errors) = apply_ops_batch(&mut sink, &meta, &mut batch, None); + assert_eq!(applied, 1); + assert_eq!(errors, 0); + + assert_eq!(sink.filter_inserts.len(), 2); + // Order after dedup is nondeterministic (HashMap iteration) + let mut values: Vec = sink.filter_inserts.iter().map(|(_, v, _)| *v).collect(); + values.sort(); + assert_eq!(values, vec![42, 99]); + // Add-only ops should NOT set alive (only Set ops do) + assert!(sink.alive_inserts.is_empty()); + } + + #[test] + fn test_sort_field_bit_decomposition() { + let config = test_config(); + let meta = FieldMeta::from_config(&config); + let mut sink = RecordingSink::new(); + + // existedAt = 5 = 0b101 → bits 0 and 2 set + let mut batch = vec![EntityOps { + entity_id: 10, + creates_slot: true, + ops: vec![Op::Set { + field: "existedAt".into(), + value: json!(5), + }], + }]; + + apply_ops_batch(&mut sink, &meta, &mut batch, None); + + // Should have sort_set for bits 0 and 2 + let sort_bits: Vec = sink.sort_sets.iter().map(|(_, bit, _)| *bit).collect(); + assert!(sort_bits.contains(&0)); + assert!(sort_bits.contains(&2)); + assert!(!sort_bits.contains(&1)); // bit 1 not set for value 5 + } + + #[test] + fn test_sort_field_remove_clears_bits() { + let config = test_config(); + let meta = FieldMeta::from_config(&config); + let mut sink = RecordingSink::new(); + + // Remove old sort value 5 = 0b101, set new value 6 = 0b110 + let mut batch = vec![EntityOps { + entity_id: 10, + creates_slot: true, + ops: vec![ + Op::Remove { + field: "existedAt".into(), + value: json!(5), + }, + Op::Set { + field: "existedAt".into(), + value: json!(6), + }, + ], + }]; + + apply_ops_batch(&mut sink, &meta, &mut batch, None); + + // Clears: bits 0, 2 (from value 5) + let clear_bits: Vec = sink.sort_clears.iter().map(|(_, bit, _)| *bit).collect(); + assert!(clear_bits.contains(&0)); + assert!(clear_bits.contains(&2)); + + 
// Sets: bits 1, 2 (from value 6) + let set_bits: Vec = sink.sort_sets.iter().map(|(_, bit, _)| *bit).collect(); + assert!(set_bits.contains(&1)); + assert!(set_bits.contains(&2)); + } + + #[test] + fn test_boolean_field() { + let config = test_config(); + let meta = FieldMeta::from_config(&config); + let mut sink = RecordingSink::new(); + + let mut batch = vec![EntityOps { + entity_id: 50, + creates_slot: true, + ops: vec![Op::Set { + field: "hasMeta".into(), + value: json!(true), + }], + }]; + + apply_ops_batch(&mut sink, &meta, &mut batch, None); + + // true → bitmap key 1 + assert_eq!(sink.filter_inserts.len(), 1); + assert_eq!(sink.filter_inserts[0], ("hasMeta".to_string(), 1, 50)); + } + + #[test] + fn test_unknown_field_ignored() { + let config = test_config(); + let meta = FieldMeta::from_config(&config); + let mut sink = RecordingSink::new(); + + let mut batch = vec![EntityOps { + entity_id: 1, + creates_slot: true, + ops: vec![Op::Set { + field: "unknownField".into(), + value: json!(42), + }], + }]; + + let (applied, _, errors) = apply_ops_batch(&mut sink, &meta, &mut batch, None); + assert_eq!(applied, 1); // still counts as applied (alive set) + assert_eq!(errors, 0); + + // No filter or sort ops emitted for unknown field + assert!(sink.filter_inserts.is_empty()); + assert!(sink.sort_sets.is_empty()); + } + + #[test] + fn test_delete_without_engine() { + let config = test_config(); + let meta = FieldMeta::from_config(&config); + let mut sink = RecordingSink::new(); + + let mut batch = vec![EntityOps { + entity_id: 42, + creates_slot: false, + ops: vec![Op::Delete], + }]; + + let (applied, _, errors) = apply_ops_batch(&mut sink, &meta, &mut batch, None); + assert_eq!(applied, 1); + assert_eq!(errors, 0); + + // In dump mode (no engine), delete only clears alive + assert_eq!(sink.alive_removes, vec![42]); + } + + #[test] + fn test_image_insert_all_fields() { + let config = test_config(); + let meta = FieldMeta::from_config(&config); + let mut sink = 
RecordingSink::new(); + + let mut batch = vec![EntityOps { + entity_id: 1000, + creates_slot: true, + ops: vec![ + Op::Set { + field: "nsfwLevel".into(), + value: json!(1), + }, + Op::Set { + field: "type".into(), + value: json!(0), // "image" mapped to 0 + }, + Op::Set { + field: "hasMeta".into(), + value: json!(true), + }, + Op::Set { + field: "existedAt".into(), + value: json!(1711234567u64), + }, + ], + }]; + + let (applied, _, errors) = apply_ops_batch(&mut sink, &meta, &mut batch, None); + assert_eq!(applied, 1); + assert_eq!(errors, 0); + + // 3 filter inserts (nsfwLevel, type, hasMeta) + sort bits for existedAt + assert_eq!(sink.filter_inserts.len(), 3); + assert!(!sink.sort_sets.is_empty()); // existedAt bit layers + assert_eq!(sink.alive_inserts, vec![1000]); + } + + #[test] + fn test_negative_entity_id_skipped() { + let config = test_config(); + let meta = FieldMeta::from_config(&config); + let mut sink = RecordingSink::new(); + + let mut batch = vec![EntityOps { + entity_id: -1, + creates_slot: true, + ops: vec![Op::Set { + field: "nsfwLevel".into(), + value: json!(1), + }], + }]; + + let (_, skipped, _) = apply_ops_batch(&mut sink, &meta, &mut batch, None); + assert_eq!(skipped, 1); + assert!(sink.filter_inserts.is_empty()); + } + + #[test] + fn test_parse_filter_eq() { + let filters = parse_filter_from_query_str("modelVersionIds eq 456").unwrap(); + assert_eq!(filters.len(), 1); + assert!(matches!( + &filters[0], + FilterClause::Eq(f, QValue::Integer(456)) if f == "modelVersionIds" + )); + } + + #[test] + fn test_parse_filter_in() { + let filters = + parse_filter_from_query_str("modelVersionIds in [101, 102, 103]").unwrap(); + assert_eq!(filters.len(), 1); + if let FilterClause::In(f, vals) = &filters[0] { + assert_eq!(f, "modelVersionIds"); + assert_eq!(vals.len(), 3); + } else { + panic!("Expected In clause"); + } + } + + #[test] + fn test_parse_query_value_types() { + assert!(matches!( + parse_query_value("42").unwrap(), + QValue::Integer(42) + )); 
+ assert!(matches!( + parse_query_value("true").unwrap(), + QValue::Bool(true) + )); + assert!(matches!( + parse_query_value("\"hello\"").unwrap(), + QValue::String(s) if s == "hello" + )); + } + + #[test] + fn test_cursor_persistence() { + let dir = tempfile::TempDir::new().unwrap(); + let path = dir.path().join("cursor"); + assert_eq!(load_cursor(&path), 0); + save_cursor(&path, 12345).unwrap(); + assert_eq!(load_cursor(&path), 12345); + } +} diff --git a/src/ops_wal.rs b/src/ops_wal.rs new file mode 100644 index 00000000..cc90ac57 --- /dev/null +++ b/src/ops_wal.rs @@ -0,0 +1,430 @@ +//! Ops WAL — append-only log for sync operations. +//! +//! Format per record: +//! [4 bytes: payload_len (u32 LE)] +//! [8 bytes: entity_id (i64 LE)] +//! [1 byte: flags (bit 0 = creates_slot)] +//! [payload_len bytes: ops JSONB] +//! [4 bytes: CRC32 of entity_id + flags + ops] +//! +//! The writer appends records and fsyncs. The reader tails the file, +//! reading batches of records and tracking a byte-offset cursor. +//! Partial records at EOF are skipped (crash recovery). + +use std::fs::{self, File, OpenOptions}; +use std::io::{self, Read, Seek, Write}; +use std::path::{Path, PathBuf}; + +use crate::pg_sync::ops::{EntityOps, Op}; + +const HEADER_SIZE: usize = 4 + 8 + 1; // payload_len + entity_id + flags +const FLAG_CREATES_SLOT: u8 = 0x01; +const CRC_SIZE: usize = 4; + +/// WAL writer — appends ops records to a file with CRC32 integrity. +pub struct WalWriter { + path: PathBuf, +} + +impl WalWriter { + pub fn new(path: impl Into) -> Self { + Self { path: path.into() } + } + + /// Append a batch of entity ops to the WAL. Writes all records and fsyncs. + /// Returns the number of bytes written. 
+ pub fn append_batch(&self, batch: &[EntityOps]) -> io::Result { + if batch.is_empty() { + return Ok(0); + } + + let mut file = OpenOptions::new() + .create(true) + .append(true) + .open(&self.path)?; + + let mut total_bytes = 0u64; + for entry in batch { + let ops_json = serde_json::to_vec(&entry.ops) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + + let payload_len = ops_json.len() as u32; + let entity_id_bytes = entry.entity_id.to_le_bytes(); + let flags: u8 = if entry.creates_slot { FLAG_CREATES_SLOT } else { 0 }; + + // CRC covers entity_id + flags + ops (not the length prefix) + let mut crc_input = Vec::with_capacity(9 + ops_json.len()); + crc_input.extend_from_slice(&entity_id_bytes); + crc_input.push(flags); + crc_input.extend_from_slice(&ops_json); + let crc = crc32fast::hash(&crc_input); + + // Write: [len][entity_id][flags][ops][crc] + file.write_all(&payload_len.to_le_bytes())?; + file.write_all(&entity_id_bytes)?; + file.write_all(&[flags])?; + file.write_all(&ops_json)?; + file.write_all(&crc.to_le_bytes())?; + + total_bytes += (HEADER_SIZE + ops_json.len() + CRC_SIZE) as u64; + } + + file.sync_all()?; + Ok(total_bytes) + } + + /// Get the file path. + pub fn path(&self) -> &Path { + &self.path + } + + /// Get current file size (0 if file doesn't exist). + pub fn file_size(&self) -> u64 { + fs::metadata(&self.path).map(|m| m.len()).unwrap_or(0) + } +} + +/// WAL reader — reads ops records from a file starting at a byte offset. +pub struct WalReader { + path: PathBuf, + /// Current read position (byte offset into the file) + cursor: u64, +} + +/// Result of reading a batch from the WAL. 
+pub struct WalBatch { + /// The ops read from the WAL + pub entries: Vec, + /// New cursor position after this batch + pub new_cursor: u64, + /// Number of bytes read + pub bytes_read: u64, + /// Number of records skipped due to CRC failure + pub crc_failures: u64, +} + +impl WalReader { + pub fn new(path: impl Into, cursor: u64) -> Self { + Self { + path: path.into(), + cursor, + } + } + + /// Read up to `max_records` from the WAL starting at the current cursor. + /// Uses incremental seek+read — only reads new data from the cursor position, + /// not the entire file. Safe for large WAL files. + pub fn read_batch(&mut self, max_records: usize) -> io::Result { + if !self.path.exists() { + return Ok(WalBatch { + entries: Vec::new(), + new_cursor: self.cursor, + bytes_read: 0, + crc_failures: 0, + }); + } + + let file_len = fs::metadata(&self.path)?.len(); + if self.cursor >= file_len { + return Ok(WalBatch { + entries: Vec::new(), + new_cursor: self.cursor, + bytes_read: 0, + crc_failures: 0, + }); + } + + // Read only from cursor to EOF (incremental, not full-file read) + let mut file = File::open(&self.path)?; + file.seek(std::io::SeekFrom::Start(self.cursor))?; + let remaining = (file_len - self.cursor) as usize; + let mut data = vec![0u8; remaining]; + file.read_exact(&mut data)?; + + let mut entries = Vec::new(); + let mut pos = 0usize; + let mut crc_failures = 0u64; + + while entries.len() < max_records && pos + HEADER_SIZE <= data.len() { + // Read header: [4-byte len][8-byte entity_id][1-byte flags] + let payload_len = + u32::from_le_bytes(data[pos..pos + 4].try_into().unwrap()) as usize; + let entity_id = + i64::from_le_bytes(data[pos + 4..pos + 12].try_into().unwrap()); + let flags = data[pos + 12]; + let creates_slot = (flags & FLAG_CREATES_SLOT) != 0; + + let record_end = pos + HEADER_SIZE + payload_len + CRC_SIZE; + if record_end > data.len() { + // Truncated record at EOF — stop here, don't advance cursor + break; + } + + // Verify CRC (covers 
entity_id + flags + ops) + let crc_input = &data[pos + 4..pos + HEADER_SIZE + payload_len]; + let stored_crc = u32::from_le_bytes( + data[pos + HEADER_SIZE + payload_len..record_end] + .try_into() + .unwrap(), + ); + let computed_crc = crc32fast::hash(crc_input); + + if stored_crc != computed_crc { + crc_failures += 1; + pos = record_end; + continue; + } + + // Parse ops JSON + let ops_data = &data[pos + HEADER_SIZE..pos + HEADER_SIZE + payload_len]; + match serde_json::from_slice::>(ops_data) { + Ok(ops) => { + entries.push(EntityOps { entity_id, ops, creates_slot }); + } + Err(_) => { + crc_failures += 1; + } + } + + pos = record_end; + } + + let bytes_read = pos as u64; + self.cursor += bytes_read; + + Ok(WalBatch { + entries, + new_cursor: self.cursor, + bytes_read, + crc_failures, + }) + } + + /// Get the current cursor position. + pub fn cursor(&self) -> u64 { + self.cursor + } + + /// Set the cursor position (for recovery from persisted state). + pub fn set_cursor(&mut self, cursor: u64) { + self.cursor = cursor; + } + + /// Check if there are more records to read (cursor < file size). + pub fn has_more(&self) -> bool { + let file_size = fs::metadata(&self.path).map(|m| m.len()).unwrap_or(0); + self.cursor < file_size + } +} + +/// Delete a WAL file. 
+pub fn remove_wal(path: &Path) -> io::Result<()> { + if path.exists() { + fs::remove_file(path)?; + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + use tempfile::TempDir; + + fn make_ops(entity_id: i64, ops: Vec) -> EntityOps { + EntityOps { entity_id, ops, creates_slot: false } + } + + #[test] + fn test_write_read_roundtrip() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("test.wal"); + + let writer = WalWriter::new(&wal_path); + let batch = vec![ + make_ops(1, vec![Op::Set { field: "nsfwLevel".into(), value: json!(16) }]), + make_ops(2, vec![Op::Add { field: "tagIds".into(), value: json!(42) }]), + make_ops(3, vec![Op::Delete]), + ]; + let bytes = writer.append_batch(&batch).unwrap(); + assert!(bytes > 0); + + let mut reader = WalReader::new(&wal_path, 0); + let result = reader.read_batch(100).unwrap(); + assert_eq!(result.entries.len(), 3); + assert_eq!(result.entries[0].entity_id, 1); + assert_eq!(result.entries[1].entity_id, 2); + assert_eq!(result.entries[2].entity_id, 3); + assert_eq!(result.crc_failures, 0); + assert!(!reader.has_more()); + } + + #[test] + fn test_multiple_appends() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("test.wal"); + let writer = WalWriter::new(&wal_path); + + // First batch + writer.append_batch(&[ + make_ops(1, vec![Op::Set { field: "a".into(), value: json!(1) }]), + ]).unwrap(); + + // Second batch + writer.append_batch(&[ + make_ops(2, vec![Op::Set { field: "b".into(), value: json!(2) }]), + ]).unwrap(); + + let mut reader = WalReader::new(&wal_path, 0); + let result = reader.read_batch(100).unwrap(); + assert_eq!(result.entries.len(), 2); + assert_eq!(result.entries[0].entity_id, 1); + assert_eq!(result.entries[1].entity_id, 2); + } + + #[test] + fn test_cursor_resume() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("test.wal"); + let writer = WalWriter::new(&wal_path); + + writer.append_batch(&[ + make_ops(1, 
vec![Op::Set { field: "a".into(), value: json!(1) }]), + make_ops(2, vec![Op::Set { field: "b".into(), value: json!(2) }]), + make_ops(3, vec![Op::Set { field: "c".into(), value: json!(3) }]), + ]).unwrap(); + + // Read first 2 + let mut reader = WalReader::new(&wal_path, 0); + let result = reader.read_batch(2).unwrap(); + assert_eq!(result.entries.len(), 2); + let saved_cursor = reader.cursor(); + + // Resume from cursor — should get the 3rd + let mut reader2 = WalReader::new(&wal_path, saved_cursor); + let result2 = reader2.read_batch(100).unwrap(); + assert_eq!(result2.entries.len(), 1); + assert_eq!(result2.entries[0].entity_id, 3); + assert!(!reader2.has_more()); + } + + #[test] + fn test_truncated_record_at_eof() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("test.wal"); + let writer = WalWriter::new(&wal_path); + + writer.append_batch(&[ + make_ops(1, vec![Op::Set { field: "a".into(), value: json!(1) }]), + ]).unwrap(); + + // Append garbage (partial record) + let mut file = OpenOptions::new().append(true).open(&wal_path).unwrap(); + file.write_all(&[0u8; 6]).unwrap(); // Too short to be a valid header+payload + + let mut reader = WalReader::new(&wal_path, 0); + let result = reader.read_batch(100).unwrap(); + // Should read the valid record and stop at the truncated one + assert_eq!(result.entries.len(), 1); + assert_eq!(result.crc_failures, 0); + } + + #[test] + fn test_corrupted_crc_skipped() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("test.wal"); + let writer = WalWriter::new(&wal_path); + + writer.append_batch(&[ + make_ops(1, vec![Op::Set { field: "a".into(), value: json!(1) }]), + make_ops(2, vec![Op::Set { field: "b".into(), value: json!(2) }]), + ]).unwrap(); + + // Corrupt the CRC of the first record + let mut data = fs::read(&wal_path).unwrap(); + // First record: header(12) + ops_json + crc(4) + // Find where the CRC is for the first record + let payload_len = 
u32::from_le_bytes(data[0..4].try_into().unwrap()) as usize; + let crc_offset = HEADER_SIZE + payload_len; + data[crc_offset] ^= 0xFF; // Flip bits in CRC + fs::write(&wal_path, &data).unwrap(); + + let mut reader = WalReader::new(&wal_path, 0); + let result = reader.read_batch(100).unwrap(); + // First record should be skipped (CRC failure), second should be read + assert_eq!(result.entries.len(), 1); + assert_eq!(result.entries[0].entity_id, 2); + assert_eq!(result.crc_failures, 1); + } + + #[test] + fn test_empty_file() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("test.wal"); + + let mut reader = WalReader::new(&wal_path, 0); + let result = reader.read_batch(100).unwrap(); + assert_eq!(result.entries.len(), 0); + assert!(!reader.has_more()); + } + + #[test] + fn test_query_op_set_roundtrip() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("test.wal"); + let writer = WalWriter::new(&wal_path); + + writer.append_batch(&[make_ops(456, vec![ + Op::QueryOpSet { + query: "modelVersionIds eq 456".into(), + ops: vec![ + Op::Remove { field: "baseModel".into(), value: json!("SD 1.5") }, + Op::Set { field: "baseModel".into(), value: json!("SDXL") }, + ], + }, + ])]).unwrap(); + + let mut reader = WalReader::new(&wal_path, 0); + let result = reader.read_batch(100).unwrap(); + assert_eq!(result.entries.len(), 1); + assert_eq!(result.entries[0].entity_id, 456); + match &result.entries[0].ops[0] { + Op::QueryOpSet { query, ops } => { + assert_eq!(query, "modelVersionIds eq 456"); + assert_eq!(ops.len(), 2); + } + _ => panic!("Expected QueryOpSet"), + } + } + + #[test] + fn test_file_size_tracking() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("test.wal"); + let writer = WalWriter::new(&wal_path); + + assert_eq!(writer.file_size(), 0); + + writer.append_batch(&[ + make_ops(1, vec![Op::Delete]), + ]).unwrap(); + + assert!(writer.file_size() > 0); + } + + #[test] + fn test_remove_wal() { + let dir = 
TempDir::new().unwrap(); + let wal_path = dir.path().join("test.wal"); + + let writer = WalWriter::new(&wal_path); + writer.append_batch(&[make_ops(1, vec![Op::Delete])]).unwrap(); + assert!(wal_path.exists()); + + remove_wal(&wal_path).unwrap(); + assert!(!wal_path.exists()); + + // Remove non-existent is ok + remove_wal(&wal_path).unwrap(); + } +} diff --git a/src/pg_sync/bitdex_client.rs b/src/pg_sync/bitdex_client.rs index a5e45c79..50be9d2b 100644 --- a/src/pg_sync/bitdex_client.rs +++ b/src/pg_sync/bitdex_client.rs @@ -243,6 +243,24 @@ impl BitdexClient { .await; } + /// POST a batch of V2 ops to the BitDex /ops endpoint. + pub async fn post_ops(&self, batch: &super::ops::OpsBatch) -> Result<(), String> { + let url = format!("{}/ops", self.base_url); + let resp = self.client + .post(&url) + .json(batch) + .send() + .await + .map_err(|e| format!("post_ops request failed: {e}"))?; + + if !resp.status().is_success() { + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + return Err(format!("post_ops returned {status}: {body}")); + } + Ok(()) + } + pub async fn get_cursor(&self, cursor_name: &str) -> Result, String> { let url = format!("{}/cursors/{}", self.base_url, cursor_name); let resp = self.client diff --git a/src/pg_sync/copy_queries.rs b/src/pg_sync/copy_queries.rs index 36bb568b..415e8062 100644 --- a/src/pg_sync/copy_queries.rs +++ b/src/pg_sync/copy_queries.rs @@ -723,6 +723,7 @@ mod tests { flags: (1 << 13), image_type: String::new(), user_id: 1, blocked_for: None, scanned_at_secs: None, created_at_secs: None, post_id: None, + width: None, height: None, published_at_secs: None, availability: String::new(), posted_to_id: None, }; assert!(row.has_meta()); @@ -739,6 +740,7 @@ mod tests { flags: (1 << 14), image_type: String::new(), user_id: 1, blocked_for: None, scanned_at_secs: None, created_at_secs: None, post_id: None, + width: None, height: None, published_at_secs: None, availability: String::new(), posted_to_id: None, 
}; assert!(row.on_site()); @@ -754,6 +756,7 @@ mod tests { scanned_at_secs: Some(100), created_at_secs: Some(200), published_at_secs: Some(150), + width: None, height: None, availability: String::new(), posted_to_id: None, post_id: None, }; assert_eq!(row.sort_at_secs(), 200); diff --git a/src/pg_sync/csv_ops.rs b/src/pg_sync/csv_ops.rs new file mode 100644 index 00000000..0d1ebbe9 --- /dev/null +++ b/src/pg_sync/csv_ops.rs @@ -0,0 +1,414 @@ +//! CSV→ops adapter for the dump pipeline. +//! +//! Reads existing CSV files (from PG COPY or local dumps) and transforms +//! each row into ops using the sync config schema. Writes ops to WAL files +//! for processing by the WAL reader thread. +//! +//! This is the local testing path and also the production dump path when +//! CSVs are pre-fetched to disk. + +use std::fs::File; +use std::io::{BufRead, BufReader}; +use std::path::Path; +use std::time::Instant; + +use serde_json::json; + +use super::copy_queries::{parse_image_row, parse_tag_row, parse_tool_row, CopyImageRow}; +use super::ops::{EntityOps, Op}; +use crate::ops_wal::WalWriter; + +/// Stats from a CSV→WAL conversion. +#[derive(Debug, Default)] +pub struct CsvOpsStats { + pub rows_read: u64, + pub rows_skipped: u64, + pub ops_written: u64, + pub bytes_written: u64, + pub elapsed_secs: f64, +} + +/// Convert images.csv to ops and write to WAL. +/// Each image row produces set ops for all tracked scalar fields. 
+pub fn images_csv_to_wal(csv_path: &Path, writer: &WalWriter, batch_size: usize) -> std::io::Result { + let start = Instant::now(); + let file = File::open(csv_path)?; + let reader = BufReader::with_capacity(8 * 1024 * 1024, file); + let mut stats = CsvOpsStats::default(); + let mut batch: Vec = Vec::with_capacity(batch_size); + + for line in reader.split(b'\n') { + let line = line?; + if line.is_empty() { + continue; + } + + let row = match parse_image_row(&line) { + Some(r) => r, + None => { + stats.rows_skipped += 1; + continue; + } + }; + stats.rows_read += 1; + + let ops = image_row_to_ops(&row); + batch.push(EntityOps { + entity_id: row.id, + ops, + creates_slot: true, // Image table creates alive slots + }); + + if batch.len() >= batch_size { + let bytes = writer.append_batch(&batch)?; + stats.ops_written += batch.len() as u64; + stats.bytes_written += bytes; + batch.clear(); + } + } + + // Flush remaining + if !batch.is_empty() { + let bytes = writer.append_batch(&batch)?; + stats.ops_written += batch.len() as u64; + stats.bytes_written += bytes; + } + + stats.elapsed_secs = start.elapsed().as_secs_f64(); + Ok(stats) +} + +/// Convert a single image CSV row to ops (public for direct dump path). +pub fn image_row_to_ops_pub(row: &CopyImageRow) -> Vec { + image_row_to_ops(row) +} + +/// Convert a single image CSV row to ops. 
+fn image_row_to_ops(row: &CopyImageRow) -> Vec { + let mut ops = Vec::with_capacity(12); + + ops.push(Op::Set { field: "nsfwLevel".into(), value: json!(row.nsfw_level) }); + ops.push(Op::Set { field: "type".into(), value: json!(row.image_type) }); + ops.push(Op::Set { field: "userId".into(), value: json!(row.user_id) }); + + if let Some(post_id) = row.post_id { + ops.push(Op::Set { field: "postId".into(), value: json!(post_id) }); + } + + // hasMeta and onSite from flags + let has_meta = row.has_meta(); + let on_site = row.on_site(); + ops.push(Op::Set { field: "hasMeta".into(), value: json!(has_meta) }); + ops.push(Op::Set { field: "onSite".into(), value: json!(on_site) }); + + // Minor and POI + let minor = row.minor(); + let poi = row.poi(); + ops.push(Op::Set { field: "minor".into(), value: json!(minor) }); + ops.push(Op::Set { field: "poi".into(), value: json!(poi) }); + + // existedAt = GREATEST(scannedAt, createdAt) in seconds + let existed_at = match (row.scanned_at_secs, row.created_at_secs) { + (Some(s), Some(c)) => s.max(c), + (Some(s), None) => s, + (None, Some(c)) => c, + (None, None) => 0, + }; + ops.push(Op::Set { field: "existedAt".into(), value: json!(existed_at) }); + + // blockedFor + if let Some(ref bf) = row.blocked_for { + ops.push(Op::Set { field: "blockedFor".into(), value: json!(bf) }); + } + + ops +} + +/// Convert tags.csv to add ops and write to WAL. +/// Each row: (tag_id, image_id) → add tagIds op on the image. +pub fn tags_csv_to_wal(csv_path: &Path, writer: &WalWriter, batch_size: usize) -> std::io::Result { + multi_value_csv_to_wal(csv_path, writer, batch_size, "tagIds", |line| { + // tags.csv: tag_id, image_id + parse_tag_row(line).map(|(tag_id, image_id)| (image_id, tag_id)) + }) +} + +/// Convert tools.csv to add ops and write to WAL. 
+pub fn tools_csv_to_wal(csv_path: &Path, writer: &WalWriter, batch_size: usize) -> std::io::Result { + multi_value_csv_to_wal(csv_path, writer, batch_size, "toolIds", |line| { + parse_tool_row(line).map(|(tool_id, image_id)| (image_id, tool_id)) + }) +} + +/// Generic multi-value CSV→WAL converter. +/// Parser returns (slot_id, value) pairs. +fn multi_value_csv_to_wal( + csv_path: &Path, + writer: &WalWriter, + batch_size: usize, + field_name: &str, + parser: impl Fn(&[u8]) -> Option<(i64, i64)>, +) -> std::io::Result { + let start = Instant::now(); + let file = File::open(csv_path)?; + let reader = BufReader::with_capacity(8 * 1024 * 1024, file); + let mut stats = CsvOpsStats::default(); + let mut batch: Vec = Vec::with_capacity(batch_size); + + for line in reader.split(b'\n') { + let line = line?; + if line.is_empty() { + continue; + } + + let (slot_id, value) = match parser(&line) { + Some(pair) => pair, + None => { + stats.rows_skipped += 1; + continue; + } + }; + stats.rows_read += 1; + + batch.push(EntityOps { + entity_id: slot_id, + ops: vec![Op::Add { + field: field_name.to_string(), + value: json!(value), + }], + creates_slot: false, // Join tables don't create alive slots + }); + + if batch.len() >= batch_size { + let bytes = writer.append_batch(&batch)?; + stats.ops_written += batch.len() as u64; + stats.bytes_written += bytes; + batch.clear(); + } + } + + if !batch.is_empty() { + let bytes = writer.append_batch(&batch)?; + stats.ops_written += batch.len() as u64; + stats.bytes_written += bytes; + } + + stats.elapsed_secs = start.elapsed().as_secs_f64(); + Ok(stats) +} + +/// Run the full CSV dump pipeline: read all CSVs, convert to ops, write to WAL. +/// Returns per-table stats. 
+pub fn run_csv_dump( + csv_dir: &Path, + wal_path: &Path, + batch_size: usize, + limit: Option, +) -> std::io::Result> { + let writer = WalWriter::new(wal_path); + let mut results = Vec::new(); + + // Phase 1: Images (must be first — sets alive + scalar fields) + let images_csv = csv_dir.join("images.csv"); + if images_csv.exists() { + eprintln!("CSV dump: loading images.csv..."); + let stats = if let Some(max) = limit { + images_csv_to_wal_limited(&images_csv, &writer, batch_size, max)? + } else { + images_csv_to_wal(&images_csv, &writer, batch_size)? + }; + eprintln!( + " images: {} rows, {} ops, {:.1}s ({:.0}/s)", + stats.rows_read, stats.ops_written, stats.elapsed_secs, + stats.rows_read as f64 / stats.elapsed_secs.max(0.001) + ); + results.push(("images".into(), stats)); + } + + // Phase 2: Multi-value tables (parallel-safe, but sequential here for simplicity) + let tags_csv = csv_dir.join("tags.csv"); + if tags_csv.exists() { + eprintln!("CSV dump: loading tags.csv..."); + let stats = if let Some(max) = limit { + multi_value_csv_to_wal_limited(&tags_csv, &writer, batch_size, "tagIds", max, |line| { + parse_tag_row(line).map(|(tag_id, image_id)| (image_id, tag_id)) + })? + } else { + tags_csv_to_wal(&tags_csv, &writer, batch_size)? + }; + eprintln!( + " tags: {} rows, {} ops, {:.1}s ({:.0}/s)", + stats.rows_read, stats.ops_written, stats.elapsed_secs, + stats.rows_read as f64 / stats.elapsed_secs.max(0.001) + ); + results.push(("tags".into(), stats)); + } + + let tools_csv = csv_dir.join("tools.csv"); + if tools_csv.exists() { + eprintln!("CSV dump: loading tools.csv..."); + let stats = if let Some(max) = limit { + multi_value_csv_to_wal_limited(&tools_csv, &writer, batch_size, "toolIds", max, |line| { + parse_tool_row(line).map(|(tool_id, image_id)| (image_id, tool_id)) + })? + } else { + tools_csv_to_wal(&tools_csv, &writer, batch_size)? 
+ }; + eprintln!( + " tools: {} rows, {} ops, {:.1}s ({:.0}/s)", + stats.rows_read, stats.ops_written, stats.elapsed_secs, + stats.rows_read as f64 / stats.elapsed_secs.max(0.001) + ); + results.push(("tools".into(), stats)); + } + + Ok(results) +} + +/// Limited version of images_csv_to_wal — stops after `limit` rows. +fn images_csv_to_wal_limited(csv_path: &Path, writer: &WalWriter, batch_size: usize, limit: u64) -> std::io::Result { + let start = Instant::now(); + let file = File::open(csv_path)?; + let reader = BufReader::with_capacity(8 * 1024 * 1024, file); + let mut stats = CsvOpsStats::default(); + let mut batch: Vec = Vec::with_capacity(batch_size); + + for line in reader.split(b'\n') { + if stats.rows_read >= limit { + break; + } + let line = line?; + if line.is_empty() { continue; } + let row = match parse_image_row(&line) { + Some(r) => r, + None => { stats.rows_skipped += 1; continue; } + }; + stats.rows_read += 1; + batch.push(EntityOps { entity_id: row.id, ops: image_row_to_ops(&row), creates_slot: true }); + if batch.len() >= batch_size { + let bytes = writer.append_batch(&batch)?; + stats.ops_written += batch.len() as u64; + stats.bytes_written += bytes; + batch.clear(); + } + } + if !batch.is_empty() { + let bytes = writer.append_batch(&batch)?; + stats.ops_written += batch.len() as u64; + stats.bytes_written += bytes; + } + stats.elapsed_secs = start.elapsed().as_secs_f64(); + Ok(stats) +} + +/// Limited version of multi_value_csv_to_wal. 
+fn multi_value_csv_to_wal_limited( + csv_path: &Path, + writer: &WalWriter, + batch_size: usize, + field_name: &str, + limit: u64, + parser: impl Fn(&[u8]) -> Option<(i64, i64)>, +) -> std::io::Result { + let start = Instant::now(); + let file = File::open(csv_path)?; + let reader = BufReader::with_capacity(8 * 1024 * 1024, file); + let mut stats = CsvOpsStats::default(); + let mut batch: Vec = Vec::with_capacity(batch_size); + + for line in reader.split(b'\n') { + if stats.rows_read >= limit { + break; + } + let line = line?; + if line.is_empty() { continue; } + let (slot_id, value) = match parser(&line) { + Some(pair) => pair, + None => { stats.rows_skipped += 1; continue; } + }; + stats.rows_read += 1; + batch.push(EntityOps { + entity_id: slot_id, + ops: vec![Op::Add { field: field_name.to_string(), value: json!(value) }], + creates_slot: false, + }); + if batch.len() >= batch_size { + let bytes = writer.append_batch(&batch)?; + stats.ops_written += batch.len() as u64; + stats.bytes_written += bytes; + batch.clear(); + } + } + if !batch.is_empty() { + let bytes = writer.append_batch(&batch)?; + stats.ops_written += batch.len() as u64; + stats.bytes_written += bytes; + } + stats.elapsed_secs = start.elapsed().as_secs_f64(); + Ok(stats) +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::TempDir; + + #[test] + fn test_image_row_to_ops() { + let row = CopyImageRow { + id: 1, + url: Some("test.jpg".into()), + nsfw_level: 16, + hash: None, + flags: (1 << 13), // hasMeta=true + image_type: "image".into(), + user_id: 42, + blocked_for: None, + scanned_at_secs: Some(1000), + created_at_secs: Some(2000), + post_id: Some(100), + width: None, + height: None, + published_at_secs: None, + availability: String::new(), + posted_to_id: None, + }; + let ops = image_row_to_ops(&row); + // Should have: nsfwLevel, type, userId, postId, hasMeta, onSite, minor, poi, existedAt + assert!(ops.len() >= 9); + + // Check nsfwLevel + let nsfw = ops.iter().find(|o| matches!(o, 
Op::Set { field, .. } if field == "nsfwLevel")).unwrap(); + if let Op::Set { value, .. } = nsfw { assert_eq!(*value, json!(16)); } + + // Check existedAt = max(1000, 2000) = 2000 + let existed = ops.iter().find(|o| matches!(o, Op::Set { field, .. } if field == "existedAt")).unwrap(); + if let Op::Set { value, .. } = existed { assert_eq!(*value, json!(2000)); } + + // Check hasMeta (flags bit 13 set) + let has_meta = ops.iter().find(|o| matches!(o, Op::Set { field, .. } if field == "hasMeta")).unwrap(); + if let Op::Set { value, .. } = has_meta { assert_eq!(*value, json!(true)); } + } + + #[test] + fn test_csv_to_wal_roundtrip() { + let dir = TempDir::new().unwrap(); + let csv_path = dir.path().join("images.csv"); + let wal_path = dir.path().join("ops.wal"); + + // Write a tiny CSV (comma-separated, matching PG COPY CSV format) + std::fs::write(&csv_path, b"1,http://img.jpg,16,,8192,image,42,,1000,2000,100\n2,,1,,0,video,99,,500,600,200\n").unwrap(); + + let stats = images_csv_to_wal(&csv_path, &WalWriter::new(&wal_path), 100).unwrap(); + assert_eq!(stats.rows_read, 2); + assert_eq!(stats.ops_written, 2); + assert!(stats.bytes_written > 0); + + // Read back from WAL + let mut reader = crate::ops_wal::WalReader::new(&wal_path, 0); + let batch = reader.read_batch(100).unwrap(); + assert_eq!(batch.entries.len(), 2); + assert_eq!(batch.entries[0].entity_id, 1); + assert_eq!(batch.entries[1].entity_id, 2); + } +} diff --git a/src/pg_sync/dump.rs b/src/pg_sync/dump.rs new file mode 100644 index 00000000..1de7ee99 --- /dev/null +++ b/src/pg_sync/dump.rs @@ -0,0 +1,297 @@ +//! Dump pipeline — manages table dump lifecycle for initial loading. +//! +//! Server side: dump registry (track which tables have been loaded). +//! Client side: pg-sync checks dump history, runs flat COPYs, writes WAL files. +//! +//! Dump lifecycle: +//! 1. PUT /dumps — register a new dump (returns task ID, WAL reader starts polling) +//! 2. pg-sync writes ops to WAL file on shared filesystem +//! 3. 
POST /dumps/{name}/loaded — signal file is complete
+//! 4. WAL reader finishes processing, marks dump as complete
+//! 5. GET /dumps — check status per table
+
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+use std::time::SystemTime;
+
+use serde::{Deserialize, Serialize};
+
+/// State of a single dump.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DumpEntry {
+    /// Dump name (e.g., "Image-a1b2c3d4")
+    pub name: String,
+    /// WAL file path (relative to data_dir)
+    pub wal_path: Option<PathBuf>,
+    /// Current status
+    pub status: DumpStatus,
+    /// Number of ops written (reported by pg-sync)
+    pub ops_written: u64,
+    /// Number of ops processed by WAL reader
+    pub ops_processed: u64,
+    /// When the dump was registered
+    pub created_at: u64,
+    /// When the dump completed processing
+    pub completed_at: Option<u64>,
+}
+
+/// Dump status.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub enum DumpStatus {
+    /// pg-sync is writing to the WAL file
+    Writing,
+    /// pg-sync signaled the file is complete, WAL reader is processing
+    Loading,
+    /// WAL reader finished processing
+    Complete,
+    /// Dump failed
+    Failed(String),
+}
+
+/// Registry of dump state. Persisted to dumps.json in the data directory.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+pub struct DumpRegistry {
+    pub dumps: HashMap<String, DumpEntry>,
+}
+
+impl DumpRegistry {
+    /// Load from a JSON file. Returns empty registry if file doesn't exist.
+    pub fn load(path: &Path) -> Self {
+        std::fs::read_to_string(path)
+            .ok()
+            .and_then(|s| serde_json::from_str(&s).ok())
+            .unwrap_or_default()
+    }
+
+    /// Save to a JSON file.
+    pub fn save(&self, path: &Path) -> std::io::Result<()> {
+        let json = serde_json::to_string_pretty(self)
+            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
+        // Atomic write via temp file
+        let tmp = path.with_extension("tmp");
+        std::fs::write(&tmp, &json)?;
+        std::fs::rename(&tmp, path)?;
+        Ok(())
+    }
+
+    /// Register a new dump.
Returns the entry.
+    pub fn register(&mut self, name: String, wal_path: Option<PathBuf>) -> &DumpEntry {
+        let now = SystemTime::now()
+            .duration_since(SystemTime::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_secs();
+
+        self.dumps.insert(
+            name.clone(),
+            DumpEntry {
+                name: name.clone(),
+                wal_path,
+                status: DumpStatus::Writing,
+                ops_written: 0,
+                ops_processed: 0,
+                created_at: now,
+                completed_at: None,
+            },
+        );
+        &self.dumps[&name]
+    }
+
+    /// Mark a dump as loaded (pg-sync finished writing the WAL file).
+    pub fn mark_loaded(&mut self, name: &str, ops_written: u64) -> Option<&DumpEntry> {
+        if let Some(entry) = self.dumps.get_mut(name) {
+            entry.status = DumpStatus::Loading;
+            entry.ops_written = ops_written;
+            Some(entry)
+        } else {
+            None
+        }
+    }
+
+    /// Mark a dump as complete (WAL reader finished processing).
+    pub fn mark_complete(&mut self, name: &str, ops_processed: u64) -> Option<&DumpEntry> {
+        let now = SystemTime::now()
+            .duration_since(SystemTime::UNIX_EPOCH)
+            .unwrap_or_default()
+            .as_secs();
+
+        if let Some(entry) = self.dumps.get_mut(name) {
+            entry.status = DumpStatus::Complete;
+            entry.ops_processed = ops_processed;
+            entry.completed_at = Some(now);
+            Some(entry)
+        } else {
+            None
+        }
+    }
+
+    /// Mark a dump as failed.
+    pub fn mark_failed(&mut self, name: &str, error: String) {
+        if let Some(entry) = self.dumps.get_mut(name) {
+            entry.status = DumpStatus::Failed(error);
+        }
+    }
+
+    /// Remove a dump from the registry.
+    pub fn remove(&mut self, name: &str) -> Option<DumpEntry> {
+        self.dumps.remove(name)
+    }
+
+    /// Clear all dumps.
+    pub fn clear(&mut self) {
+        self.dumps.clear();
+    }
+
+    /// Check if a dump with the given name exists and is complete.
+    pub fn is_complete(&self, name: &str) -> bool {
+        self.dumps
+            .get(name)
+            .map(|e| e.status == DumpStatus::Complete)
+            .unwrap_or(false)
+    }
+
+    /// Get all dump names that are complete.
+ pub fn completed_names(&self) -> Vec<&str> { + self.dumps + .values() + .filter(|e| e.status == DumpStatus::Complete) + .map(|e| e.name.as_str()) + .collect() + } + + /// Check if all dumps are complete (no pending/writing/loading). + pub fn all_complete(&self) -> bool { + !self.dumps.is_empty() + && self.dumps.values().all(|e| e.status == DumpStatus::Complete) + } +} + +/// Build the dump name from a table name and config hash. +/// Format: "{Table}-{hash8}" +pub fn dump_name(table: &str, config_hash: &str) -> String { + format!("{}-{}", table, &config_hash[..8.min(config_hash.len())]) +} + +/// Compute a config hash for a sync source entry. +pub fn config_hash(yaml_fragment: &str) -> String { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + let mut hasher = DefaultHasher::new(); + yaml_fragment.hash(&mut hasher); + format!("{:016x}", hasher.finish()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_dump_lifecycle() { + let mut reg = DumpRegistry::default(); + assert!(reg.dumps.is_empty()); + + // Register + reg.register("Image-a1b2c3d4".into(), Some("dumps/image.wal".into())); + assert_eq!(reg.dumps.len(), 1); + assert_eq!(reg.dumps["Image-a1b2c3d4"].status, DumpStatus::Writing); + + // Mark loaded + reg.mark_loaded("Image-a1b2c3d4", 107_000_000); + assert_eq!(reg.dumps["Image-a1b2c3d4"].status, DumpStatus::Loading); + assert_eq!(reg.dumps["Image-a1b2c3d4"].ops_written, 107_000_000); + + // Mark complete + reg.mark_complete("Image-a1b2c3d4", 107_000_000); + assert_eq!(reg.dumps["Image-a1b2c3d4"].status, DumpStatus::Complete); + assert!(reg.dumps["Image-a1b2c3d4"].completed_at.is_some()); + + assert!(reg.is_complete("Image-a1b2c3d4")); + assert!(reg.all_complete()); + } + + #[test] + fn test_dump_persistence() { + let dir = tempfile::TempDir::new().unwrap(); + let path = dir.path().join("dumps.json"); + + let mut reg = DumpRegistry::default(); + reg.register("Image-abc".into(), None); + 
reg.mark_complete("Image-abc", 100); + reg.save(&path).unwrap(); + + let loaded = DumpRegistry::load(&path); + assert_eq!(loaded.dumps.len(), 1); + assert!(loaded.is_complete("Image-abc")); + } + + #[test] + fn test_dump_removal() { + let mut reg = DumpRegistry::default(); + reg.register("Image-abc".into(), None); + reg.register("Tags-def".into(), None); + assert_eq!(reg.dumps.len(), 2); + + reg.remove("Image-abc"); + assert_eq!(reg.dumps.len(), 1); + assert!(!reg.dumps.contains_key("Image-abc")); + } + + #[test] + fn test_dump_clear() { + let mut reg = DumpRegistry::default(); + reg.register("Image-abc".into(), None); + reg.register("Tags-def".into(), None); + reg.clear(); + assert!(reg.dumps.is_empty()); + } + + #[test] + fn test_all_complete() { + let mut reg = DumpRegistry::default(); + assert!(!reg.all_complete()); // Empty = not all complete + + reg.register("Image-abc".into(), None); + reg.register("Tags-def".into(), None); + assert!(!reg.all_complete()); + + reg.mark_complete("Image-abc", 100); + assert!(!reg.all_complete()); // Tags still pending + + reg.mark_loaded("Tags-def", 50); + reg.mark_complete("Tags-def", 50); + assert!(reg.all_complete()); + } + + #[test] + fn test_dump_name() { + assert_eq!(dump_name("Image", "a1b2c3d4e5f6"), "Image-a1b2c3d4"); + } + + #[test] + fn test_config_hash_deterministic() { + let h1 = config_hash("table: Image\nslot_field: id\ntrack_fields: [nsfwLevel]"); + let h2 = config_hash("table: Image\nslot_field: id\ntrack_fields: [nsfwLevel]"); + assert_eq!(h1, h2); + } + + #[test] + fn test_config_hash_changes() { + let h1 = config_hash("table: Image\ntrack_fields: [nsfwLevel]"); + let h2 = config_hash("table: Image\ntrack_fields: [nsfwLevel, type]"); + assert_ne!(h1, h2); + } + + #[test] + fn test_load_missing_file() { + let reg = DumpRegistry::load(Path::new("/nonexistent/dumps.json")); + assert!(reg.dumps.is_empty()); + } + + #[test] + fn test_failed_dump() { + let mut reg = DumpRegistry::default(); + 
reg.register("Image-abc".into(), None); + reg.mark_failed("Image-abc", "connection reset".into()); + assert!(matches!(reg.dumps["Image-abc"].status, DumpStatus::Failed(_))); + assert!(!reg.is_complete("Image-abc")); + } +} diff --git a/src/pg_sync/metrics_poller.rs b/src/pg_sync/metrics_poller.rs index fe91f9fb..b7059d53 100644 --- a/src/pg_sync/metrics_poller.rs +++ b/src/pg_sync/metrics_poller.rs @@ -1,18 +1,20 @@ //! ClickHouse metrics poller: polls for recent metric events, fetches aggregate -//! counts, rebuilds full docs from PG, and pushes to Bitdex. +//! counts, and pushes sort-field ops to BitDex via the V2 ops pipeline. //! //! ClickHouse is queried via its HTTP interface (POST with SQL). +//! Metrics (reactionCount, commentCount, collectedCount) are sort-only fields, +//! so ops are sent with `creates_slot: false` — they update existing slots +//! without touching the alive bitmap. use std::collections::HashMap; use std::time::{SystemTime, UNIX_EPOCH}; use reqwest::Client; -use sqlx::PgPool; +use serde_json::json; use tokio::time::{Duration, interval}; use super::bitdex_client::BitdexClient; -use super::queries; -use super::row_assembler::{assemble_batch, EnrichmentData, MetricInfo}; +use super::ops::{EntityOps, Op, OpsBatch, SyncMeta}; /// ClickHouse connection config. pub struct ClickHouseConfig { @@ -21,9 +23,19 @@ pub struct ClickHouseConfig { pub password: Option, } +/// Aggregate metric counts for a single image from ClickHouse. +struct MetricInfo { + reaction_count: i64, + comment_count: i64, + collected_count: i64, +} + /// Run the ClickHouse metrics poller loop. Runs forever until cancelled. +/// +/// V2 pipeline: fetches aggregate counts from ClickHouse, converts them to +/// `Op::Set` ops for sort fields, and POSTs via the `/ops` endpoint. +/// No PG round-trip needed — metrics are self-contained sort-field updates. 
pub async fn run_metrics_poller( - pool: &PgPool, ch_config: &ClickHouseConfig, bitdex_client: &BitdexClient, poll_interval_secs: u64, @@ -41,7 +53,7 @@ pub async fn run_metrics_poller( loop { ticker.tick().await; - // Health gate: skip ClickHouse + PG fetch if BitDex is unreachable. + // Health gate: skip ClickHouse fetch if BitDex is unreachable. if !bitdex_client.is_healthy().await { if !bitdex_was_down { eprintln!("Metrics: BitDex is unreachable, pausing until healthy"); @@ -56,10 +68,10 @@ pub async fn run_metrics_poller( let now = current_epoch_secs(); - match poll_metrics_and_push(pool, &http, ch_config, bitdex_client, last_poll_ts).await { + match poll_metrics_and_push(&http, ch_config, bitdex_client, last_poll_ts).await { Ok(count) => { if count > 0 { - eprintln!("Metrics: updated {count} documents"); + eprintln!("Metrics: pushed {count} ops batches"); } last_poll_ts = now; } @@ -78,9 +90,12 @@ fn current_epoch_secs() -> i64 { .as_secs() as i64 } -/// Single poll + push cycle. +/// Maximum number of entity ops per HTTP request to `/ops`. +/// Keeps request bodies reasonable and avoids timeouts. +const OPS_BATCH_SIZE: usize = 5_000; + +/// Single poll + push cycle. Fetches CH metrics, converts to V2 ops, POSTs to BitDex. 
async fn poll_metrics_and_push( - pool: &PgPool, http: &Client, ch_config: &ClickHouseConfig, bitdex_client: &BitdexClient, @@ -93,41 +108,25 @@ async fn poll_metrics_and_push( return Ok(0); } - let image_ids: Vec = metrics.keys().copied().collect(); - - // Fetch full documents from PG (same enrichment pipeline as outbox) - let images = queries::fetch_images_by_ids(pool, &image_ids) - .await - .map_err(|e| format!("fetch_images_by_ids: {e}"))?; - - if images.is_empty() { - return Ok(0); - } - - let fetched_ids: Vec = images.iter().map(|r| r.id).collect(); - - let (tags, tools, techniques, resources) = tokio::try_join!( - queries::fetch_tags(pool, &fetched_ids), - queries::fetch_tools(pool, &fetched_ids), - queries::fetch_techniques(pool, &fetched_ids), - queries::fetch_resources(pool, &fetched_ids), - ) - .map_err(|e| format!("enrichment queries: {e}"))?; - - let mut enrichment = EnrichmentData::from_rows(tags, tools, techniques, resources); - - // Merge ClickHouse metrics into enrichment - enrichment.metrics = metrics; - - let docs = assemble_batch(&images, &enrichment); - let count = docs.len(); - - // Use PATCH for metrics updates — preserves fields not included in this update. - if !docs.is_empty() { - bitdex_client.patch_batch(&docs, None).await?; + let entity_ops = metrics_to_entity_ops(metrics); + + let total = entity_ops.len(); + + // Send in batches to keep request sizes manageable. + for chunk in entity_ops.chunks(OPS_BATCH_SIZE) { + let batch = OpsBatch { + ops: chunk.to_vec(), + meta: Some(SyncMeta { + source: "clickhouse-metrics".into(), + cursor: None, + max_id: None, + lag_rows: None, + }), + }; + bitdex_client.post_ops(&batch).await?; } - Ok(count) + Ok(total) } /// Query ClickHouse HTTP interface for aggregate metrics. @@ -214,3 +213,188 @@ async fn fetch_metrics_from_clickhouse( Ok(metrics) } + +/// Convert a map of CH metrics into V2 EntityOps. +/// +/// Each image gets three `Op::Set` ops (reactionCount, commentCount, collectedCount). 
+/// `creates_slot` is false because these are sort-only field updates — they should
+/// never create new alive slots.
+fn metrics_to_entity_ops(metrics: HashMap<i64, MetricInfo>) -> Vec<EntityOps> {
+    metrics
+        .into_iter()
+        .map(|(image_id, info)| {
+            EntityOps::new(
+                image_id,
+                vec![
+                    Op::Set {
+                        field: "reactionCount".into(),
+                        value: json!(info.reaction_count),
+                    },
+                    Op::Set {
+                        field: "commentCount".into(),
+                        value: json!(info.comment_count),
+                    },
+                    Op::Set {
+                        field: "collectedCount".into(),
+                        value: json!(info.collected_count),
+                    },
+                ],
+            )
+        })
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_metrics_to_entity_ops_single() {
+        let mut metrics = HashMap::new();
+        metrics.insert(
+            42,
+            MetricInfo {
+                reaction_count: 100,
+                comment_count: 5,
+                collected_count: 3,
+            },
+        );
+
+        let ops = metrics_to_entity_ops(metrics);
+        assert_eq!(ops.len(), 1);
+
+        let entity = &ops[0];
+        assert_eq!(entity.entity_id, 42);
+        assert!(!entity.creates_slot, "metrics ops must not create slots");
+        assert_eq!(entity.ops.len(), 3);
+
+        // Verify all three sort fields are present as Set ops
+        let fields: Vec<&str> = entity
+            .ops
+            .iter()
+            .filter_map(|op| match op {
+                Op::Set { field, ..
} => Some(field.as_str()), + _ => None, + }) + .collect(); + assert!(fields.contains(&"reactionCount")); + assert!(fields.contains(&"commentCount")); + assert!(fields.contains(&"collectedCount")); + } + + #[test] + fn test_metrics_to_entity_ops_values() { + let mut metrics = HashMap::new(); + metrics.insert( + 99, + MetricInfo { + reaction_count: 1234, + comment_count: 56, + collected_count: 78, + }, + ); + + let ops = metrics_to_entity_ops(metrics); + let entity = &ops[0]; + + for op in &entity.ops { + match op { + Op::Set { field, value } => match field.as_str() { + "reactionCount" => assert_eq!(value, &json!(1234)), + "commentCount" => assert_eq!(value, &json!(56)), + "collectedCount" => assert_eq!(value, &json!(78)), + other => panic!("unexpected field: {other}"), + }, + other => panic!("expected Op::Set, got {other:?}"), + } + } + } + + #[test] + fn test_metrics_to_entity_ops_empty() { + let metrics = HashMap::new(); + let ops = metrics_to_entity_ops(metrics); + assert!(ops.is_empty()); + } + + #[test] + fn test_metrics_to_entity_ops_multiple_images() { + let mut metrics = HashMap::new(); + for id in 1..=100 { + metrics.insert( + id, + MetricInfo { + reaction_count: id * 10, + comment_count: id, + collected_count: id / 2, + }, + ); + } + + let ops = metrics_to_entity_ops(metrics); + assert_eq!(ops.len(), 100); + + // Every entry should have creates_slot = false and 3 ops + for entity in &ops { + assert!(!entity.creates_slot); + assert_eq!(entity.ops.len(), 3); + } + } + + #[test] + fn test_metrics_ops_batch_serialization() { + let mut metrics = HashMap::new(); + metrics.insert( + 42, + MetricInfo { + reaction_count: 100, + comment_count: 5, + collected_count: 3, + }, + ); + + let entity_ops = metrics_to_entity_ops(metrics); + let batch = OpsBatch { + ops: entity_ops, + meta: Some(SyncMeta { + source: "clickhouse-metrics".into(), + cursor: None, + max_id: None, + lag_rows: None, + }), + }; + + // Verify it serializes to valid JSON matching the expected ops 
format + let json = serde_json::to_value(&batch).unwrap(); + assert_eq!(json["meta"]["source"], "clickhouse-metrics"); + assert_eq!(json["ops"].as_array().unwrap().len(), 1); + + let first = &json["ops"][0]; + assert_eq!(first["entity_id"], 42); + assert_eq!(first["creates_slot"], false); + assert_eq!(first["ops"].as_array().unwrap().len(), 3); + } + + #[test] + fn test_metrics_zero_counts() { + let mut metrics = HashMap::new(); + metrics.insert( + 1, + MetricInfo { + reaction_count: 0, + comment_count: 0, + collected_count: 0, + }, + ); + + let ops = metrics_to_entity_ops(metrics); + assert_eq!(ops.len(), 1); + // Zero counts should still produce Set ops (correct cumulative value) + assert_eq!(ops[0].ops.len(), 3); + for op in &ops[0].ops { + if let Op::Set { value, .. } = op { + assert_eq!(value, &json!(0)); + } + } + } +} diff --git a/src/pg_sync/mod.rs b/src/pg_sync/mod.rs index 839aa4a3..d1cbcff7 100644 --- a/src/pg_sync/mod.rs +++ b/src/pg_sync/mod.rs @@ -11,8 +11,14 @@ pub mod bulk_loader; pub mod config; pub mod copy_queries; pub mod copy_streams; +pub mod csv_ops; +pub mod dump; pub mod metrics_poller; +pub mod op_dedup; +pub mod ops; +pub mod ops_poller; pub mod outbox_poller; +pub mod trigger_gen; pub mod progress; pub mod queries; pub mod row_assembler; diff --git a/src/pg_sync/op_dedup.rs b/src/pg_sync/op_dedup.rs new file mode 100644 index 00000000..0e889183 --- /dev/null +++ b/src/pg_sync/op_dedup.rs @@ -0,0 +1,294 @@ +//! Op deduplication and compression. +//! +//! Shared helper used by both pg-sync (before sending) and the WAL reader +//! (before applying). Two layers of dedup catch duplicates at both stages. +//! +//! Rules: +//! - LIFO per (entity_id, field): last op wins for set/remove pairs +//! - Add/remove cancellation: add X then remove X = net zero, dropped +//! - QueryOpSet dedup: by (entity_id, query string), last wins +//! 
- Delete absorbs all prior ops for the same entity_id
+
+use std::collections::HashMap;
+
+use super::ops::{EntityOps, Op};
+
+/// Deduplicate a batch of entity ops in-place.
+///
+/// Processes ops in order (oldest first), applying LIFO semantics:
+/// for each (entity_id, field), only the last op survives.
+/// Add/remove cancellation eliminates net-zero multi-value ops.
+/// A delete op absorbs all prior ops for that entity.
+pub fn dedup_ops(batch: &mut Vec<EntityOps>) {
+    // Phase 1: Merge all ops per entity_id, preserving creates_slot (OR across sources)
+    let mut entity_map: HashMap<i64, Vec<Op>> = HashMap::new();
+    let mut creates_slot_map: HashMap<i64, bool> = HashMap::new();
+    for entry in batch.drain(..) {
+        entity_map
+            .entry(entry.entity_id)
+            .or_default()
+            .extend(entry.ops);
+        // If ANY source for this entity sets creates_slot, preserve it
+        if entry.creates_slot {
+            creates_slot_map.insert(entry.entity_id, true);
+        }
+    }
+
+    // Phase 2: Dedup ops within each entity
+    for (_entity_id, ops) in &mut entity_map {
+        dedup_entity_ops(ops);
+    }
+
+    // Phase 3: Rebuild batch, dropping empty entries
+    *batch = entity_map
+        .into_iter()
+        .filter(|(_, ops)| !ops.is_empty())
+        .map(|(entity_id, ops)| EntityOps {
+            entity_id,
+            ops,
+            creates_slot: creates_slot_map.get(&entity_id).copied().unwrap_or(false),
+        })
+        .collect();
+}
+
+/// Dedup ops for a single entity. Mutates the vec in place.
+fn dedup_entity_ops(ops: &mut Vec<Op>) {
+    if ops.is_empty() {
+        return;
+    }
+
+    // If there's a Delete, it absorbs everything — only keep the delete
+    if ops.iter().any(|op| matches!(op, Op::Delete)) {
+        ops.clear();
+        ops.push(Op::Delete);
+        return;
+    }
+
+    // First pass: collect all ops, tracking which fields have Set ops
+    let mut all_ops: Vec<Op> = ops.drain(..).collect();
+    let mut set_fields: std::collections::HashSet<String> = std::collections::HashSet::new();
+    for op in &all_ops {
+        if let Op::Set { field, ..
} = op {
+            set_fields.insert(field.clone());
+        }
+    }
+
+    // LIFO for set/remove on scalar fields (paired with Set = old value cleanup)
+    let mut last_set: HashMap<String, serde_json::Value> = HashMap::new();
+    let mut last_remove: HashMap<String, serde_json::Value> = HashMap::new();
+
+    // Track add/remove for multi-value fields (net operations)
+    // Key: (field, value_as_string), Value: net count (+1 for add, -1 for remove)
+    let mut multi_value_net: HashMap<(String, String), i64> = HashMap::new();
+
+    // Track queryOpSet by query string (last wins)
+    let mut query_ops: HashMap<String, Vec<Op>> = HashMap::new();
+
+    for op in all_ops {
+        match op {
+            Op::Set { ref field, ref value } => {
+                last_set.insert(field.clone(), value.clone());
+            }
+            Op::Remove { ref field, ref value } => {
+                if set_fields.contains(field) {
+                    // Scalar field: this remove is paired with a set (old value cleanup)
+                    last_remove.insert(field.clone(), value.clone());
+                } else {
+                    // Multi-value field: track net operations
+                    let key = (field.clone(), value.to_string());
+                    *multi_value_net.entry(key).or_insert(0) -= 1;
+                }
+            }
+            Op::Add { ref field, ref value } => {
+                let key = (field.clone(), value.to_string());
+                *multi_value_net.entry(key).or_insert(0) += 1;
+            }
+            Op::QueryOpSet { ref query, ops: ref nested_ops } => {
+                query_ops.insert(query.clone(), nested_ops.clone());
+            }
+            Op::Delete => unreachable!("handled above"),
+        }
+    }
+
+    // Rebuild: remove ops first, then set ops (order matters for bitmap updates)
+    for (field, value) in &last_remove {
+        ops.push(Op::Remove {
+            field: field.clone(),
+            value: value.clone(),
+        });
+    }
+
+    for (field, value) in last_set {
+        ops.push(Op::Set { field, value });
+    }
+
+    // Multi-value: emit net operations
+    for ((field, value_str), net) in multi_value_net {
+        if net == 0 {
+            continue; // Cancelled out
+        }
+        let value: serde_json::Value = serde_json::from_str(&value_str)
+            .unwrap_or(serde_json::Value::String(value_str));
+        if net > 0 {
+            ops.push(Op::Add { field, value });
+        } else {
+            ops.push(Op::Remove { field, value
}); + } + } + + // QueryOpSets: last query string wins + for (query, nested) in query_ops { + ops.push(Op::QueryOpSet { query, ops: nested }); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn entity(id: i64, ops: Vec) -> EntityOps { + EntityOps { entity_id: id, ops, creates_slot: false } + } + + #[test] + fn test_lifo_set_same_field() { + let mut batch = vec![ + entity(1, vec![ + Op::Set { field: "nsfwLevel".into(), value: json!(8) }, + ]), + entity(1, vec![ + Op::Set { field: "nsfwLevel".into(), value: json!(16) }, + ]), + ]; + dedup_ops(&mut batch); + assert_eq!(batch.len(), 1); + let ops = &batch[0].ops; + // Last set wins + let set_op = ops.iter().find(|op| matches!(op, Op::Set { field, .. } if field == "nsfwLevel")).unwrap(); + if let Op::Set { value, .. } = set_op { + assert_eq!(*value, json!(16)); + } + } + + #[test] + fn test_different_fields_preserved() { + let mut batch = vec![entity(1, vec![ + Op::Set { field: "nsfwLevel".into(), value: json!(16) }, + Op::Set { field: "type".into(), value: json!("video") }, + ])]; + dedup_ops(&mut batch); + assert_eq!(batch[0].ops.len(), 2); + } + + #[test] + fn test_add_remove_cancellation() { + let mut batch = vec![entity(1, vec![ + Op::Add { field: "tagIds".into(), value: json!(42) }, + Op::Remove { field: "tagIds".into(), value: json!(42) }, + ])]; + dedup_ops(&mut batch); + // Net zero — entity should be dropped entirely + assert!(batch.is_empty() || batch[0].ops.is_empty()); + } + + #[test] + fn test_add_survives_when_no_cancel() { + let mut batch = vec![entity(1, vec![ + Op::Add { field: "tagIds".into(), value: json!(42) }, + Op::Add { field: "tagIds".into(), value: json!(99) }, + ])]; + dedup_ops(&mut batch); + assert_eq!(batch.len(), 1); + let adds: Vec<_> = batch[0].ops.iter() + .filter(|op| matches!(op, Op::Add { .. 
})) + .collect(); + assert_eq!(adds.len(), 2); + } + + #[test] + fn test_delete_absorbs_all() { + let mut batch = vec![ + entity(1, vec![ + Op::Set { field: "nsfwLevel".into(), value: json!(16) }, + Op::Add { field: "tagIds".into(), value: json!(42) }, + ]), + entity(1, vec![Op::Delete]), + ]; + dedup_ops(&mut batch); + assert_eq!(batch.len(), 1); + assert_eq!(batch[0].ops.len(), 1); + assert!(matches!(&batch[0].ops[0], Op::Delete)); + } + + #[test] + fn test_different_entities_independent() { + let mut batch = vec![ + entity(1, vec![Op::Set { field: "nsfwLevel".into(), value: json!(16) }]), + entity(2, vec![Op::Set { field: "nsfwLevel".into(), value: json!(32) }]), + ]; + dedup_ops(&mut batch); + assert_eq!(batch.len(), 2); + } + + #[test] + fn test_query_op_set_last_wins() { + let mut batch = vec![entity(456, vec![ + Op::QueryOpSet { + query: "modelVersionIds eq 456".into(), + ops: vec![Op::Set { field: "baseModel".into(), value: json!("SD 1.5") }], + }, + Op::QueryOpSet { + query: "modelVersionIds eq 456".into(), + ops: vec![Op::Set { field: "baseModel".into(), value: json!("SDXL") }], + }, + ])]; + dedup_ops(&mut batch); + let qops: Vec<_> = batch[0].ops.iter() + .filter(|op| matches!(op, Op::QueryOpSet { .. })) + .collect(); + assert_eq!(qops.len(), 1); + if let Op::QueryOpSet { ops, .. } = &qops[0] { + if let Op::Set { value, .. } = &ops[0] { + assert_eq!(*value, json!("SDXL")); + } + } + } + + #[test] + fn test_remove_set_pair_preserved() { + // An update: remove old value, set new value — both should survive + let mut batch = vec![entity(1, vec![ + Op::Remove { field: "nsfwLevel".into(), value: json!(8) }, + Op::Set { field: "nsfwLevel".into(), value: json!(16) }, + ])]; + dedup_ops(&mut batch); + assert_eq!(batch.len(), 1); + let has_remove = batch[0].ops.iter().any(|op| matches!(op, Op::Remove { field, .. } if field == "nsfwLevel")); + let has_set = batch[0].ops.iter().any(|op| matches!(op, Op::Set { field, .. 
} if field == "nsfwLevel")); + assert!(has_remove, "remove should survive"); + assert!(has_set, "set should survive"); + } + + #[test] + fn test_empty_batch() { + let mut batch: Vec = vec![]; + dedup_ops(&mut batch); + assert!(batch.is_empty()); + } + + #[test] + fn test_multiple_adds_same_value_collapse() { + // Adding tag 42 three times should still produce one add + let mut batch = vec![entity(1, vec![ + Op::Add { field: "tagIds".into(), value: json!(42) }, + Op::Add { field: "tagIds".into(), value: json!(42) }, + Op::Add { field: "tagIds".into(), value: json!(42) }, + ])]; + dedup_ops(&mut batch); + let adds: Vec<_> = batch[0].ops.iter() + .filter(|op| matches!(op, Op::Add { field, .. } if field == "tagIds")) + .collect(); + assert_eq!(adds.len(), 1); + } +} diff --git a/src/pg_sync/ops.rs b/src/pg_sync/ops.rs new file mode 100644 index 00000000..eadbf5cf --- /dev/null +++ b/src/pg_sync/ops.rs @@ -0,0 +1,300 @@ +//! V2 ops data types for the ops-based sync pipeline. +//! +//! Ops are self-contained mutations: each carries the field name, old value (for removes), +//! and new value (for sets). This eliminates docstore reads on the write path. +//! +//! Op types: +//! - `set`: Set a scalar/sort field to a new value +//! - `remove`: Clear a slot from a field's bitmap (carries old value) +//! - `add`: Add a value to a multi-value field (tags, tools, etc.) +//! - `delete`: Delete a document (clears all bitmaps + alive bit) +//! - `queryOpSet`: Resolve slots via a BitDex query, apply nested ops to all matches + +use serde::{Deserialize, Serialize}; + +/// A single operation within an ops array. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[serde(tag = "op")] +pub enum Op { + /// Set a field to a value. For filter fields, sets the bit in the value's bitmap. + /// For sort fields, decomposes to bit layers. + #[serde(rename = "set")] + Set { + field: String, + value: serde_json::Value, + }, + + /// Remove a slot from a field's bitmap (old value). 
Used in remove/set pairs
+    /// for field changes: remove old value, then set new value.
+    #[serde(rename = "remove")]
+    Remove {
+        field: String,
+        value: serde_json::Value,
+    },
+
+    /// Add a value to a multi-value field (e.g., tagIds, toolIds).
+    /// Used for join-table INSERTs.
+    #[serde(rename = "add")]
+    Add {
+        field: String,
+        value: serde_json::Value,
+    },
+
+    /// Delete a document. Clears all filter/sort bitmap bits + alive bit.
+    /// Requires a docstore read to determine which bitmaps to clear.
+    #[serde(rename = "delete")]
+    Delete,
+
+    /// Query-resolved bulk operation. Resolves slots via a BitDex query string,
+    /// then applies the nested ops to all matching slots.
+    /// Used for fan-out tables (ModelVersion, Post, Model).
+    #[serde(rename = "queryOpSet")]
+    QueryOpSet {
+        /// BitDex query string (e.g., "modelVersionIds eq 456")
+        query: String,
+        /// Ops to apply to all slots matching the query
+        ops: Vec<Op>,
+    },
+}
+
+/// A row from the BitdexOps table.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct OpsRow {
+    /// Auto-incrementing ID (used as cursor position)
+    pub id: i64,
+    /// The entity (image) ID this op targets. For queryOpSet, this is the
+    /// source entity ID (e.g., ModelVersion ID, Post ID).
+    pub entity_id: i64,
+    /// Array of operations to apply
+    pub ops: Vec<Op>,
+}
+
+/// A batch of ops sent to the BitDex /ops endpoint.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct OpsBatch {
+    /// Per-entity ops
+    pub ops: Vec<EntityOps>,
+    /// Optional sync source metadata (cursor position, lag, etc.)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub meta: Option<SyncMeta>,
+}
+
+/// Ops for a single entity within a batch.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct EntityOps {
+    /// The entity (image) ID
+    pub entity_id: i64,
+    /// Operations to apply
+    pub ops: Vec<Op>,
+    /// If true, this entity should have its alive bit set (creates the slot if new).
+ /// Only the primary entity table (e.g., Image with sets_alive: true) sets this. + /// Join tables (tags, tools) leave this false — they only add multi-value bitmaps. + #[serde(default)] + pub creates_slot: bool, +} + +impl EntityOps { + /// Convenience constructor — creates_slot defaults to false. + pub fn new(entity_id: i64, ops: Vec) -> Self { + Self { entity_id, ops, creates_slot: false } + } + + /// Constructor for primary entity ops that should create alive slots. + pub fn with_alive(entity_id: i64, ops: Vec) -> Self { + Self { entity_id, ops, creates_slot: true } + } +} + +/// Sync source metadata, bundled with ops payloads. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SyncMeta { + /// Sync source identifier (e.g., "pg-sync-default", "clickhouse") + pub source: String, + /// Current cursor position in the ops table + #[serde(skip_serializing_if = "Option::is_none")] + pub cursor: Option, + /// Max ID in the ops table (for lag calculation) + #[serde(skip_serializing_if = "Option::is_none")] + pub max_id: Option, + /// Number of rows behind (max_id - cursor) + #[serde(skip_serializing_if = "Option::is_none")] + pub lag_rows: Option, +} + +/// SQL for creating the BitdexOps table and index. 
+pub const SETUP_OPS_SQL: &str = r#" +CREATE TABLE IF NOT EXISTS "BitdexOps" ( + id BIGSERIAL PRIMARY KEY, + entity_id BIGINT NOT NULL, + ops JSONB NOT NULL, + created_at TIMESTAMPTZ DEFAULT now() +); + +CREATE INDEX IF NOT EXISTS idx_bitdex_ops_id ON "BitdexOps" (id); +"#; + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn test_set_op_roundtrip() { + let op = Op::Set { + field: "nsfwLevel".into(), + value: json!(16), + }; + let json = serde_json::to_string(&op).unwrap(); + let parsed: Op = serde_json::from_str(&json).unwrap(); + assert_eq!(op, parsed); + } + + #[test] + fn test_remove_op_roundtrip() { + let op = Op::Remove { + field: "nsfwLevel".into(), + value: json!(8), + }; + let json = serde_json::to_string(&op).unwrap(); + let parsed: Op = serde_json::from_str(&json).unwrap(); + assert_eq!(op, parsed); + } + + #[test] + fn test_add_op_roundtrip() { + let op = Op::Add { + field: "tagIds".into(), + value: json!(42), + }; + let json = serde_json::to_string(&op).unwrap(); + let parsed: Op = serde_json::from_str(&json).unwrap(); + assert_eq!(op, parsed); + } + + #[test] + fn test_delete_op_roundtrip() { + let op = Op::Delete; + let json = serde_json::to_string(&op).unwrap(); + let parsed: Op = serde_json::from_str(&json).unwrap(); + assert_eq!(op, parsed); + } + + #[test] + fn test_query_op_set_roundtrip() { + let op = Op::QueryOpSet { + query: "modelVersionIds eq 456".into(), + ops: vec![ + Op::Remove { + field: "baseModel".into(), + value: json!("SD 1.5"), + }, + Op::Set { + field: "baseModel".into(), + value: json!("SDXL"), + }, + ], + }; + let json = serde_json::to_string(&op).unwrap(); + let parsed: Op = serde_json::from_str(&json).unwrap(); + assert_eq!(op, parsed); + } + + #[test] + fn test_ops_array_from_json() { + let json = json!([ + {"op": "remove", "field": "nsfwLevel", "value": 8}, + {"op": "set", "field": "nsfwLevel", "value": 16}, + {"op": "add", "field": "tagIds", "value": 42}, + {"op": "delete"} + ]); + let ops: 
Vec = serde_json::from_value(json).unwrap(); + assert_eq!(ops.len(), 4); + assert!(matches!(&ops[0], Op::Remove { field, .. } if field == "nsfwLevel")); + assert!(matches!(&ops[1], Op::Set { field, .. } if field == "nsfwLevel")); + assert!(matches!(&ops[2], Op::Add { field, .. } if field == "tagIds")); + assert!(matches!(&ops[3], Op::Delete)); + } + + #[test] + fn test_ops_batch_with_meta() { + let batch = OpsBatch { + ops: vec![EntityOps { + entity_id: 123, + creates_slot: false, + ops: vec![Op::Set { + field: "nsfwLevel".into(), + value: json!(16), + }], + }], + meta: Some(SyncMeta { + source: "pg-sync-default".into(), + cursor: Some(420_000_000), + max_id: Some(500_000_000), + lag_rows: Some(80_000_000), + }), + }; + let json = serde_json::to_string(&batch).unwrap(); + let parsed: OpsBatch = serde_json::from_str(&json).unwrap(); + assert_eq!(parsed.ops.len(), 1); + assert_eq!(parsed.ops[0].entity_id, 123); + assert!(parsed.meta.is_some()); + assert_eq!(parsed.meta.unwrap().source, "pg-sync-default"); + } + + #[test] + fn test_ops_batch_without_meta() { + let batch = OpsBatch { + ops: vec![], + meta: None, + }; + let json = serde_json::to_string(&batch).unwrap(); + assert!(!json.contains("meta")); + } + + #[test] + fn test_image_insert_ops() { + // Simulates what an Image INSERT trigger would produce + let ops: Vec = vec![ + Op::Set { field: "nsfwLevel".into(), value: json!(1) }, + Op::Set { field: "type".into(), value: json!("image") }, + Op::Set { field: "userId".into(), value: json!(12345) }, + Op::Set { field: "postId".into(), value: json!(67890) }, + Op::Set { field: "existedAt".into(), value: json!(1711234567) }, + ]; + let json = serde_json::to_value(&ops).unwrap(); + let parsed: Vec = serde_json::from_value(json).unwrap(); + assert_eq!(parsed.len(), 5); + } + + #[test] + fn test_image_update_ops_with_old_values() { + // Simulates Image UPDATE: nsfwLevel 8→16 + let ops: Vec = vec![ + Op::Remove { field: "nsfwLevel".into(), value: json!(8) }, + Op::Set { 
field: "nsfwLevel".into(), value: json!(16) }, + ]; + let json = serde_json::to_value(&ops).unwrap(); + let parsed: Vec = serde_json::from_value(json).unwrap(); + assert_eq!(parsed.len(), 2); + } + + #[test] + fn test_query_op_set_with_in_query() { + // Model POI change: fan-out to all model versions + let op = Op::QueryOpSet { + query: "modelVersionIds in [101, 102, 103]".into(), + ops: vec![Op::Set { + field: "poi".into(), + value: json!(true), + }], + }; + let json = serde_json::to_string(&op).unwrap(); + let parsed: Op = serde_json::from_str(&json).unwrap(); + if let Op::QueryOpSet { query, ops } = parsed { + assert!(query.contains("in [101")); + assert_eq!(ops.len(), 1); + } else { + panic!("Expected QueryOpSet"); + } + } +} diff --git a/src/pg_sync/ops_poller.rs b/src/pg_sync/ops_poller.rs new file mode 100644 index 00000000..994f4f89 --- /dev/null +++ b/src/pg_sync/ops_poller.rs @@ -0,0 +1,211 @@ +//! V2 ops poller: reads from BitdexOps table, deduplicates, and POSTs to BitDex /ops endpoint. +//! +//! Replaces the V1 outbox_poller by reading self-contained ops (with old+new values) +//! instead of entity IDs that require enrichment queries. +//! +//! Poll loop: +//! 1. On boot: read cursor from PG bitdex_cursors table +//! 2. SELECT from BitdexOps WHERE id > cursor ORDER BY id ASC LIMIT N +//! 3. Deserialize JSONB ops arrays +//! 4. Dedup via shared dedup_ops() +//! 5. POST batch to BitDex /ops endpoint with sync metadata +//! 6. Advance cursor in PG +//! 7. Report max_outbox_id for lag calculation + +use std::time::Duration; + +use sqlx::PgPool; +use tokio::time::interval; + +use super::bitdex_client::BitdexClient; +use super::op_dedup::dedup_ops; +use super::ops::{EntityOps, Op, OpsBatch, SyncMeta}; + +/// Row from BitdexOps table. +#[derive(Debug, sqlx::FromRow)] +struct OpsRow { + id: i64, + entity_id: i64, + ops: sqlx::types::Json>, +} + +/// Run the V2 ops poller loop. Runs forever until cancelled. 
+pub async fn run_ops_poller( + pool: &PgPool, + client: &BitdexClient, + poll_interval_secs: u64, + batch_limit: i64, + cursor_name: &str, + replica_id: Option<&str>, +) -> Result<(), String> { + // Wait for BitDex health + eprintln!("Ops poller waiting for BitDex to be healthy..."); + loop { + if client.is_healthy().await { + break; + } + tokio::time::sleep(Duration::from_secs(2)).await; + } + eprintln!("BitDex is healthy."); + + // Read initial cursor from PG + let mut cursor: i64 = read_cursor_from_pg(pool, cursor_name) + .await + .unwrap_or(0); + eprintln!( + "Ops poller started (interval={}s, batch_limit={}, cursor_name={}, starting_cursor={})", + poll_interval_secs, batch_limit, cursor_name, cursor + ); + + let mut ticker = interval(Duration::from_secs(poll_interval_secs)); + let mut bitdex_was_down = false; + + loop { + ticker.tick().await; + + // Health gate + if !client.is_healthy().await { + if !bitdex_was_down { + eprintln!("Ops poller: BitDex unreachable, pausing"); + bitdex_was_down = true; + } + continue; + } + if bitdex_was_down { + eprintln!("Ops poller: BitDex is back, resuming"); + bitdex_was_down = false; + } + + let cycle_start = std::time::Instant::now(); + match poll_and_process(pool, client, batch_limit, cursor_name, &mut cursor, replica_id).await { + Ok(processed) => { + let cycle_secs = cycle_start.elapsed().as_secs_f64(); + if processed > 0 { + eprintln!("Ops poller: processed {processed} ops (cursor={cursor}, cycle={cycle_secs:.3}s)"); + } + } + Err(e) => { + eprintln!("Ops poller error: {e}"); + } + } + } +} + +/// Single poll + process cycle. 
+async fn poll_and_process( + pool: &PgPool, + client: &BitdexClient, + batch_limit: i64, + cursor_name: &str, + cursor: &mut i64, + replica_id: Option<&str>, +) -> Result { + // Fetch ops after cursor + let rows = poll_ops_from_cursor(pool, *cursor, batch_limit) + .await + .map_err(|e| format!("poll_ops: {e}"))?; + + if rows.is_empty() { + return Ok(0); + } + + let max_id = rows.iter().map(|r| r.id).max().unwrap_or(*cursor); + let total_rows = rows.len(); + + // Convert to EntityOps + let mut batch: Vec = rows + .into_iter() + .map(|row| EntityOps { + entity_id: row.entity_id, + ops: row.ops.0, + creates_slot: false, // Determined by trigger config at source; override in pg-sync + }) + .collect(); + + // Dedup + dedup_ops(&mut batch); + + if batch.is_empty() { + // All ops cancelled out — still advance cursor + advance_cursor(pool, cursor_name, max_id, cursor).await?; + return Ok(total_rows); + } + + // Get max ops ID for lag calculation + let max_ops_id = get_max_ops_id(pool).await.unwrap_or(max_id); + + // Build batch with metadata + let ops_batch = OpsBatch { + ops: batch, + meta: Some(SyncMeta { + source: replica_id.unwrap_or("default").to_string(), + cursor: Some(max_id), + max_id: Some(max_ops_id), + lag_rows: Some(max_ops_id - max_id), + }), + }; + + // POST to BitDex + client + .post_ops(&ops_batch) + .await + .map_err(|e| format!("post_ops: {e}"))?; + + // Advance cursor + advance_cursor(pool, cursor_name, max_id, cursor).await?; + + Ok(total_rows) +} + +async fn advance_cursor( + pool: &PgPool, + cursor_name: &str, + max_id: i64, + cursor: &mut i64, +) -> Result<(), String> { + super::queries::upsert_cursor(pool, cursor_name, max_id) + .await + .map_err(|e| format!("upsert_cursor: {e}"))?; + *cursor = max_id; + Ok(()) +} + +// ── SQL queries ── + +/// Read cursor from PG bitdex_cursors table. 
+async fn read_cursor_from_pg(pool: &PgPool, cursor_name: &str) -> Result { + let row: Option<(i64,)> = sqlx::query_as( + r#"SELECT last_outbox_id FROM bitdex_cursors WHERE replica_id = $1"#, + ) + .bind(cursor_name) + .fetch_optional(pool) + .await?; + Ok(row.map(|r| r.0).unwrap_or(0)) +} + +/// Poll ops from BitdexOps table after a cursor position. +async fn poll_ops_from_cursor( + pool: &PgPool, + cursor: i64, + limit: i64, +) -> Result, sqlx::Error> { + sqlx::query_as::<_, OpsRow>( + r#"SELECT id, entity_id, ops FROM "BitdexOps" + WHERE id > $1 + ORDER BY id ASC + LIMIT $2"#, + ) + .bind(cursor) + .bind(limit) + .fetch_all(pool) + .await +} + +/// Get the current max ops ID (for lag calculation). +async fn get_max_ops_id(pool: &PgPool) -> Result { + let row: (Option,) = + sqlx::query_as(r#"SELECT MAX(id) FROM "BitdexOps""#) + .fetch_one(pool) + .await?; + Ok(row.0.unwrap_or(0)) +} diff --git a/src/pg_sync/trigger_gen.rs b/src/pg_sync/trigger_gen.rs new file mode 100644 index 00000000..82652b35 --- /dev/null +++ b/src/pg_sync/trigger_gen.rs @@ -0,0 +1,558 @@ +//! YAML-driven PG trigger SQL generator for V2 ops pipeline. +//! +//! Reads a `sync_sources` YAML config and generates PL/pgSQL trigger functions +//! that emit ops into the BitdexOps table. Two table types: +//! +//! **Direct tables** (slot = PG column): +//! - `track_fields`: scalar fields → emit remove/set pairs via IS DISTINCT FROM +//! - `field` + `value_field`: multi-value join tables → emit add/remove +//! - `on_delete: delete_slot`: emit delete op +//! - `sets_alive: true`: only this table can create new alive slots +//! +//! **Fan-out tables** (slots resolved by BitDex query): +//! - `query`: BitDex query template with {column} placeholders +//! - `query_source`: optional PG subquery for cross-table values +//! 
- `track_fields`: fields to track on the source table + +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; + +use serde::Deserialize; + +/// A sync source definition from the YAML config. +#[derive(Debug, Clone, Deserialize)] +pub struct SyncSource { + /// PG table name (e.g., "Image", "TagsOnImageNew") + pub table: String, + + /// For direct tables: PG column that maps to the BitDex slot ID + pub slot_field: Option, + + /// For direct tables: list of scalar fields to track. + /// Can include expressions: "GREATEST({scannedAt}, {createdAt}) as existedAt" + pub track_fields: Option>, + + /// For multi-value join tables: the BitDex field name (e.g., "tagIds") + pub field: Option, + + /// For multi-value join tables: the PG column containing the value (e.g., "tagId") + pub value_field: Option, + + /// Optional SQL WHERE filter for the trigger (e.g., CollectionItem status filter) + pub filter: Option, + + /// If true, this table's INSERT ops set the alive bit on new slots + #[serde(default)] + pub sets_alive: bool, + + /// If "delete_slot", emit a delete op on DELETE + pub on_delete: Option, + + /// For fan-out tables: BitDex query template with {column} placeholders + pub query: Option, + + /// For fan-out tables: PG subquery to get values not on the triggering table + pub query_source: Option, + + /// Tables that must be loaded before this one during dumps + #[serde(rename = "dependsOn")] + pub depends_on: Option>, +} + +/// Full sync config loaded from YAML. +#[derive(Debug, Clone, Deserialize)] +pub struct SyncConfig { + pub sync_sources: Vec, +} + +impl SyncConfig { + /// Load from a YAML string. + pub fn from_yaml(yaml: &str) -> Result { + serde_yaml::from_str(yaml).map_err(|e| format!("Failed to parse sync config: {e}")) + } +} + +/// Generate the trigger function name with hash for reconciliation. 
+/// Format: bitdex_{table}_{hash8} +pub fn trigger_function_name(source: &SyncSource) -> String { + let body = generate_trigger_body(source); + let hash = short_hash(&body); + format!( + "bitdex_{}_ops_{}", + source.table.to_lowercase(), + hash + ) +} + +/// Generate the trigger name. +pub fn trigger_name(source: &SyncSource) -> String { + let body = generate_trigger_body(source); + let hash = short_hash(&body); + format!("bitdex_{}_{}", source.table.to_lowercase(), hash) +} + +/// Generate the full CREATE OR REPLACE FUNCTION + CREATE TRIGGER SQL +/// for a sync source. +pub fn generate_trigger_sql(source: &SyncSource) -> String { + let func_name = trigger_function_name(source); + let trig_name = trigger_name(source); + let body = generate_trigger_body(source); + + let trigger_events = if source.field.is_some() { + // Multi-value join table: INSERT and DELETE only + "AFTER INSERT OR DELETE" + } else if source.on_delete.as_deref() == Some("delete_slot") { + "AFTER INSERT OR UPDATE OR DELETE" + } else { + "AFTER INSERT OR UPDATE" + }; + + format!( + r#"CREATE OR REPLACE FUNCTION {func_name}() RETURNS trigger AS $$ +{body} +$$ LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS {trig_name} ON "{table}"; +CREATE TRIGGER {trig_name} {trigger_events} ON "{table}" + FOR EACH ROW EXECUTE FUNCTION {func_name}(); +ALTER TABLE "{table}" ENABLE ALWAYS TRIGGER {trig_name}; +"#, + func_name = func_name, + trig_name = trig_name, + body = body, + trigger_events = trigger_events, + table = source.table, + ) +} + +/// Generate the PL/pgSQL function body for a sync source. +fn generate_trigger_body(source: &SyncSource) -> String { + if let Some(ref field) = source.field { + // Multi-value join table (tags, tools, techniques, etc.) 
+ generate_multi_value_body(source, field) + } else if source.query.is_some() { + // Fan-out table (ModelVersion, Post, Model) + generate_fan_out_body(source) + } else { + // Direct table (Image) + generate_direct_body(source) + } +} + +/// Generate body for direct tables (e.g., Image). +fn generate_direct_body(source: &SyncSource) -> String { + let slot_field = source.slot_field.as_deref().unwrap_or("id"); + let track_fields = source.track_fields.as_deref().unwrap_or(&[]); + let has_delete = source.on_delete.as_deref() == Some("delete_slot"); + + let mut body = String::from("DECLARE\n _ops jsonb;\nBEGIN\n"); + + // INSERT: emit set ops for all tracked fields (no remove since no prior state) + body.push_str(" IF TG_OP = 'INSERT' THEN\n"); + body.push_str(" _ops := jsonb_build_array(\n"); + let insert_ops: Vec = track_fields + .iter() + .map(|f| { + let (field_name, expr) = parse_track_field(f); + let new_expr = substitute_columns(&expr, "NEW"); + format!( + " jsonb_build_object('op', 'set', 'field', '{}', 'value', to_jsonb({}))", + field_name, new_expr + ) + }) + .collect(); + body.push_str(&insert_ops.join(",\n")); + body.push_str("\n );\n"); + body.push_str(&format!( + " INSERT INTO \"BitdexOps\" (entity_id, ops) VALUES (NEW.\"{}\", _ops);\n", + slot_field + )); + body.push_str(" RETURN NEW;\n"); + + // DELETE + if has_delete { + body.push_str(" ELSIF TG_OP = 'DELETE' THEN\n"); + body.push_str(&format!( + " INSERT INTO \"BitdexOps\" (entity_id, ops) VALUES (OLD.\"{}\", '[{{\"op\":\"delete\"}}]'::jsonb);\n", + slot_field + )); + body.push_str(" RETURN OLD;\n"); + } + + // UPDATE: emit remove/set pairs only for changed fields + body.push_str(" ELSE\n"); + body.push_str(" _ops := '[]'::jsonb;\n"); + for f in track_fields { + let (field_name, expr) = parse_track_field(f); + let old_expr = substitute_columns(&expr, "OLD"); + let new_expr = substitute_columns(&expr, "NEW"); + body.push_str(&format!( + " IF ({old}) IS DISTINCT FROM ({new}) THEN\n\ + \x20 _ops := _ops || 
jsonb_build_array(\n\ + \x20 jsonb_build_object('op', 'remove', 'field', '{field}', 'value', to_jsonb({old})),\n\ + \x20 jsonb_build_object('op', 'set', 'field', '{field}', 'value', to_jsonb({new}))\n\ + \x20 );\n\ + \x20 END IF;\n", + old = old_expr, + new = new_expr, + field = field_name, + )); + } + body.push_str(" IF jsonb_array_length(_ops) > 0 THEN\n"); + body.push_str(&format!( + " INSERT INTO \"BitdexOps\" (entity_id, ops) VALUES (NEW.\"{}\", _ops);\n", + slot_field + )); + body.push_str(" END IF;\n"); + body.push_str(" RETURN NEW;\n"); + body.push_str(" END IF;\n"); + body.push_str("END;"); + + body +} + +/// Generate body for multi-value join tables (e.g., TagsOnImageNew). +fn generate_multi_value_body(source: &SyncSource, field: &str) -> String { + let slot_field = source.slot_field.as_deref().unwrap_or("imageId"); + let value_field = source.value_field.as_deref().unwrap_or("id"); + let filter_clause = source + .filter + .as_ref() + .map(|f| format!(" IF {} THEN\n", f.replace("imageId", "NEW.\"imageId\""))) + .unwrap_or_default(); + let filter_end = if source.filter.is_some() { + " END IF;\n" + } else { + "" + }; + + format!( + r#"BEGIN + IF TG_OP = 'INSERT' THEN +{filter_start} INSERT INTO "BitdexOps" (entity_id, ops) + VALUES (NEW."{slot}", jsonb_build_array( + jsonb_build_object('op', 'add', 'field', '{field}', 'value', to_jsonb(NEW."{value}")) + )); +{filter_end} RETURN NEW; + ELSIF TG_OP = 'DELETE' THEN + INSERT INTO "BitdexOps" (entity_id, ops) + VALUES (OLD."{slot}", jsonb_build_array( + jsonb_build_object('op', 'remove', 'field', '{field}', 'value', to_jsonb(OLD."{value}")) + )); + RETURN OLD; + END IF; + RETURN COALESCE(NEW, OLD); +END;"#, + slot = slot_field, + field = field, + value = value_field, + filter_start = filter_clause, + filter_end = filter_end, + ) +} + +/// Generate body for fan-out tables (e.g., ModelVersion, Post). 
+fn generate_fan_out_body(source: &SyncSource) -> String { + let query_template = source.query.as_deref().unwrap_or(""); + let track_fields = source.track_fields.as_deref().unwrap_or(&[]); + + let mut body = String::from("DECLARE\n _ops jsonb;\n _query text;\n"); + + // If there's a query_source, we need a variable for its result + if source.query_source.is_some() { + body.push_str(" _source_result jsonb;\n"); + } + body.push_str("BEGIN\n"); + body.push_str(" IF TG_OP = 'UPDATE' THEN\n"); + + // Build the query string with column substitution + if let Some(ref query_source) = source.query_source { + let source_sql = substitute_columns(query_source, "NEW"); + body.push_str(&format!( + " EXECUTE format('SELECT ({})') INTO _source_result;\n", + source_sql.replace('\'', "''") + )); + // Substitute the query_source result into the query template + body.push_str(&format!( + " _query := '{}';\n", + query_template + )); + // Replace placeholders with source result values + body.push_str(" -- Substitute source values into query template\n"); + } else { + // Direct substitution from NEW columns + let query_sql = substitute_columns(query_template, "NEW"); + body.push_str(&format!(" _query := '{}';\n", query_sql)); + } + + // Build ops array from tracked fields that changed + body.push_str(" _ops := '[]'::jsonb;\n"); + for f in track_fields { + let (field_name, expr) = parse_track_field(f); + let old_expr = substitute_columns(&expr, "OLD"); + let new_expr = substitute_columns(&expr, "NEW"); + body.push_str(&format!( + " IF ({old}) IS DISTINCT FROM ({new}) THEN\n\ + \x20 _ops := _ops || jsonb_build_array(\n\ + \x20 jsonb_build_object('op', 'remove', 'field', '{field}', 'value', to_jsonb({old})),\n\ + \x20 jsonb_build_object('op', 'set', 'field', '{field}', 'value', to_jsonb({new}))\n\ + \x20 );\n\ + \x20 END IF;\n", + old = old_expr, + new = new_expr, + field = field_name, + )); + } + + body.push_str(" IF jsonb_array_length(_ops) > 0 THEN\n"); + body.push_str(&format!( + " 
INSERT INTO \"BitdexOps\" (entity_id, ops) VALUES (NEW.id, jsonb_build_array(\n\ + \x20 jsonb_build_object('op', 'queryOpSet', 'query', _query, 'ops', _ops)\n\ + \x20 ));\n" + )); + body.push_str(" END IF;\n"); + body.push_str(" RETURN NEW;\n"); + body.push_str(" END IF;\n"); + body.push_str(" RETURN COALESCE(NEW, OLD);\n"); + body.push_str("END;"); + + body +} + +/// Parse a track_field entry. Returns (bitdex_field_name, sql_expression). +/// Simple field: "nsfwLevel" → ("nsfwLevel", "\"nsfwLevel\"") +/// Expression: "GREATEST({scannedAt}, {createdAt}) as existedAt" → ("existedAt", "GREATEST(\"scannedAt\", \"createdAt\")") +fn parse_track_field(field: &str) -> (String, String) { + if let Some(as_pos) = field.to_lowercase().rfind(" as ") { + let expr = &field[..as_pos].trim(); + let alias = &field[as_pos + 4..].trim(); + // Replace {col} with "col" (quoted column reference) + let sql = expr + .replace('{', "\"") + .replace('}', "\""); + (alias.to_string(), sql) + } else { + // Simple field name + (field.to_string(), format!("\"{}\"", field)) + } +} + +/// Substitute {column} placeholders with prefix."column" references. +/// E.g., substitute_columns("GREATEST({scannedAt}, {createdAt})", "NEW") +/// → "GREATEST(NEW.\"scannedAt\", NEW.\"createdAt\")" +fn substitute_columns(expr: &str, prefix: &str) -> String { + let mut result = String::new(); + let mut chars = expr.chars().peekable(); + while let Some(c) = chars.next() { + if c == '{' { + let mut col = String::new(); + while let Some(&next) = chars.peek() { + if next == '}' { + chars.next(); + break; + } + col.push(chars.next().unwrap()); + } + result.push_str(&format!("{}.\"{}\"", prefix, col)); + } else { + result.push(c); + } + } + result +} + +/// Compute a short (8-char) hash of a string. 
fn short_hash(s: &str) -> String {
    // DefaultHasher (SipHash) is stable within a process run; the full 64-bit
    // value is rendered as 16 hex digits, of which the first 8 are kept.
    let mut hasher = DefaultHasher::new();
    s.hash(&mut hasher);
    format!("{:016x}", hasher.finish())[..8].to_string()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_track_field_simple() {
        // A bare field name becomes a quoted column reference.
        let (name, expr) = parse_track_field("nsfwLevel");
        assert_eq!(name, "nsfwLevel");
        assert_eq!(expr, "\"nsfwLevel\"");
    }

    #[test]
    fn test_parse_track_field_expression() {
        // "expr as alias" splits on the trailing " as " and quotes {col} refs.
        let (name, expr) = parse_track_field("GREATEST({scannedAt}, {createdAt}) as existedAt");
        assert_eq!(name, "existedAt");
        assert_eq!(expr, "GREATEST(\"scannedAt\", \"createdAt\")");
    }

    #[test]
    fn test_substitute_columns() {
        let result = substitute_columns("GREATEST({scannedAt}, {createdAt})", "NEW");
        assert_eq!(result, "GREATEST(NEW.\"scannedAt\", NEW.\"createdAt\")");
    }

    #[test]
    fn test_substitute_columns_simple() {
        let result = substitute_columns("{nsfwLevel}", "OLD");
        assert_eq!(result, "OLD.\"nsfwLevel\"");
    }

    #[test]
    fn test_generate_multi_value_trigger() {
        // Join-table config: expects add/remove ops and an ALWAYS-enabled trigger.
        let source = SyncSource {
            table: "TagsOnImageNew".into(),
            slot_field: Some("imageId".into()),
            track_fields: None,
            field: Some("tagIds".into()),
            value_field: Some("tagId".into()),
            filter: None,
            sets_alive: false,
            on_delete: None,
            query: None,
            query_source: None,
            depends_on: None,
        };
        let sql = generate_trigger_sql(&source);
        assert!(sql.contains("CREATE OR REPLACE FUNCTION"));
        assert!(sql.contains("'add'"));
        assert!(sql.contains("'remove'"));
        assert!(sql.contains("tagIds"));
        assert!(sql.contains("ENABLE ALWAYS"));
    }

    #[test]
    fn test_generate_direct_trigger() {
        // Direct-table config: expects change detection and a delete op.
        let source = SyncSource {
            table: "Image".into(),
            slot_field: Some("id".into()),
            track_fields: Some(vec!["nsfwLevel".into(), "type".into()]),
            field: None,
            value_field: None,
            filter: None,
            sets_alive: true,
            on_delete: Some("delete_slot".into()),
            query: None,
            query_source: None,
            depends_on: None,
        };
        let sql = generate_trigger_sql(&source);
        assert!(sql.contains("IS DISTINCT FROM"));
        assert!(sql.contains("nsfwLevel"));
        assert!(sql.contains("delete"));
    }

    #[test]
    fn test_generate_fan_out_trigger() {
        // Fan-out config: expects a queryOpSet wrapper with the query template.
        let source = SyncSource {
            table: "ModelVersion".into(),
            slot_field: None,
            track_fields: Some(vec!["baseModel".into()]),
            field: None,
            value_field: None,
            filter: None,
            sets_alive: false,
            on_delete: None,
            query: Some("modelVersionIds eq {id}".into()),
            query_source: None,
            depends_on: None,
        };
        let sql = generate_trigger_sql(&source);
        assert!(sql.contains("queryOpSet"));
        assert!(sql.contains("modelVersionIds eq"));
    }

    #[test]
    fn test_trigger_name_includes_hash() {
        // Names are "bitdex_{table}_" plus an 8-char body hash.
        let source = SyncSource {
            table: "Image".into(),
            slot_field: Some("id".into()),
            track_fields: Some(vec!["nsfwLevel".into()]),
            field: None,
            value_field: None,
            filter: None,
            sets_alive: false,
            on_delete: None,
            query: None,
            query_source: None,
            depends_on: None,
        };
        let name = trigger_name(&source);
        assert!(name.starts_with("bitdex_image_"));
        assert_eq!(name.len(), "bitdex_image_".len() + 8);
    }

    #[test]
    fn test_trigger_hash_changes_with_config() {
        // Any config change that alters the generated body must change the hash,
        // so stale triggers are detectable by name.
        let source1 = SyncSource {
            table: "Image".into(),
            slot_field: Some("id".into()),
            track_fields: Some(vec!["nsfwLevel".into()]),
            field: None,
            value_field: None,
            filter: None,
            sets_alive: false,
            on_delete: None,
            query: None,
            query_source: None,
            depends_on: None,
        };
        let source2 = SyncSource {
            track_fields: Some(vec!["nsfwLevel".into(), "type".into()]),
            ..source1.clone()
        };
        let name1 = trigger_name(&source1);
        let name2 = trigger_name(&source2);
        assert_ne!(name1, name2, "Different configs should produce different hashes");
    }

    #[test]
    fn test_yaml_parsing() {
        // End-to-end config parse covering all three table shapes.
        let yaml = r#"
sync_sources:
  - table: Image
    slot_field: id
    sets_alive: true
    track_fields: [nsfwLevel, type]
    on_delete: delete_slot
  - table: TagsOnImageNew
    slot_field: imageId
    field: tagIds
    value_field: tagId
  - table: ModelVersion
    query: "modelVersionIds eq {id}"
    track_fields: [baseModel]
"#;
        let config = SyncConfig::from_yaml(yaml).unwrap();
        assert_eq!(config.sync_sources.len(), 3);
        assert_eq!(config.sync_sources[0].table, "Image");
        assert!(config.sync_sources[0].sets_alive);
        assert_eq!(config.sync_sources[1].field.as_deref(), Some("tagIds"));
        assert!(config.sync_sources[2].query.is_some());
    }

    #[test]
    fn test_expression_in_track_fields() {
        // Expression track_fields (GREATEST, bit-flag tests) flow into the SQL.
        let source = SyncSource {
            table: "Image".into(),
            slot_field: Some("id".into()),
            track_fields: Some(vec![
                "nsfwLevel".into(),
                "GREATEST({scannedAt}, {createdAt}) as existedAt".into(),
                "({flags} & (1 << 13)) != 0 AND ({flags} & (1 << 2)) = 0 as hasMeta".into(),
            ]),
            field: None,
            value_field: None,
            filter: None,
            sets_alive: true,
            on_delete: Some("delete_slot".into()),
            query: None,
            query_source: None,
            depends_on: None,
        };
        let sql = generate_trigger_sql(&source);
        assert!(sql.contains("GREATEST"));
        assert!(sql.contains("existedAt"));
        assert!(sql.contains("hasMeta"));
    }
}
diff --git a/src/server.rs b/src/server.rs
index a72435c9..dca83926 100644
--- a/src/server.rs
+++ b/src/server.rs
@@ -290,6 +290,15 @@ struct AppState {
    metrics_bitmap_memory: AtomicBool,
    metrics_eviction_stats: AtomicBool,
    metrics_boundstore_disk: AtomicBool,
    /// WAL writer for V2 ops endpoint. Created lazily on first ops POST.
    #[cfg(feature = "pg-sync")]
    ops_wal: Mutex>,
    /// Latest sync source metadata (cursor, lag) keyed by source name.
    #[cfg(feature = "pg-sync")]
    sync_meta: Mutex>,
    /// Dump registry for tracking table dump lifecycle.
+ #[cfg(feature = "pg-sync")] + dump_registry: Mutex, } type SharedState = Arc; @@ -991,6 +1000,15 @@ impl BitdexServer { metrics_bitmap_memory: AtomicBool::new(true), metrics_eviction_stats: AtomicBool::new(true), metrics_boundstore_disk: AtomicBool::new(true), + #[cfg(feature = "pg-sync")] + ops_wal: Mutex::new(None), + #[cfg(feature = "pg-sync")] + sync_meta: Mutex::new(std::collections::HashMap::new()), + #[cfg(feature = "pg-sync")] + dump_registry: { + let dumps_path = self.data_dir.join("dumps.json"); + Mutex::new(crate::pg_sync::dump::DumpRegistry::load(&dumps_path)) + }, }); // Try to restore an existing index from disk @@ -1019,6 +1037,78 @@ impl BitdexServer { } } + // Spawn WAL reader thread if pg-sync feature is enabled and index exists + #[cfg(feature = "pg-sync")] + { + let wal_dir = self.data_dir.join("wal"); + let wal_path = wal_dir.join("ops.wal"); + let cursor_path = wal_dir.join("cursor"); + let wal_state = Arc::clone(&state); + std::thread::Builder::new() + .name("wal-reader".into()) + .spawn(move || { + let cursor = crate::ops_processor::load_cursor(&cursor_path); + let mut reader = crate::ops_wal::WalReader::new(&wal_path, cursor); + eprintln!("WAL reader started (cursor={cursor}, path={})", wal_path.display()); + + loop { + // Read a batch from the WAL + match reader.read_batch(10_000) { + Ok(batch) if !batch.entries.is_empty() => { + // Get engine reference + let engine = { + let guard = wal_state.index.lock(); + guard.as_ref().map(|idx| Arc::clone(&idx.engine)) + }; + + if let Some(engine) = engine { + // Build FieldMeta and CoalescerSink for the ops processor + let meta = crate::ops_processor::FieldMeta::from_config(engine.config()); + let sender = engine.mutation_sender(); + let mut sink = crate::ingester::CoalescerSink::new(sender); + + let mut entries = batch.entries; + let (applied, skipped, errors) = + crate::ops_processor::apply_ops_batch( + &mut sink, &meta, &mut entries, Some(&engine), + ); + + if applied > 0 || errors > 0 { + 
eprintln!( + "WAL reader: applied={applied} skipped={skipped} errors={errors} cursor={}", + reader.cursor() + ); + } + + // Persist cursor after successful processing + if let Err(e) = crate::ops_processor::save_cursor(&cursor_path, reader.cursor()) { + eprintln!("WAL reader: failed to save cursor: {e}"); + } + + // Update WAL bytes metric + let wal_size = std::fs::metadata(&wal_path).map(|m| m.len()).unwrap_or(0); + wal_state.metrics.sync_wal_bytes + .with_label_values(&["wal-reader"]) + .set(wal_size as i64); + } else { + // No index loaded yet — sleep and retry + std::thread::sleep(std::time::Duration::from_secs(1)); + } + } + Ok(_) => { + // No new records — sleep briefly + std::thread::sleep(std::time::Duration::from_millis(50)); + } + Err(e) => { + eprintln!("WAL reader error: {e}"); + std::thread::sleep(std::time::Duration::from_secs(1)); + } + } + } + }) + .ok(); + } + let shutdown_state = Arc::clone(&state); // Admin routes — require Bearer token (or disabled if no token configured) @@ -1068,6 +1158,13 @@ impl BitdexServer { .route("/debug/heap-dump", axum::routing::post(handle_heap_dump)) .route("/api/formats", get(handle_list_formats)) .route("/api/internal/pgsync-metrics", post(handle_pgsync_metrics)) + .route("/api/indexes/{name}/ops", post(handle_ops)) + .route("/api/internal/sync-lag", get(handle_sync_lag)) + .route("/api/indexes/{name}/dumps", get(handle_list_dumps)) + .route("/api/indexes/{name}/dumps", put(handle_register_dump)) + .route("/api/indexes/{name}/dumps/{dump_name}/loaded", post(handle_dump_loaded)) + .route("/api/indexes/{name}/dumps/{dump_name}", delete(handle_delete_dump)) + .route("/api/indexes/{name}/dumps/clear", post(handle_clear_dumps)) .route("/metrics", get(handle_metrics)) .route("/", get(handle_ui)) .with_state(Arc::clone(&state)); @@ -4156,6 +4253,233 @@ async fn handle_pgsync_metrics( StatusCode::NO_CONTENT } +/// POST /api/indexes/{name}/ops — Accept a batch of sync ops, append to WAL. 
+/// Returns 200 only after all records are written and fsynced. +#[cfg(feature = "pg-sync")] +async fn handle_ops( + State(state): State, + AxumPath(name): AxumPath, + Json(batch): Json, +) -> impl IntoResponse { + // Verify index exists + { + let guard = state.index.lock(); + match guard.as_ref() { + Some(idx) if idx.definition.name == name => {} + _ => { + return ( + StatusCode::NOT_FOUND, + Json(serde_json::json!({"error": format!("Index '{}' not found", name)})), + ).into_response(); + } + } + } + + // Store sync metadata + update Prometheus metrics + if let Some(meta) = &batch.meta { + let mut sync_meta = state.sync_meta.lock(); + sync_meta.insert(meta.source.clone(), meta.clone()); + + let m = &state.metrics; + let source = meta.source.as_str(); + if let Some(cursor) = meta.cursor { + m.sync_cursor_position.with_label_values(&[source]).set(cursor); + } + if let Some(max_id) = meta.max_id { + m.sync_max_id.with_label_values(&[source]).set(max_id); + } + if let Some(lag) = meta.lag_rows { + m.sync_lag_rows.with_label_values(&[source]).set(lag); + } + } + + let ops_count = batch.ops.len(); + if ops_count == 0 { + return (StatusCode::OK, Json(serde_json::json!({"accepted": 0}))).into_response(); + } + + // Ensure WAL writer exists (lazy init) + let wal_path = { + let mut wal_guard = state.ops_wal.lock(); + if wal_guard.is_none() { + let wal_dir = state.data_dir.join("wal"); + std::fs::create_dir_all(&wal_dir).ok(); + let path = wal_dir.join("ops.wal"); + *wal_guard = Some(crate::ops_wal::WalWriter::new(path)); + } + wal_guard.as_ref().unwrap().path().to_path_buf() + }; + + // Write to WAL on blocking thread (fsync is blocking I/O) + let result = tokio::task::spawn_blocking(move || { + let writer = crate::ops_wal::WalWriter::new(&wal_path); + writer.append_batch(&batch.ops) + }) + .await; + + match result { + Ok(Ok(bytes)) => { + (StatusCode::OK, Json(serde_json::json!({ + "accepted": ops_count, + "bytes_written": bytes, + }))).into_response() + } + Ok(Err(e)) => 
{ + eprintln!("WAL write error: {e}"); + (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ + "error": format!("WAL write failed: {e}"), + }))).into_response() + } + Err(e) => { + eprintln!("WAL write task panicked: {e}"); + (StatusCode::INTERNAL_SERVER_ERROR, Json(serde_json::json!({ + "error": "Internal error", + }))).into_response() + } + } +} + +/// Fallback for when pg-sync feature is disabled. +#[cfg(not(feature = "pg-sync"))] +async fn handle_ops( + AxumPath(_name): AxumPath, +) -> impl IntoResponse { + (StatusCode::NOT_FOUND, Json(serde_json::json!({"error": "pg-sync feature not enabled"}))) +} + +// ── Dump endpoints ── + +/// GET /api/indexes/{name}/dumps — List all dumps and their status. +#[cfg(feature = "pg-sync")] +async fn handle_list_dumps( + State(state): State, + AxumPath(_name): AxumPath, +) -> impl IntoResponse { + let reg = state.dump_registry.lock(); + Json(serde_json::json!({ + "dumps": reg.dumps, + "all_complete": reg.all_complete(), + })) +} + +#[cfg(not(feature = "pg-sync"))] +async fn handle_list_dumps(AxumPath(_name): AxumPath) -> impl IntoResponse { + Json(serde_json::json!({"dumps": {}})) +} + +/// PUT /api/indexes/{name}/dumps — Register a new dump. 
+#[cfg(feature = "pg-sync")] +async fn handle_register_dump( + State(state): State, + AxumPath(_name): AxumPath, + Json(body): Json, +) -> impl IntoResponse { + let dump_name = body["name"].as_str().unwrap_or("unknown").to_string(); + let wal_path = body["wal_path"].as_str().map(|s| s.to_string()); + + let mut reg = state.dump_registry.lock(); + reg.register(dump_name.clone(), wal_path); + + let dumps_path = state.data_dir.join("dumps.json"); + if let Err(e) = reg.save(&dumps_path) { + eprintln!("Warning: failed to save dump registry: {e}"); + } + + (StatusCode::CREATED, Json(serde_json::json!({ + "name": dump_name, + "status": "writing", + }))) +} + +#[cfg(not(feature = "pg-sync"))] +async fn handle_register_dump( + AxumPath(_name): AxumPath, + Json(_body): Json, +) -> impl IntoResponse { + (StatusCode::NOT_FOUND, Json(serde_json::json!({"error": "pg-sync not enabled"}))) +} + +/// POST /api/indexes/{name}/dumps/{dump_name}/loaded — Signal dump file is complete. +#[cfg(feature = "pg-sync")] +async fn handle_dump_loaded( + State(state): State, + AxumPath((_name, dump_name)): AxumPath<(String, String)>, + Json(body): Json, +) -> impl IntoResponse { + let ops_written = body["ops_written"].as_u64().unwrap_or(0); + + let mut reg = state.dump_registry.lock(); + match reg.mark_loaded(&dump_name, ops_written) { + Some(_) => { + let dumps_path = state.data_dir.join("dumps.json"); + reg.save(&dumps_path).ok(); + Json(serde_json::json!({"status": "loading", "name": dump_name})) + } + None => Json(serde_json::json!({"error": format!("Dump '{}' not found", dump_name)})), + } +} + +#[cfg(not(feature = "pg-sync"))] +async fn handle_dump_loaded( + AxumPath((_name, _dump_name)): AxumPath<(String, String)>, + Json(_body): Json, +) -> impl IntoResponse { + Json(serde_json::json!({"error": "pg-sync not enabled"})) +} + +/// DELETE /api/indexes/{name}/dumps/{dump_name} — Remove a dump from history. 
+#[cfg(feature = "pg-sync")] +async fn handle_delete_dump( + State(state): State, + AxumPath((_name, dump_name)): AxumPath<(String, String)>, +) -> impl IntoResponse { + let mut reg = state.dump_registry.lock(); + reg.remove(&dump_name); + let dumps_path = state.data_dir.join("dumps.json"); + reg.save(&dumps_path).ok(); + StatusCode::NO_CONTENT +} + +#[cfg(not(feature = "pg-sync"))] +async fn handle_delete_dump( + AxumPath((_name, _dump_name)): AxumPath<(String, String)>, +) -> impl IntoResponse { + StatusCode::NOT_FOUND +} + +/// POST /api/indexes/{name}/dumps/clear — Clear all dump history. +#[cfg(feature = "pg-sync")] +async fn handle_clear_dumps( + State(state): State, + AxumPath(_name): AxumPath, +) -> impl IntoResponse { + let mut reg = state.dump_registry.lock(); + reg.clear(); + let dumps_path = state.data_dir.join("dumps.json"); + reg.save(&dumps_path).ok(); + StatusCode::NO_CONTENT +} + +#[cfg(not(feature = "pg-sync"))] +async fn handle_clear_dumps(AxumPath(_name): AxumPath) -> impl IntoResponse { + StatusCode::NOT_FOUND +} + +/// GET /api/internal/sync-lag — Return latest sync metadata from all sources. +#[cfg(feature = "pg-sync")] +async fn handle_sync_lag( + State(state): State, +) -> impl IntoResponse { + let sync_meta = state.sync_meta.lock(); + let sources: Vec<&crate::pg_sync::ops::SyncMeta> = sync_meta.values().collect(); + Json(serde_json::json!({ "sources": sources })) +} + +#[cfg(not(feature = "pg-sync"))] +async fn handle_sync_lag() -> impl IntoResponse { + Json(serde_json::json!({ "sources": [] })) +} + async fn handle_ui() -> impl IntoResponse { Html(include_str!("../static/index.html")) } diff --git a/tests/sync_v2_integration.rs b/tests/sync_v2_integration.rs new file mode 100644 index 00000000..7901af0c --- /dev/null +++ b/tests/sync_v2_integration.rs @@ -0,0 +1,390 @@ +//! Integration tests for the Sync V2 pipeline. +//! +//! Tests the ops → WAL → processor pipeline without PG. +//! 
Full E2E tests (PG triggers → poller → server) require a running +//! server and PG instance — see tests/e2e/ for those. + +#![cfg(feature = "pg-sync")] + +use serde_json::json; +use tempfile::TempDir; + +use bitdex_v2::ops_wal::{WalReader, WalWriter}; +use bitdex_v2::pg_sync::op_dedup::dedup_ops; +use bitdex_v2::pg_sync::ops::{EntityOps, Op, OpsBatch, SyncMeta}; +use bitdex_v2::pg_sync::dump::{DumpRegistry, DumpStatus, dump_name, config_hash}; +use bitdex_v2::pg_sync::trigger_gen::{SyncConfig, SyncSource, generate_trigger_sql}; + +// ── WAL Pipeline Integration ── + +#[test] +fn test_ops_wal_roundtrip_with_dedup() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("ops.wal"); + + // Write ops with duplicates + let writer = WalWriter::new(&wal_path); + let batch = vec![ + EntityOps { + entity_id: 1, + creates_slot: false, + ops: vec![ + Op::Set { field: "nsfwLevel".into(), value: json!(8) }, + ], + }, + EntityOps { + entity_id: 1, + creates_slot: false, + ops: vec![ + Op::Set { field: "nsfwLevel".into(), value: json!(16) }, // Overwrites first + ], + }, + EntityOps { + entity_id: 2, + creates_slot: false, + ops: vec![ + Op::Add { field: "tagIds".into(), value: json!(42) }, + ], + }, + ]; + writer.append_batch(&batch).unwrap(); + + // Read back + let mut reader = WalReader::new(&wal_path, 0); + let result = reader.read_batch(100).unwrap(); + assert_eq!(result.entries.len(), 3); + + // Dedup + let mut entries = result.entries; + dedup_ops(&mut entries); + + // Entity 1: last set wins (nsfwLevel=16) + let entity1 = entries.iter().find(|e| e.entity_id == 1).unwrap(); + let set_ops: Vec<_> = entity1.ops.iter() + .filter(|op| matches!(op, Op::Set { field, .. } if field == "nsfwLevel")) + .collect(); + assert_eq!(set_ops.len(), 1); + if let Op::Set { value, .. 
} = &set_ops[0] { + assert_eq!(*value, json!(16)); + } + + // Entity 2: add preserved + let entity2 = entries.iter().find(|e| e.entity_id == 2).unwrap(); + assert_eq!(entity2.ops.len(), 1); + assert!(matches!(&entity2.ops[0], Op::Add { field, .. } if field == "tagIds")); +} + +#[test] +fn test_delete_absorbs_prior_ops_through_wal() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("ops.wal"); + + let writer = WalWriter::new(&wal_path); + + // First batch: set some fields + writer.append_batch(&[EntityOps { + entity_id: 1, + creates_slot: false, + ops: vec![ + Op::Set { field: "nsfwLevel".into(), value: json!(16) }, + Op::Add { field: "tagIds".into(), value: json!(42) }, + ], + }]).unwrap(); + + // Second batch: delete the entity + writer.append_batch(&[EntityOps { + entity_id: 1, + creates_slot: false, + ops: vec![Op::Delete], + }]).unwrap(); + + // Read all + let mut reader = WalReader::new(&wal_path, 0); + let result = reader.read_batch(100).unwrap(); + assert_eq!(result.entries.len(), 2); + + // Dedup should collapse to just delete + let mut entries = result.entries; + dedup_ops(&mut entries); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].ops.len(), 1); + assert!(matches!(&entries[0].ops[0], Op::Delete)); +} + +#[test] +fn test_add_remove_cancellation_through_wal() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("ops.wal"); + + let writer = WalWriter::new(&wal_path); + writer.append_batch(&[ + EntityOps { + entity_id: 1, + creates_slot: false, + ops: vec![Op::Add { field: "tagIds".into(), value: json!(42) }], + }, + EntityOps { + entity_id: 1, + creates_slot: false, + ops: vec![Op::Remove { field: "tagIds".into(), value: json!(42) }], + }, + ]).unwrap(); + + let mut reader = WalReader::new(&wal_path, 0); + let result = reader.read_batch(100).unwrap(); + let mut entries = result.entries; + dedup_ops(&mut entries); + + // Net zero — entity should be dropped + assert!(entries.is_empty() || 
entries[0].ops.is_empty()); +} + +#[test] +fn test_query_op_set_through_wal() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("ops.wal"); + + let writer = WalWriter::new(&wal_path); + writer.append_batch(&[EntityOps { + entity_id: 456, + creates_slot: false, + ops: vec![Op::QueryOpSet { + query: "modelVersionIds eq 456".into(), + ops: vec![ + Op::Remove { field: "baseModel".into(), value: json!("SD 1.5") }, + Op::Set { field: "baseModel".into(), value: json!("SDXL") }, + ], + }], + }]).unwrap(); + + let mut reader = WalReader::new(&wal_path, 0); + let result = reader.read_batch(100).unwrap(); + assert_eq!(result.entries.len(), 1); + + let entry = &result.entries[0]; + assert_eq!(entry.entity_id, 456); + match &entry.ops[0] { + Op::QueryOpSet { query, ops } => { + assert_eq!(query, "modelVersionIds eq 456"); + assert_eq!(ops.len(), 2); + } + _ => panic!("Expected QueryOpSet"), + } +} + +// ── Cursor Resume Integration ── + +#[test] +fn test_wal_cursor_resume_across_appends() { + let dir = TempDir::new().unwrap(); + let wal_path = dir.path().join("ops.wal"); + let writer = WalWriter::new(&wal_path); + + // Batch 1 + writer.append_batch(&[EntityOps { + entity_id: 1, + creates_slot: false, + ops: vec![Op::Set { field: "a".into(), value: json!(1) }], + }]).unwrap(); + + // Read batch 1 + let mut reader = WalReader::new(&wal_path, 0); + let r1 = reader.read_batch(100).unwrap(); + assert_eq!(r1.entries.len(), 1); + let cursor = reader.cursor(); + + // Batch 2 (appended after first read) + writer.append_batch(&[EntityOps { + entity_id: 2, + creates_slot: false, + ops: vec![Op::Set { field: "b".into(), value: json!(2) }], + }]).unwrap(); + + // Resume from cursor — should only get batch 2 + let mut reader2 = WalReader::new(&wal_path, cursor); + let r2 = reader2.read_batch(100).unwrap(); + assert_eq!(r2.entries.len(), 1); + assert_eq!(r2.entries[0].entity_id, 2); +} + +// ── Dump Registry Integration ── + +#[test] +fn 
test_dump_registry_full_workflow() { + let dir = TempDir::new().unwrap(); + let dumps_path = dir.path().join("dumps.json"); + + let mut reg = DumpRegistry::default(); + + // Simulate boot: check if dumps are complete + let image_hash = config_hash("table: Image\ntrack_fields: [nsfwLevel]"); + let tags_hash = config_hash("table: TagsOnImageNew\nfield: tagIds"); + let image_name = dump_name("Image", &image_hash); + let tags_name = dump_name("TagsOnImageNew", &tags_hash); + + assert!(!reg.is_complete(&image_name)); + assert!(!reg.is_complete(&tags_name)); + + // Register dumps + reg.register(image_name.clone(), Some("dumps/image.wal".into())); + reg.register(tags_name.clone(), Some("dumps/tags.wal".into())); + reg.save(&dumps_path).unwrap(); + + // Simulate pg-sync writing WAL and signaling loaded + reg.mark_loaded(&image_name, 107_500_000); + reg.mark_loaded(&tags_name, 375_000_000); + + // Simulate WAL reader completing + reg.mark_complete(&image_name, 107_500_000); + assert!(!reg.all_complete()); // Tags not done yet + + reg.mark_complete(&tags_name, 375_000_000); + assert!(reg.all_complete()); + + // Persist and reload + reg.save(&dumps_path).unwrap(); + let loaded = DumpRegistry::load(&dumps_path); + assert!(loaded.is_complete(&image_name)); + assert!(loaded.is_complete(&tags_name)); + assert!(loaded.all_complete()); +} + +#[test] +fn test_dump_config_change_detection() { + let hash1 = config_hash("table: Image\ntrack_fields: [nsfwLevel]"); + let hash2 = config_hash("table: Image\ntrack_fields: [nsfwLevel, type]"); + let name1 = dump_name("Image", &hash1); + let name2 = dump_name("Image", &hash2); + + let mut reg = DumpRegistry::default(); + reg.register(name1.clone(), None); + reg.mark_loaded(&name1, 100); + reg.mark_complete(&name1, 100); + + // After config change, the dump name is different + assert!(reg.is_complete(&name1)); + assert!(!reg.is_complete(&name2)); // New hash → not loaded → needs re-dump +} + +// ── Trigger Generation Integration ── + +#[test] 
+fn test_full_civitai_config() { + let yaml = r#" +sync_sources: + - table: Image + slot_field: id + sets_alive: true + track_fields: + - nsfwLevel + - type + - userId + - postId + - minor + - poi + - blockedFor + - "GREATEST({scannedAt}, {createdAt}) as existedAt" + - "({flags} & (1 << 13)) != 0 AND ({flags} & (1 << 2)) = 0 as hasMeta" + - "({flags} & (1 << 14)) != 0 as onSite" + on_delete: delete_slot + + - table: TagsOnImageNew + slot_field: imageId + field: tagIds + value_field: tagId + + - table: ImageTool + slot_field: imageId + field: toolIds + value_field: toolId + + - table: ImageTechnique + slot_field: imageId + field: techniqueIds + value_field: techniqueId + + - table: ModelVersion + query: "modelVersionIds eq {id}" + track_fields: [baseModel] + + - table: Post + query: "postId eq {id}" + track_fields: [publishedAt, availability] +"#; + + let config = SyncConfig::from_yaml(yaml).unwrap(); + assert_eq!(config.sync_sources.len(), 6); + + // Generate SQL for each and verify they're non-empty and contain expected patterns + for source in &config.sync_sources { + let sql = generate_trigger_sql(source); + assert!(sql.contains("CREATE OR REPLACE FUNCTION"), "Missing function for {}", source.table); + assert!(sql.contains("ENABLE ALWAYS"), "Missing ENABLE ALWAYS for {}", source.table); + + match source.table.as_str() { + "Image" => { + assert!(sql.contains("IS DISTINCT FROM"), "Image should use IS DISTINCT FROM"); + assert!(sql.contains("delete"), "Image should handle delete"); + assert!(sql.contains("GREATEST"), "Image should have existedAt expression"); + } + "TagsOnImageNew" => { + assert!(sql.contains("'add'"), "Tags should have add ops"); + assert!(sql.contains("'remove'"), "Tags should have remove ops"); + } + "ModelVersion" => { + assert!(sql.contains("queryOpSet"), "MV should use queryOpSet"); + assert!(sql.contains("modelVersionIds eq"), "MV should query by MV id"); + } + "Post" => { + assert!(sql.contains("queryOpSet"), "Post should use queryOpSet"); 
+ assert!(sql.contains("postId eq"), "Post should query by postId"); + } + _ => {} + } + } +} + +// ── OpsBatch Serialization ── + +#[test] +fn test_ops_batch_json_format() { + let batch = OpsBatch { + ops: vec![ + EntityOps { + entity_id: 123, + creates_slot: false, + ops: vec![ + Op::Remove { field: "nsfwLevel".into(), value: json!(8) }, + Op::Set { field: "nsfwLevel".into(), value: json!(16) }, + ], + }, + EntityOps { + entity_id: 456, + creates_slot: false, + ops: vec![Op::QueryOpSet { + query: "modelVersionIds eq 456".into(), + ops: vec![ + Op::Remove { field: "baseModel".into(), value: json!("SD 1.5") }, + Op::Set { field: "baseModel".into(), value: json!("SDXL") }, + ], + }], + }, + ], + meta: Some(SyncMeta { + source: "pg-sync-default".into(), + cursor: Some(420_000_000), + max_id: Some(500_000_000), + lag_rows: Some(80_000_000), + }), + }; + + // Round-trip through JSON + let json_str = serde_json::to_string(&batch).unwrap(); + let parsed: OpsBatch = serde_json::from_str(&json_str).unwrap(); + assert_eq!(parsed.ops.len(), 2); + assert_eq!(parsed.ops[0].entity_id, 123); + assert_eq!(parsed.ops[1].entity_id, 456); + let meta = parsed.meta.unwrap(); + assert_eq!(meta.source, "pg-sync-default"); + assert_eq!(meta.lag_rows, Some(80_000_000)); +}