diff --git a/.claude/settings.local.json b/.claude/settings.local.json index afaa7ae..612cf9d 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -1,4 +1,4 @@ { - "outputStyle": "Justin", + "outputStyle": "default", "prefersReducedMotion": true } diff --git a/Cargo.toml b/Cargo.toml index 14c77a2..8261c15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -154,10 +154,6 @@ harness = false name = "bound_store_bench" harness = false -[[bench]] -name = "parse_alloc_bench" -harness = false - [[bin]] name = "bitdex-benchmark" path = "src/bin/benchmark.rs" @@ -182,9 +178,6 @@ name = "bitdex-replay" path = "src/bin/replay.rs" required-features = ["replay"] -[[bin]] -name = "rebuild_bench" -path = "src/bin/rebuild_bench.rs" [[example]] name = "load_from_csv" diff --git a/src/bin/pg_sync.rs b/src/bin/pg_sync.rs index dc010ac..10000dc 100644 --- a/src/bin/pg_sync.rs +++ b/src/bin/pg_sync.rs @@ -366,15 +366,8 @@ async fn run_boot_sequence( if let Some(config) = full_sync_config { run_streaming_pipeline(pool, sync_config, bitdex_client, config, stage_dir).await; } else { - // V1 fallback: download all then process manually - bulk_loader::download_all_tables(pool, stage_dir) - .await - .unwrap_or_else(|e| { - eprintln!("CSV download failed: {e}"); - std::process::exit(1); - }); - eprintln!("No sync config YAML — skipping dump pipeline."); - eprintln!("CSVs staged at: {}. Use /dumps endpoint manually.", stage_dir.display()); + eprintln!("No sync config YAML — cannot run dump pipeline. Provide --sync-config."); + std::process::exit(1); } // Step 10: Seed cursor at pre_dump_cursor diff --git a/src/bin/rebuild_bench.rs b/src/bin/rebuild_bench.rs deleted file mode 100644 index 64550dd..0000000 --- a/src/bin/rebuild_bench.rs +++ /dev/null @@ -1,906 +0,0 @@ -//! Microbenchmarks for docstore → bitmap rebuild pipeline. -//! -//! Measures individual stages of the rebuild pipeline to identify bottlenecks: -//! 1. Raw shard I/O: read + zstd decompress -//! 2. 
Decode: msgpack → StoredDoc -//! 3. Bitmap extraction: StoredDoc → filter/sort bitmaps -//! 4. Full pipeline: read → decode → extract → merge -//! -//! Usage: -//! cargo run --release --bin rebuild_bench -- --data-dir ./data --index civitai [--shards 1000] - -use std::collections::HashMap; -use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::time::Instant; - -use rayon::prelude::*; -use roaring::RoaringBitmap; - -use bitdex_v2::silos::doc_format::{PackedValue, StoredDoc}; -use bitdex_v2::silos::doc_silo_adapter::DocSiloAdapter; -use bitdex_v2::mutation::{value_to_bitmap_key, value_to_sort_u32}; -use bitdex_v2::query::Value; - -#[global_allocator] -static ALLOC: rpmalloc::RpMalloc = rpmalloc::RpMalloc; - -struct BenchConfig { - data_dir: PathBuf, - index_name: String, - max_shards: Option, - full_build: bool, - add_field: Option, -} - -fn parse_args() -> BenchConfig { - let args: Vec = std::env::args().collect(); - let mut data_dir = PathBuf::from("./data"); - let mut index_name = "civitai".to_string(); - let mut max_shards: Option = None; - let mut full_build = false; - let mut add_field: Option = None; - - let mut i = 1; - while i < args.len() { - match args[i].as_str() { - "--data-dir" => { data_dir = PathBuf::from(&args[i + 1]); i += 2; } - "--index" => { index_name = args[i + 1].clone(); i += 2; } - "--shards" => { max_shards = Some(args[i + 1].parse().unwrap()); i += 2; } - "--full" => { full_build = true; i += 1; } - "--add-field" => { add_field = Some(args[i + 1].clone()); i += 2; } - _ => { i += 1; } - } - } - - BenchConfig { data_dir, index_name, max_shards, full_build, add_field } -} - -/// Count total shards by scanning the shard directory. 
-fn count_shards(docs_path: &Path) -> u32 { - let shards_dir = docs_path.join("shards"); - let mut count = 0u32; - if let Ok(entries) = std::fs::read_dir(&shards_dir) { - for entry in entries.flatten() { - if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) { - if let Ok(sub_entries) = std::fs::read_dir(entry.path()) { - count += sub_entries - .filter(|e| e.as_ref().map(|e| { - e.path().extension().map(|ext| ext == "bin").unwrap_or(false) - }).unwrap_or(false)) - .count() as u32; - } - } - } - } - count -} - -/// Find the maximum shard ID by scanning shard files. -fn find_max_shard(docs_path: &Path) -> u32 { - let shards_dir = docs_path.join("shards"); - let mut max_id = 0u32; - if let Ok(entries) = std::fs::read_dir(&shards_dir) { - for entry in entries.flatten() { - if entry.file_type().map(|t| t.is_dir()).unwrap_or(false) { - if let Ok(sub_entries) = std::fs::read_dir(entry.path()) { - for sub in sub_entries.flatten() { - if let Some(stem) = sub.path().file_stem() { - if let Ok(id) = stem.to_string_lossy().parse::() { - max_id = max_id.max(id); - } - } - } - } - } - } - } - max_id -} - -/// Stage 1: Raw shard I/O — read files + zstd decompress, no decode. 
-fn bench_raw_io(docs_path: &Path, num_shards: u32) -> (f64, u64, u64) { - eprintln!("\n=== Stage 1: Raw shard I/O (read + zstd decompress) ==="); - let bytes_read = AtomicU64::new(0); - let bytes_decompressed = AtomicU64::new(0); - let shards_read = AtomicU64::new(0); - - let t0 = Instant::now(); - - (0..num_shards).into_par_iter().for_each(|shard_id| { - let dir_byte = ((shard_id >> 8) & 0xFF) as u8; - let path = docs_path - .join("shards") - .join(format!("{:02x}", dir_byte)) - .join(format!("{:06}.bin", shard_id)); - - match std::fs::read(&path) { - Ok(data) => { - bytes_read.fetch_add(data.len() as u64, Ordering::Relaxed); - // Decompress to measure decompression throughput - // BitmapSilo format — count bytes as decompressed (no separate compression layer) - bytes_decompressed.fetch_add(data.len() as u64, Ordering::Relaxed); - shards_read.fetch_add(1, Ordering::Relaxed); - } - Err(_) => {} - } - }); - - let elapsed = t0.elapsed().as_secs_f64(); - let total_read = bytes_read.load(Ordering::Relaxed); - let total_decompressed = bytes_decompressed.load(Ordering::Relaxed); - let total_shards = shards_read.load(Ordering::Relaxed); - - eprintln!(" Shards read: {}", total_shards); - eprintln!(" Compressed bytes: {:.2} GB", total_read as f64 / 1e9); - eprintln!(" Decompressed: {:.2} GB", total_decompressed as f64 / 1e9); - eprintln!(" Time: {:.2}s", elapsed); - eprintln!(" Read throughput: {:.0} MB/s (compressed)", total_read as f64 / elapsed / 1e6); - eprintln!(" Decomp throughput: {:.0} MB/s (decompressed)", total_decompressed as f64 / elapsed / 1e6); - - (elapsed, total_read, total_decompressed) -} - -/// Stage 2: Read + decode to StoredDoc. 
-fn bench_decode(docs_path: &Path, num_shards: u32) -> (f64, u64) { - eprintln!("\n=== Stage 2: Read + Decode (→ StoredDoc) ==="); - let docs_decoded = AtomicU64::new(0); - - let reader = DocSiloAdapter::open(docs_path).expect("open docstore"); - - let t0 = Instant::now(); - - (0..num_shards).into_par_iter().for_each(|shard_id| { - match reader.get_shard(shard_id) { - Ok(docs) => { - docs_decoded.fetch_add(docs.len() as u64, Ordering::Relaxed); - } - Err(_) => {} - } - }); - - let elapsed = t0.elapsed().as_secs_f64(); - let total_docs = docs_decoded.load(Ordering::Relaxed); - - eprintln!(" Docs decoded: {}", total_docs); - eprintln!(" Time: {:.2}s", elapsed); - eprintln!(" Throughput: {:.0} docs/s", total_docs as f64 / elapsed); - eprintln!(" Per-doc avg: {:.2} µs", elapsed * 1e6 / total_docs as f64); - - (elapsed, total_docs) -} - -/// Stage 3: Full rebuild pipeline — read + decode + extract filter/sort bitmaps + merge. -fn bench_full_rebuild( - docs_path: &Path, - num_shards: u32, - filter_names: &[&str], - sort_names: &[&str], - sort_bits: &[usize], -) -> (f64, u64) { - eprintln!("\n=== Stage 3: Full Rebuild Pipeline ==="); - eprintln!(" Filter fields: {:?}", filter_names); - eprintln!(" Sort fields: {:?}", sort_names); - - let reader = DocSiloAdapter::open(docs_path).expect("open docstore"); - - type FilterMap = HashMap<(usize, u64), RoaringBitmap>; - struct Accum { - sort_layers: Vec>, - filter_map: FilterMap, - alive: RoaringBitmap, - count: u64, - } - - let make_accum = || Accum { - sort_layers: sort_bits.iter().map(|&b| { - (0..b).map(|_| RoaringBitmap::new()).collect() - }).collect(), - filter_map: FilterMap::new(), - alive: RoaringBitmap::new(), - count: 0, - }; - - let chunk_size = 500u32; - let num_chunks = (num_shards + chunk_size - 1) / chunk_size; - - let t0 = Instant::now(); - - let merged = (0..num_chunks) - .into_par_iter() - .fold(make_accum, |mut acc, chunk_idx| { - let shard_start = chunk_idx * chunk_size; - let shard_end = 
std::cmp::min(shard_start + chunk_size, num_shards); - - for shard_id in shard_start..shard_end { - let docs = match reader.get_shard(shard_id) { - Ok(d) => d, - Err(_) => continue, - }; - - for (slot_id, doc) in &docs { - acc.alive.insert(*slot_id); - - // Filter bitmap extraction - for (fi, &fname) in filter_names.iter().enumerate() { - if let Some(fv) = doc.fields.get(fname) { - match fv { - bitdex_v2::mutation::FieldValue::Single(v) => { - if let Some(key) = value_to_bitmap_key(v) { - acc.filter_map - .entry((fi, key)) - .or_insert_with(RoaringBitmap::new) - .insert(*slot_id); - } - } - bitdex_v2::mutation::FieldValue::Multi(vals) => { - for v in vals { - if let Some(key) = value_to_bitmap_key(v) { - acc.filter_map - .entry((fi, key)) - .or_insert_with(RoaringBitmap::new) - .insert(*slot_id); - } - } - } - } - } - } - - // Sort bitmap extraction - for (si, &sname) in sort_names.iter().enumerate() { - if let Some(fv) = doc.fields.get(sname) { - if let bitdex_v2::mutation::FieldValue::Single(ref v) = fv { - if let Some(value) = value_to_sort_u32(v) { - let num_bits = sort_bits[si]; - for bit in 0..num_bits { - if (value >> bit) & 1 == 1 { - acc.sort_layers[si][bit].insert(*slot_id); - } - } - } - } - } - } - - acc.count += 1; - } - } - acc - }) - .reduce(make_accum, |mut a, b| { - for (si, b_layers) in b.sort_layers.into_iter().enumerate() { - for (bit, bm) in b_layers.into_iter().enumerate() { - a.sort_layers[si][bit] |= bm; - } - } - for (key, bm) in b.filter_map { - a.filter_map.entry(key) - .and_modify(|existing| *existing |= &bm) - .or_insert(bm); - } - a.alive |= b.alive; - a.count += b.count; - a - }); - - let elapsed = t0.elapsed().as_secs_f64(); - - let total_filter_bitmaps: usize = merged.filter_map.len(); - let total_sort_layers: usize = merged.sort_layers.iter() - .map(|layers| layers.iter().filter(|bm| !bm.is_empty()).count()) - .sum(); - - eprintln!(" Docs processed: {}", merged.count); - eprintln!(" Alive bitmap: {} bits", merged.alive.len()); - 
eprintln!(" Filter bitmaps: {} distinct (field,value) pairs", total_filter_bitmaps); - eprintln!(" Sort layers: {} non-empty layers", total_sort_layers); - eprintln!(" Time: {:.2}s", elapsed); - eprintln!(" Throughput: {:.0} docs/s", merged.count as f64 / elapsed); - - (elapsed, merged.count) -} - -/// Stage 4: Rebuild a single field — measures per-field rebuild cost. -fn bench_single_field_rebuild( - docs_path: &Path, - num_shards: u32, - field_name: &str, - is_sort: bool, - bits: usize, -) -> (f64, u64) { - eprintln!("\n=== Stage 4: Single Field Rebuild — {} ({}) ===", - field_name, if is_sort { "sort" } else { "filter" }); - - let reader = DocSiloAdapter::open(docs_path).expect("open docstore"); - let docs_processed = AtomicU64::new(0); - - let chunk_size = 500u32; - let num_chunks = (num_shards + chunk_size - 1) / chunk_size; - - let t0 = Instant::now(); - - if is_sort { - // Sort field rebuild - struct SortAccum { - layers: Vec, - count: u64, - } - - let make_accum = || SortAccum { - layers: (0..bits).map(|_| RoaringBitmap::new()).collect(), - count: 0, - }; - - let merged = (0..num_chunks) - .into_par_iter() - .fold(make_accum, |mut acc, chunk_idx| { - let shard_start = chunk_idx * chunk_size; - let shard_end = std::cmp::min(shard_start + chunk_size, num_shards); - - for shard_id in shard_start..shard_end { - let docs = match reader.get_shard(shard_id) { - Ok(d) => d, - Err(_) => continue, - }; - for (slot_id, doc) in &docs { - if let Some(fv) = doc.fields.get(field_name) { - if let bitdex_v2::mutation::FieldValue::Single(ref v) = fv { - if let Some(value) = value_to_sort_u32(v) { - for bit in 0..bits { - if (value >> bit) & 1 == 1 { - acc.layers[bit].insert(*slot_id); - } - } - } - } - } - acc.count += 1; - } - } - acc - }) - .reduce(make_accum, |mut a, b| { - for (bit, bm) in b.layers.into_iter().enumerate() { - a.layers[bit] |= bm; - } - a.count += b.count; - a - }); - - let elapsed = t0.elapsed().as_secs_f64(); - let non_empty = 
merged.layers.iter().filter(|l| !l.is_empty()).count(); - eprintln!(" Docs: {}", merged.count); - eprintln!(" Layers: {}/{} non-empty", non_empty, bits); - eprintln!(" Time: {:.2}s", elapsed); - eprintln!(" Rate: {:.0} docs/s", merged.count as f64 / elapsed); - (elapsed, merged.count) - } else { - // Filter field rebuild - type FMap = HashMap; - struct FilterAccum { - map: FMap, - count: u64, - } - - let make_accum = || FilterAccum { - map: FMap::new(), - count: 0, - }; - - let merged = (0..num_chunks) - .into_par_iter() - .fold(make_accum, |mut acc, chunk_idx| { - let shard_start = chunk_idx * chunk_size; - let shard_end = std::cmp::min(shard_start + chunk_size, num_shards); - - for shard_id in shard_start..shard_end { - let docs = match reader.get_shard(shard_id) { - Ok(d) => d, - Err(_) => continue, - }; - for (slot_id, doc) in &docs { - if let Some(fv) = doc.fields.get(field_name) { - match fv { - bitdex_v2::mutation::FieldValue::Single(v) => { - if let Some(key) = value_to_bitmap_key(v) { - acc.map.entry(key) - .or_insert_with(RoaringBitmap::new) - .insert(*slot_id); - } - } - bitdex_v2::mutation::FieldValue::Multi(vals) => { - for v in vals { - if let Some(key) = value_to_bitmap_key(v) { - acc.map.entry(key) - .or_insert_with(RoaringBitmap::new) - .insert(*slot_id); - } - } - } - } - } - acc.count += 1; - } - } - acc - }) - .reduce(make_accum, |mut a, b| { - for (key, bm) in b.map { - a.map.entry(key) - .and_modify(|existing| *existing |= &bm) - .or_insert(bm); - } - a.count += b.count; - a - }); - - let elapsed = t0.elapsed().as_secs_f64(); - eprintln!(" Docs: {}", merged.count); - eprintln!(" Distinct: {} values", merged.map.len()); - eprintln!(" Time: {:.2}s", elapsed); - eprintln!(" Rate: {:.0} docs/s", merged.count as f64 / elapsed); - (elapsed, merged.count) - } -} - -/// Stage 5: Split-phase — pre-read all shards into memory, then benchmark -/// bitmap construction with zero I/O. This isolates CPU cost of bitmap ops. 
-fn bench_bitmap_only( - docs_path: &Path, - num_shards: u32, - filter_names: &[&str], - sort_names: &[&str], - sort_bits: &[usize], -) -> (f64, f64, u64) { - eprintln!("\n=== Stage 5: Split-Phase (pre-read → bitmap-only) ==="); - - let reader = DocSiloAdapter::open(docs_path).expect("open docstore"); - - // Phase A: Read all shards into memory (decoded StoredDocs) - let t_read = Instant::now(); - let all_docs: Vec> = (0..num_shards) - .into_par_iter() - .filter_map(|shard_id| { - reader.get_shard(shard_id).ok().filter(|d| !d.is_empty()) - }) - .collect(); - let read_time = t_read.elapsed().as_secs_f64(); - - let total_docs: u64 = all_docs.iter().map(|s| s.len() as u64).sum(); - eprintln!(" Read phase: {:.2}s ({} docs, {:.0} docs/s)", - read_time, total_docs, total_docs as f64 / read_time); - - // Phase B: Build bitmaps from in-memory docs (no I/O) - type FilterMap = HashMap<(usize, u64), RoaringBitmap>; - struct Accum { - sort_layers: Vec>, - filter_map: FilterMap, - alive: RoaringBitmap, - count: u64, - } - - let make_accum = || Accum { - sort_layers: sort_bits.iter().map(|&b| { - (0..b).map(|_| RoaringBitmap::new()).collect() - }).collect(), - filter_map: FilterMap::new(), - alive: RoaringBitmap::new(), - count: 0, - }; - - let t_bitmap = Instant::now(); - - let merged = all_docs - .par_iter() - .fold(make_accum, |mut acc, shard_docs| { - for (slot_id, doc) in shard_docs { - acc.alive.insert(*slot_id); - - for (fi, &fname) in filter_names.iter().enumerate() { - if let Some(fv) = doc.fields.get(fname) { - match fv { - bitdex_v2::mutation::FieldValue::Single(v) => { - if let Some(key) = value_to_bitmap_key(v) { - acc.filter_map - .entry((fi, key)) - .or_insert_with(RoaringBitmap::new) - .insert(*slot_id); - } - } - bitdex_v2::mutation::FieldValue::Multi(vals) => { - for v in vals { - if let Some(key) = value_to_bitmap_key(v) { - acc.filter_map - .entry((fi, key)) - .or_insert_with(RoaringBitmap::new) - .insert(*slot_id); - } - } - } - } - } - } - - for (si, 
&sname) in sort_names.iter().enumerate() { - if let Some(fv) = doc.fields.get(sname) { - if let bitdex_v2::mutation::FieldValue::Single(ref v) = fv { - if let Some(value) = value_to_sort_u32(v) { - let num_bits = sort_bits[si]; - for bit in 0..num_bits { - if (value >> bit) & 1 == 1 { - acc.sort_layers[si][bit].insert(*slot_id); - } - } - } - } - } - } - - acc.count += 1; - } - acc - }) - .reduce(make_accum, |mut a, b| { - for (si, b_layers) in b.sort_layers.into_iter().enumerate() { - for (bit, bm) in b_layers.into_iter().enumerate() { - a.sort_layers[si][bit] |= bm; - } - } - for (key, bm) in b.filter_map { - a.filter_map.entry(key) - .and_modify(|existing| *existing |= &bm) - .or_insert(bm); - } - a.alive |= b.alive; - a.count += b.count; - a - }); - - let bitmap_time = t_bitmap.elapsed().as_secs_f64(); - - eprintln!(" Bitmap phase: {:.2}s ({:.0} docs/s)", - bitmap_time, merged.count as f64 / bitmap_time); - eprintln!(" Filter bitmaps: {} distinct pairs", merged.filter_map.len()); - eprintln!(" Total: {:.2}s", read_time + bitmap_time); - - (read_time, bitmap_time, merged.count) -} - -/// Stage 6: Raw bytes → bitmap extraction WITHOUT full StoredDoc decode. -/// Decodes msgpack pairs directly, only extracting fields we need. 
-fn bench_selective_decode( - docs_path: &Path, - num_shards: u32, - target_fields: &[&str], -) -> (f64, u64) { - eprintln!("\n=== Stage 6: Selective Decode (skip full StoredDoc) ==="); - eprintln!(" Target fields: {:?}", target_fields); - - let reader = DocSiloAdapter::open(docs_path).expect("open docstore"); - let field_to_idx = &reader; - - // We'll read raw shard bytes and decode only needed fields - let docs_processed = AtomicU64::new(0); - - let t0 = Instant::now(); - - // For now, just measure the difference by reading shards and only looking up target fields - // This shows the cost of HashMap::get vs iterating all fields - let chunk_size = 500u32; - let num_chunks = (num_shards + chunk_size - 1) / chunk_size; - - let total: u64 = (0..num_chunks) - .into_par_iter() - .map(|chunk_idx| { - let shard_start = chunk_idx * chunk_size; - let shard_end = std::cmp::min(shard_start + chunk_size, num_shards); - let mut count = 0u64; - - for shard_id in shard_start..shard_end { - let docs = match reader.get_shard(shard_id) { - Ok(d) => d, - Err(_) => continue, - }; - for (slot_id, doc) in &docs { - // Only access target fields — simulates selective decode - for &fname in target_fields { - let _ = doc.fields.get(fname); - } - count += 1; - } - } - count - }) - .sum(); - - let elapsed = t0.elapsed().as_secs_f64(); - eprintln!(" Docs: {}", total); - eprintln!(" Time: {:.2}s", elapsed); - eprintln!(" Rate: {:.0} docs/s", total as f64 / elapsed); - - (elapsed, total) -} - -/// Stage 7: Zero-alloc packed rebuild — decode to Vec<(u16, PackedValue)> directly, -/// use field dictionary u16 indices instead of String HashMap lookups. -/// This is the "what if we skip StoredDoc entirely" benchmark. 
-fn bench_packed_rebuild( - docs_path: &Path, - num_shards: u32, - filter_names: &[&str], - sort_names: &[&str], - sort_bits: &[usize], -) -> (f64, u64) { - eprintln!("\n=== Stage 7: Packed Rebuild (skip StoredDoc) ==="); - - let reader = DocSiloAdapter::open(docs_path).expect("open docstore"); - - // Build u16 index → (role, position) lookup table from field dictionary - // role: 0 = filter, 1 = sort, 2 = both - let field_dict = reader.field_to_idx(); - let mut filter_idx_map: HashMap = HashMap::new(); // dict_idx → filter position - let mut sort_idx_map: HashMap = HashMap::new(); // dict_idx → (sort position, bits) - - for (fi, &fname) in filter_names.iter().enumerate() { - if let Some(&idx) = field_dict.get(fname) { - filter_idx_map.insert(idx, fi); - } - } - for (si, &sname) in sort_names.iter().enumerate() { - if let Some(&idx) = field_dict.get(sname) { - sort_idx_map.insert(idx, (si, sort_bits[si])); - } - } - - eprintln!(" Filter fields mapped: {}/{}", filter_idx_map.len(), filter_names.len()); - eprintln!(" Sort fields mapped: {}/{}", sort_idx_map.len(), sort_names.len()); - - type FilterMap = HashMap<(usize, u64), RoaringBitmap>; - struct Accum { - sort_layers: Vec>, - filter_map: FilterMap, - alive: RoaringBitmap, - count: u64, - } - - let make_accum = || Accum { - sort_layers: sort_bits.iter().map(|&b| { - (0..b).map(|_| RoaringBitmap::new()).collect() - }).collect(), - filter_map: FilterMap::new(), - alive: RoaringBitmap::new(), - count: 0, - }; - - let chunk_size = 500u32; - let num_chunks = (num_shards + chunk_size - 1) / chunk_size; - - let t0 = Instant::now(); - - let merged = (0..num_chunks) - .into_par_iter() - .fold(make_accum, |mut acc, chunk_idx| { - let shard_start = chunk_idx * chunk_size; - let shard_end = std::cmp::min(shard_start + chunk_size, num_shards); - - for shard_id in shard_start..shard_end { - let packed_docs = match reader.get_shard_packed(shard_id) { - Ok(d) => d, - Err(_) => continue, - }; - - for (slot_id, pairs) in 
&packed_docs { - acc.alive.insert(*slot_id); - - for (field_idx, pv) in pairs { - // Filter extraction — direct u16 lookup, no String - if let Some(&fi) = filter_idx_map.get(field_idx) { - match pv { - PackedValue::I(v) => { - acc.filter_map - .entry((fi, *v as u64)) - .or_insert_with(RoaringBitmap::new) - .insert(*slot_id); - } - PackedValue::B(b) => { - acc.filter_map - .entry((fi, if *b { 1 } else { 0 })) - .or_insert_with(RoaringBitmap::new) - .insert(*slot_id); - } - PackedValue::Mi(vals) => { - for v in vals { - acc.filter_map - .entry((fi, *v as u64)) - .or_insert_with(RoaringBitmap::new) - .insert(*slot_id); - } - } - _ => {} - } - } - - // Sort extraction — direct u16 lookup - if let Some(&(si, bits)) = sort_idx_map.get(field_idx) { - if let PackedValue::I(v) = pv { - let value = (*v).max(0) as u32; - for bit in 0..bits { - if (value >> bit) & 1 == 1 { - acc.sort_layers[si][bit].insert(*slot_id); - } - } - } - } - } - - acc.count += 1; - } - } - acc - }) - .reduce(make_accum, |mut a, b| { - for (si, b_layers) in b.sort_layers.into_iter().enumerate() { - for (bit, bm) in b_layers.into_iter().enumerate() { - a.sort_layers[si][bit] |= bm; - } - } - for (key, bm) in b.filter_map { - a.filter_map.entry(key) - .and_modify(|existing| *existing |= &bm) - .or_insert(bm); - } - a.alive |= b.alive; - a.count += b.count; - a - }); - - let elapsed = t0.elapsed().as_secs_f64(); - - eprintln!(" Docs processed: {}", merged.count); - eprintln!(" Filter bitmaps: {} distinct pairs", merged.filter_map.len()); - eprintln!(" Time: {:.2}s", elapsed); - eprintln!(" Throughput: {:.0} docs/s", merged.count as f64 / elapsed); - - (elapsed, merged.count) -} - -/// Full-scale build: not yet implemented — DataSilo bulk scan API pending. 
-fn run_full_build(_data_dir: &Path, _index_name: &str) { - eprintln!("ERROR: build_all_from_docstore is not yet implemented (DataSilo bulk scan API pending)."); - std::process::exit(1); -} - -/// --add-field mode: not yet implemented — DataSilo bulk scan API pending. -fn run_add_field(_data_dir: &Path, _index_name: &str, _field_name: &str) { - eprintln!("ERROR: add_fields_from_docstore is not yet implemented (DataSilo bulk scan API pending)."); - std::process::exit(1); -} - -fn main() { - let config = parse_args(); - let index_dir = config.data_dir.join("indexes").join(&config.index_name); - let docs_path = index_dir.join("docs"); - - // --add-field mode: benchmark hot-adding a single field - if let Some(ref field_name) = config.add_field { - run_add_field(&config.data_dir, &config.index_name, field_name); - return; - } - - // --full mode: run the engine-level build_all_from_docstore - if config.full_build { - run_full_build(&config.data_dir, &config.index_name); - return; - } - - eprintln!("Rebuild Benchmark — index: {}", config.index_name); - eprintln!("Docs path: {}", docs_path.display()); - - // Count shards - let t0 = Instant::now(); - let max_shard = find_max_shard(&docs_path); - let total_shards = max_shard + 1; - eprintln!("Found max shard ID {} ({} total) in {:.1}s", - max_shard, total_shards, t0.elapsed().as_secs_f64()); - - let num_shards = config.max_shards.unwrap_or(total_shards).min(total_shards); - eprintln!("Benchmarking {} shards (~{} docs)", - num_shards, num_shards as u64 * 512); - - let num_threads = rayon::current_num_threads(); - eprintln!("Rayon threads: {}", num_threads); - - // Stage 1: Raw I/O - let (io_time, compressed_bytes, decompressed_bytes) = bench_raw_io(&docs_path, num_shards); - - // Stage 2: Read + Decode - let (decode_time, total_docs) = bench_decode(&docs_path, num_shards); - - // Stage 3: Full rebuild (all filter + sort fields) - let filter_names: Vec<&str> = vec![ - "nsfwLevel", "userId", "postId", "postedToId", "type", 
"baseModel", - "availability", "blockedFor", "remixOfId", "hasMeta", "onSite", "poi", "minor", - "tagIds", "modelVersionIds", "modelVersionIdsManual", "toolIds", "techniqueIds", - ]; - let sort_names: Vec<&str> = vec!["reactionCount", "sortAt", "commentCount", "collectedCount", "id"]; - let sort_bits: Vec = vec![32, 32, 32, 32, 32]; - - let (full_time, full_docs) = bench_full_rebuild( - &docs_path, num_shards, - &filter_names, &sort_names, &sort_bits, - ); - - // Stage 4: Single field rebuilds (interesting ones) - eprintln!("\n--- Per-field rebuild times ---"); - - let (nsfw_time, _) = bench_single_field_rebuild(&docs_path, num_shards, "nsfwLevel", false, 0); - let (tags_time, _) = bench_single_field_rebuild(&docs_path, num_shards, "tagIds", false, 0); - let (sort_time, _) = bench_single_field_rebuild(&docs_path, num_shards, "sortAt", true, 32); - let (reaction_time, _) = bench_single_field_rebuild(&docs_path, num_shards, "reactionCount", true, 32); - - // Stage 5: Split-phase (isolate I/O from CPU) - let (split_read, split_bitmap, split_docs) = bench_bitmap_only( - &docs_path, num_shards, - &filter_names, &sort_names, &sort_bits, - ); - - // Stage 6: Selective decode (only target fields) - let (selective_1_time, _) = bench_selective_decode( - &docs_path, num_shards, &["nsfwLevel"], - ); - let (selective_all_time, _) = bench_selective_decode( - &docs_path, num_shards, - &["nsfwLevel", "userId", "tagIds", "reactionCount", "sortAt"], - ); - - // Stage 7: Packed rebuild (skip StoredDoc entirely) - let (packed_time, packed_docs) = bench_packed_rebuild( - &docs_path, num_shards, - &filter_names, &sort_names, &sort_bits, - ); - - // Summary - eprintln!("\n========================================"); - eprintln!(" SUMMARY ({} docs, {} shards, {} threads)", total_docs, num_shards, num_threads); - eprintln!("========================================"); - eprintln!(" Raw I/O: {:.2}s ({:.0} MB/s compressed, {:.0} MB/s decompressed)", - io_time, - compressed_bytes as f64 / 
io_time / 1e6, - decompressed_bytes as f64 / io_time / 1e6); - eprintln!(" Read + Decode: {:.2}s ({:.0} docs/s)", - decode_time, total_docs as f64 / decode_time); - eprintln!(" Full Rebuild: {:.2}s ({:.0} docs/s) [current: StoredDoc path]", - full_time, full_docs as f64 / full_time); - eprintln!(" Packed Rebuild: {:.2}s ({:.0} docs/s) [new: skip StoredDoc]", - packed_time, packed_docs as f64 / packed_time); - if packed_time < full_time { - eprintln!(" >>> Packed is {:.1}x FASTER than current <<<", - full_time / packed_time); - } - eprintln!(" ---"); - eprintln!(" Split-phase read: {:.2}s ({:.0} docs/s)", - split_read, split_docs as f64 / split_read); - eprintln!(" Split-phase bmap: {:.2}s ({:.0} docs/s)", - split_bitmap, split_docs as f64 / split_bitmap); - eprintln!(" ---"); - eprintln!(" nsfwLevel only: {:.2}s", nsfw_time); - eprintln!(" tagIds only: {:.2}s", tags_time); - eprintln!(" sortAt only: {:.2}s", sort_time); - eprintln!(" reactionCount: {:.2}s", reaction_time); - eprintln!(" ---"); - eprintln!(" Selective (1): {:.2}s (decode + 1 field lookup)", selective_1_time); - eprintln!(" Selective (5): {:.2}s (decode + 5 field lookups)", selective_all_time); - eprintln!(" ---"); - eprintln!(" Decode overhead: {:.1}x vs raw I/O", decode_time / io_time); - eprintln!(" Bitmap overhead: {:.1}x vs decode-only", full_time / decode_time); - eprintln!(" I/O vs CPU split: {:.0}% I/O, {:.0}% bitmap", - split_read / (split_read + split_bitmap) * 100.0, - split_bitmap / (split_read + split_bitmap) * 100.0); - eprintln!(" ---"); - eprintln!(" 105M extrapolation:"); - eprintln!(" Current: {:.0}s ({:.1} min)", total_docs as f64 / (full_docs as f64 / full_time) * (105e6 / total_docs as f64), - total_docs as f64 / (full_docs as f64 / full_time) * (105e6 / total_docs as f64) / 60.0); - eprintln!(" Packed: {:.0}s ({:.1} min)", total_docs as f64 / (packed_docs as f64 / packed_time) * (105e6 / total_docs as f64), - total_docs as f64 / (packed_docs as f64 / packed_time) * (105e6 / 
total_docs as f64) / 60.0); -} diff --git a/src/engine/concurrent_engine.rs b/src/engine/concurrent_engine.rs index c6027da..c88b6bf 100644 --- a/src/engine/concurrent_engine.rs +++ b/src/engine/concurrent_engine.rs @@ -554,17 +554,6 @@ impl ConcurrentEngine { } Ok(()) } - /// Persist dirty dictionaries to disk. Call after upserts that may have - /// created new LowCardinalityString values. Only writes dictionaries that - /// have new entries since the last persist, and clears their dirty flags. - /// - /// This ensures dictionary mappings survive crashes even before the next - /// full `save_snapshot()`. Dictionaries are small (typically < 1 KB), so - /// the I/O cost is negligible. - pub fn persist_dirty_dictionaries(&self) -> Result<()> { - // No-op: BitmapSilo saves dictionaries at save_snapshot time. - Ok(()) - } /// Load dictionaries from disk for all LowCardinalityString fields in the schema. pub fn load_dictionaries( schema: &crate::config::DataSchema, @@ -739,20 +728,11 @@ impl ConcurrentEngine { // Read directly from DataSilo (no separate doc cache — DataSilo uses mmap). Ok(self.docstore.lock().get(slot_id)?) } - /// Compact the docstore, reclaiming space from old write transactions. - pub fn compact_docstore(&self) -> Result { - Ok(self.docstore.lock().compact()?) - } /// Configure docstore field defaults from a DataSchema. /// Must be called before `prepare_bulk_writer()` so the BulkWriter inherits the defaults. pub fn set_docstore_defaults(&self, schema: &crate::config::DataSchema) { self.docstore.lock().set_field_defaults(schema); } - /// Get the current schema version from the docstore. - pub fn docstore_schema_version(&self) -> u8 { - self.docstore.lock().schema_version() - } - /// Get a clone of the Arc> for external writers. 
pub fn docstore_arc(&self) -> Arc> { Arc::clone(&self.docstore) @@ -771,19 +751,6 @@ impl ConcurrentEngine { self.docstore.lock().prepare_field_names(field_names) .map_err(|e| crate::error::BitdexError::Storage(format!("prepare_field_names: {e}"))) } - /// Return the set of indexed field names (filter + sort + "id"). - /// Used by the loader to strip doc-only fields from the bitmap accumulator. - pub fn indexed_field_names(&self) -> std::collections::HashSet { - let mut s = std::collections::HashSet::new(); - for f in &self.config.filter_fields { - s.insert(f.name.clone()); - } - for f in &self.config.sort_fields { - s.insert(f.name.clone()); - } - s.insert("id".to_string()); - s - } /// Get the current pending buffer depth. Always 0 (tier 2 removed). pub fn pending_depth(&self) -> usize { 0 diff --git a/src/engine/executor.rs b/src/engine/executor.rs index 3136d64..95cc831 100644 --- a/src/engine/executor.rs +++ b/src/engine/executor.rs @@ -90,30 +90,7 @@ impl<'a> QueryExecutor<'a> { bitmap_silo, } } - /// Attach string maps for MappedString field reverse lookup. - /// Enables querying with `Value::String("SD 1.5")` on MappedString fields. - pub fn with_string_maps(mut self, maps: &'a StringMaps) -> Self { - self.string_maps = Some(maps); - self - } - /// Attach case-sensitive field set for string matching control. - pub fn with_case_sensitive_fields(mut self, fields: &'a CaseSensitiveFields) -> Self { - self.case_sensitive_fields = Some(fields); - self - } - /// Attach live dictionaries for LowCardinalityString field query resolution. - /// Used as fallback when the string_maps snapshot doesn't have a recently-added value. - pub fn with_dictionaries(mut self, dicts: &'a HashMap) -> Self { - self.dictionaries = Some(dicts); - self - } - /// Attach a BitmapSilo for frozen bitmap reads. - /// When filter/sort bitmaps are unloaded, the executor reads frozen data - /// directly from the silo's mmap (zero-copy, near-zero heap). 
- pub fn with_bitmap_silo(mut self, silo: &'a BitmapSilo) -> Self { - self.bitmap_silo = Some(silo); - self - } + /// Attach a time bucket manager for in-executor bucket snapping (C3). /// Range filters on the bucketed field will be snapped to pre-computed bitmaps. pub fn with_time_buckets(mut self, tb: &'a crate::time_buckets::TimeBucketManager, now: u64) -> Self { @@ -265,17 +242,6 @@ impl<'a> QueryExecutor<'a> { total_matched, }) } - /// Check if a single slot matches all the given filter clauses. - /// Used by post-validation to revalidate slots that overlap with in-flight writes. - pub fn slot_matches_filters(&self, slot: u32, clauses: &[FilterClause]) -> Result { - for clause in clauses { - let bitmap = self.evaluate_clause(clause)?; - if !bitmap.contains(slot) { - return Ok(false); - } - } - Ok(true) - } /// Execute from a pre-computed filter bitmap: alive AND + sort + paginate. /// Used when the caller handles cache interaction separately. pub fn execute_from_bitmap( diff --git a/src/engine/filter.rs b/src/engine/filter.rs index b3b8b4c..7a17535 100644 --- a/src/engine/filter.rs +++ b/src/engine/filter.rs @@ -173,10 +173,6 @@ impl FilterField { pub fn cardinality(&self, value: u64) -> u64 { self.bitmaps.get(&value).map_or(0, |vb| vb.base_len()) } - /// Get the number of distinct values tracked. - pub fn distinct_count(&self) -> usize { - self.bitmaps.len() - } /// Compute the union of bitmaps for multiple values (OR). pub fn union(&self, values: &[u64]) -> RoaringBitmap { let mut result = RoaringBitmap::new(); diff --git a/src/engine/sort.rs b/src/engine/sort.rs index 34ab993..02f0eec 100644 --- a/src/engine/sort.rs +++ b/src/engine/sort.rs @@ -376,6 +376,7 @@ impl SortField { /// Iterates every slot in `universe` and reconstructs its value from the /// bit layers. O(universe_size * num_bits) — acceptable when the matching /// fraction is small (e.g. a 300-second window out of 86400 seconds). 
+ #[cfg(test)] pub fn slots_in_range( &self, universe: &RoaringBitmap, @@ -429,25 +430,6 @@ impl SortField { } } - /// Get base bitmap references for all layers (for persistence). - /// Only valid when layers are clean (merged). - pub fn layer_bases(&self) -> Vec<&RoaringBitmap> { - self.bit_layers - .iter() - .map(|vb| { - debug_assert!(!vb.is_dirty(), "persisting dirty sort layer"); - vb.base() - }) - .collect() - } - - /// Get fused bitmap references for all layers (for zero-copy persistence). - /// Returns `Cow::Borrowed` when the layer is clean (zero copy), - /// `Cow::Owned` when the layer has pending diffs. - pub fn layer_bases_fused(&self) -> Vec> { - self.bit_layers.iter().map(|vb| vb.fused_cow()).collect() - } - /// Return the serialized byte size of all bit layer bitmaps. pub fn bitmap_bytes(&self) -> usize { self.bit_layers.iter().map(|bm| bm.bitmap_bytes()).sum() diff --git a/src/mutation.rs b/src/mutation.rs index 198625e..b666cb7 100644 --- a/src/mutation.rs +++ b/src/mutation.rs @@ -95,12 +95,14 @@ pub enum FieldValue { } /// A partial update payload for PATCH operations. /// Contains only the changed fields with old and new values. +#[cfg(test)] #[derive(Debug, Clone)] pub struct PatchPayload { pub fields: HashMap, } /// A single field change in a PATCH operation. /// Both old and new values come from the WAL event -- we never look up stored state. +#[cfg(test)] #[derive(Debug, Clone)] pub struct PatchField { pub old: FieldValue, @@ -333,6 +335,7 @@ pub fn diff_document( /// but ONLY processes fields present in new_doc. Missing fields are skipped /// entirely — they are NOT treated as deletions. This is the key difference /// from diff_document which treats missing fields as "change to None." +#[cfg(test)] pub fn diff_document_partial( slot: u32, old_doc: Option<&StoredDoc>, @@ -453,6 +456,7 @@ pub fn diff_document_partial( ops } /// Pure diff for PATCH: given old/new field values, returns MutationOps. 
+#[cfg(test)] pub fn diff_patch( slot: u32, patch: &PatchPayload, @@ -709,6 +713,7 @@ fn emit_sort_diff_ops( } } /// The core mutation engine. Applies PUT/PATCH/DELETE/DELETE WHERE to bitmaps. +#[cfg(test)] pub struct MutationEngine<'a> { slots: &'a mut SlotAllocator, filters: &'a mut FilterIndex, @@ -716,6 +721,7 @@ pub struct MutationEngine<'a> { config: &'a Config, docstore: &'a mut DocSiloAdapter, } +#[cfg(test)] impl<'a> MutationEngine<'a> { pub fn new( slots: &'a mut SlotAllocator, diff --git a/src/ops_processor.rs b/src/ops_processor.rs index 0a08179..8db7885 100644 --- a/src/ops_processor.rs +++ b/src/ops_processor.rs @@ -12,9 +12,8 @@ //! into BitmapSink calls using the engine Config for field awareness and //! `value_to_bitmap_key()` / `value_to_sort_u32()` for value conversion. use std::collections::HashMap; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::sync::Arc; -use std::time::Duration; use serde_json::Value as JsonValue; use crate::engine::ConcurrentEngine; use crate::config::Config; @@ -180,140 +179,6 @@ fn json_to_field_value(v: &JsonValue) -> Option { _ => None, } } -// --------------------------------------------------------------------------- -// Document → Ops decomposition (for PUT/PATCH → WAL refactor, task 2.7) -// --------------------------------------------------------------------------- -/// Convert a FieldValue to a serde_json::Value for Op serialization. -pub fn field_value_to_json(fv: &crate::mutation::FieldValue) -> JsonValue { - match fv { - crate::mutation::FieldValue::Single(v) => qvalue_to_json(v), - crate::mutation::FieldValue::Multi(vals) => { - JsonValue::Array(vals.iter().map(qvalue_to_json).collect()) - } - } -} -/// Convert a query::Value to a serde_json::Value. 
-fn qvalue_to_json(v: &QValue) -> JsonValue { - match v { - QValue::Integer(i) => JsonValue::Number(serde_json::Number::from(*i)), - QValue::Float(f) => { - serde_json::Number::from_f64(*f) - .map(JsonValue::Number) - .unwrap_or(JsonValue::Null) - } - QValue::Bool(b) => JsonValue::Bool(*b), - QValue::String(s) => JsonValue::String(s.clone()), - } -} -/// Decompose a Document into `Vec` for WAL writing. -/// -/// For fresh inserts (old_doc is None): emits Op::Set for each field. -/// For upserts (old_doc is Some): emits Op::Remove for old values + Op::Set for -/// new values on changed fields. Unchanged fields are skipped. -/// -/// Multi-value fields are decomposed into individual Op::Add/Op::Remove per value. -/// -/// `is_patch`: when true (PATCH semantics), fields absent from new_doc are left -/// untouched — no Op::Remove emitted. When false (PUT semantics), absent fields -/// are treated as deletions and their old bitmap bits are cleared. -pub fn document_to_ops( - new_doc: &crate::mutation::Document, - old_doc: Option<&crate::silos::doc_format::StoredDoc>, - config: &crate::config::Config, - is_patch: bool, -) -> Vec { - let mut ops = Vec::new(); - let empty_fields = std::collections::HashMap::new(); - let old_fields = old_doc.map_or(&empty_fields, |d| &d.fields); - // Process all fields in the new document - for (field_name, new_val) in &new_doc.fields { - let old_val = old_fields.get(field_name); - // Check if this is a multi-value field (tagIds, toolIds, etc.) 
- let is_multi_value = config.filter_fields.iter() - .any(|f| f.name == *field_name && f.field_type == crate::engine::filter::FilterFieldType::MultiValue); - if is_multi_value { - // Multi-value: compute add/remove sets - let old_ints = extract_multi_ints(old_val); - let new_ints = extract_multi_ints(Some(new_val)); - // Remove values that were in old but not in new - for v in &old_ints { - if !new_ints.contains(v) { - ops.push(Op::Remove { - field: field_name.clone(), - value: JsonValue::Number(serde_json::Number::from(*v)), - }); - } - } - // Add values that are in new but not in old - for v in &new_ints { - if !old_ints.contains(v) { - ops.push(Op::Add { - field: field_name.clone(), - value: JsonValue::Number(serde_json::Number::from(*v)), - }); - } - } - } else { - // Single-value field: remove old + set new if changed - if let Some(old) = old_val { - if old != new_val { - ops.push(Op::Remove { - field: field_name.clone(), - value: field_value_to_json(old), - }); - ops.push(Op::Set { - field: field_name.clone(), - value: field_value_to_json(new_val), - }); - } - // else: unchanged, skip - } else { - // New field (not in old doc) - ops.push(Op::Set { - field: field_name.clone(), - value: field_value_to_json(new_val), - }); - } - } - } - // For PUT upsert: handle fields that were in old doc but removed in new doc. - // PATCH skips this — absent fields are left untouched (partial update semantics). 
- if old_doc.is_some() && !is_patch { - for (field_name, old_val) in old_fields { - if !new_doc.fields.contains_key(field_name) { - // Field was removed - let is_multi_value = config.filter_fields.iter() - .any(|f| f.name == *field_name && f.field_type == crate::engine::filter::FilterFieldType::MultiValue); - if is_multi_value { - for v in extract_multi_ints(Some(old_val)) { - ops.push(Op::Remove { - field: field_name.clone(), - value: JsonValue::Number(serde_json::Number::from(v)), - }); - } - } else { - ops.push(Op::Remove { - field: field_name.clone(), - value: field_value_to_json(old_val), - }); - } - } - } - } - ops -} -/// Extract integer values from a multi-value FieldValue. -fn extract_multi_ints(fv: Option<&crate::mutation::FieldValue>) -> Vec { - match fv { - Some(crate::mutation::FieldValue::Multi(vals)) => { - vals.iter().filter_map(|v| { - if let QValue::Integer(i) = v { Some(*i) } else { None } - }).collect() - } - Some(crate::mutation::FieldValue::Single(QValue::Integer(i))) => vec![*i], - _ => Vec::new(), - } -} /// Convert a JSON value to a PackedValue for docstore storage. fn json_to_packed(v: &JsonValue) -> Option { match v { @@ -362,24 +227,6 @@ fn json_to_qvalue(v: &JsonValue) -> QValue { _ => QValue::String(v.to_string()), } } -/// Configuration for the ops processor. -pub struct OpsProcessorConfig { - /// Max records to read per WAL batch - pub batch_size: usize, - /// How long to sleep when no new records are available - pub poll_interval: Duration, - /// Path to persist the cursor position - pub cursor_path: PathBuf, -} -impl Default for OpsProcessorConfig { - fn default() -> Self { - Self { - batch_size: 10_000, - poll_interval: Duration::from_millis(50), - cursor_path: PathBuf::from("wal_cursor"), - } - } -} /// Info about a computed sort field: which source fields feed it and the operation. 
#[derive(Clone)] struct ComputedSortInfo { @@ -1882,87 +1729,6 @@ mod tests { assert_eq!(json_to_packed(&json!(null)), None); assert_eq!(json_to_packed(&json!([1, 2, 3])), Some(PackedValue::Mi(vec![1, 2, 3]))); } - // ----------------------------------------------------------------------- - // document_to_ops tests (2.7) - // ----------------------------------------------------------------------- - #[test] - fn test_document_to_ops_fresh_insert() { - use crate::mutation::{Document, FieldValue}; - use crate::query::Value as QValue; - let config = test_config(); - let mut fields = std::collections::HashMap::new(); - fields.insert("nsfwLevel".into(), FieldValue::Single(QValue::Integer(16))); - let doc = Document { fields }; - let ops = document_to_ops(&doc, None, &config, false); - // Should have a Set op for nsfwLevel - assert_eq!(ops.len(), 1); - match &ops[0] { - Op::Set { field, value } => { - assert_eq!(field, "nsfwLevel"); - assert_eq!(value, &json!(16)); - } - other => panic!("expected Set, got {:?}", other), - } - } - #[test] - fn test_document_to_ops_upsert_changed_field() { - use crate::mutation::{Document, FieldValue}; - use crate::query::Value as QValue; - let config = test_config(); - // Old doc: nsfwLevel=8 - let mut old_fields = std::collections::HashMap::new(); - old_fields.insert("nsfwLevel".into(), FieldValue::Single(QValue::Integer(8))); - let old_doc = crate::silos::doc_format::StoredDoc { fields: old_fields, schema_version: 0 }; - - // New doc: nsfwLevel=16 - let mut new_fields = std::collections::HashMap::new(); - new_fields.insert("nsfwLevel".into(), FieldValue::Single(QValue::Integer(16))); - let new_doc = Document { fields: new_fields }; - let ops = document_to_ops(&new_doc, Some(&old_doc), &config, false); - // Should have Remove(old=8) + Set(new=16) - assert_eq!(ops.len(), 2); - assert!(ops.iter().any(|op| matches!(op, Op::Remove { field, value } if field == "nsfwLevel" && value == &json!(8)))); - assert!(ops.iter().any(|op| matches!(op, 
Op::Set { field, value } if field == "nsfwLevel" && value == &json!(16)))); - } - #[test] - fn test_document_to_ops_unchanged_field_skipped() { - use crate::mutation::{Document, FieldValue}; - use crate::query::Value as QValue; - let config = test_config(); - let mut fields = std::collections::HashMap::new(); - fields.insert("nsfwLevel".into(), FieldValue::Single(QValue::Integer(8))); - - let old_doc = crate::silos::doc_format::StoredDoc { fields: fields.clone(), schema_version: 0 }; - let new_doc = Document { fields }; - let ops = document_to_ops(&new_doc, Some(&old_doc), &config, false); - assert!(ops.is_empty(), "unchanged fields should produce no ops"); - } - #[test] - fn test_document_to_ops_patch_preserves_absent_fields() { - use crate::mutation::{Document, FieldValue}; - use crate::query::Value as QValue; - let config = test_config(); - // Old doc has nsfwLevel=8 AND reactionCount sort field - let mut old_fields = std::collections::HashMap::new(); - old_fields.insert("nsfwLevel".into(), FieldValue::Single(QValue::Integer(8))); - let old_doc = crate::silos::doc_format::StoredDoc { fields: old_fields, schema_version: 0 }; - - // PATCH only sends userId=42 (nsfwLevel absent from patch) - let mut new_fields = std::collections::HashMap::new(); - new_fields.insert("userId".into(), FieldValue::Single(QValue::Integer(42))); - let new_doc = Document { fields: new_fields }; - // is_patch=true: absent fields should NOT generate Remove ops - let ops = document_to_ops(&new_doc, Some(&old_doc), &config, true); - let has_remove_nsfw = ops.iter().any(|op| matches!(op, Op::Remove { field, .. } if field == "nsfwLevel")); - assert!(!has_remove_nsfw, "PATCH should NOT remove absent fields (nsfwLevel)"); - // Should have Set for userId (new field) - let has_set_user = ops.iter().any(|op| matches!(op, Op::Set { field, .. 
} if field == "userId")); - assert!(has_set_user, "PATCH should set provided fields (userId)"); - // is_patch=false (PUT): absent fields SHOULD generate Remove ops - let ops_put = document_to_ops(&new_doc, Some(&old_doc), &config, false); - let has_remove_nsfw_put = ops_put.iter().any(|op| matches!(op, Op::Remove { field, .. } if field == "nsfwLevel")); - assert!(has_remove_nsfw_put, "PUT should remove absent fields (nsfwLevel)"); - } fn test_config_with_nullable() -> Config { let mut config = test_config(); config.filter_fields.push(FilterFieldConfig { diff --git a/src/server.rs b/src/server.rs index 59ab90e..2b6fdf0 100644 --- a/src/server.rs +++ b/src/server.rs @@ -24,7 +24,6 @@ use crate::engine::ConcurrentEngine; use crate::config::{Config, DataSchema, FieldValueType, FilterFieldConfig, SortFieldConfig}; use crate::silos::doc_format::StoredDoc; use crate::engine::executor::{CaseSensitiveFields, StringMaps}; -use crate::sync::loader; use crate::metrics::Metrics; use crate::mutation::FieldValue; use crate::query::{BitdexQuery, Value}; @@ -546,43 +545,6 @@ struct CreateIndexRequest { data_schema: DataSchema, } -#[derive(Deserialize)] -struct LoadRequest { - path: String, - #[serde(default)] - limit: Option, - #[serde(default = "default_threads")] - threads: usize, - #[serde(default = "default_chunk_size")] - chunk_size: usize, - #[serde(default = "default_docstore_batch_size")] - docstore_batch_size: usize, - #[serde(default = "default_max_writer_threads")] - max_writer_threads: usize, - #[serde(default)] - save_snapshot: bool, -} - -fn default_threads() -> usize { - // Unused by fused parse+bitmap loader (rayon manages parallelism), - // kept for API compat. 
- let logical = std::thread::available_parallelism() - .map(|n| n.get()) - .unwrap_or(8); - (logical / 2).clamp(4, 8) -} - -fn default_chunk_size() -> usize { - 500_000 -} - -fn default_docstore_batch_size() -> usize { - 100_000 -} - -fn default_max_writer_threads() -> usize { - 4 -} #[derive(Deserialize)] struct DocumentRequest { @@ -878,24 +840,6 @@ struct AddFieldsRequest { skip_validation: bool, } -/// Sync filter values for a filter_only multi-value field. -/// Replaces all bitmap memberships for the given slots on the named field. -#[derive(Deserialize)] -struct FilterSyncRequest { - /// The filter field name (must be a multi_value field). - field: String, - /// List of (slot, values) pairs to sync. - documents: Vec, -} - -#[derive(Deserialize)] -struct FilterSyncEntry { - /// The document/slot ID. - id: u32, - /// The complete set of values this slot should have for the field. - values: Vec, -} - #[derive(Deserialize)] struct RemoveFieldsRequest { #[serde(default)] @@ -1343,12 +1287,9 @@ impl BitdexServer { .route("/api/indexes", post(handle_create_index)) .route("/api/indexes/{name}", delete(handle_delete_index)) .route("/api/indexes/{name}/config", patch(handle_patch_config)) - .route("/api/indexes/{name}/load", post(handle_load)) .route("/api/indexes/{name}/documents", post(handle_documents_batch).delete(handle_delete_docs)) .route("/api/indexes/{name}/documents/{slot_id}", get(handle_get_document)) .route("/api/indexes/{name}/documents/upsert", post(handle_upsert)) - .route("/api/indexes/{name}/documents/patch", patch(handle_patch_documents)) - .route("/api/indexes/{name}/documents/filter-sync", post(handle_filter_sync)) .route("/api/indexes/{name}/cache", delete(handle_clear_cache)) .route("/api/indexes/{name}/cache/persistent", delete(handle_purge_cache)) .route("/api/indexes/{name}/warm", post(handle_warm_cache)) @@ -2278,104 +2219,6 @@ async fn handle_delete_index( Json(serde_json::json!({"status": "deleted"})).into_response() } -// 
--------------------------------------------------------------------------- -// Handlers: Data loading -// --------------------------------------------------------------------------- - -async fn handle_load( - State(state): State, - AxumPath(name): AxumPath, - Json(req): Json, -) -> impl IntoResponse { - let (engine, schema, tasks) = { - let guard = state.index.lock(); - match guard.as_ref() { - Some(idx) if idx.definition.name == name => ( - Arc::clone(&idx.engine), - idx.definition.data_schema.clone(), - Arc::clone(&idx.tasks), - ), - _ => { - return ( - StatusCode::NOT_FOUND, - Json(serde_json::json!({"error": format!("Index '{}' not found", name)})), - ).into_response(); - } - } - }; - - let path = PathBuf::from(&req.path); - if !path.exists() { - return ( - StatusCode::BAD_REQUEST, - Json(serde_json::json!({"error": format!("File not found: {}", req.path)})), - ).into_response(); - } - - let (task_id, progress) = match tasks.try_start(TaskType::Load) { - Ok(v) => v, - Err(active_info) => { - return ( - StatusCode::CONFLICT, - Json(serde_json::json!({ - "error": "A task is already running", - "active_task": serde_json::to_value(&active_info).unwrap(), - })), - ).into_response(); - } - }; - - let limit = req.limit; - let threads = req.threads; - let chunk_size = req.chunk_size; - let docstore_batch_size = req.docstore_batch_size; - let max_writer_threads = req.max_writer_threads; - let save_snapshot = req.save_snapshot; - - // Spawn blocking loading task with TaskGuard for panic safety - let tasks_clone = Arc::clone(&tasks); - tokio::task::spawn_blocking(move || { - let mut guard = TaskGuard { tasks: tasks_clone, task_id: Some(task_id) }; - - match loader::load_ndjson(&engine, &schema, &path, limit, threads, chunk_size, docstore_batch_size, max_writer_threads, progress.clone()) { - Ok(stats) => { - let alive; - - if save_snapshot { - guard.tasks.set_saving(task_id); - - let snap_start = Instant::now(); - if let Err(e) = engine.save_and_unload() { - 
eprintln!("Warning: failed to save_and_unload: {e}"); - } else { - eprintln!("save_and_unload complete in {:.1}s", snap_start.elapsed().as_secs_f64()); - } - // Alive bitmap is always preserved during unload - alive = engine.alive_count(); - } else { - alive = engine.alive_count(); - } - - eprintln!("Load complete: {} records alive", alive); - - guard.tasks.set_complete(task_id, Some(serde_json::json!({ - "records_loaded": stats.records_loaded, - "elapsed_secs": stats.elapsed.as_secs_f64(), - }))); - guard.defuse(); - } - Err(e) => { - guard.tasks.set_error(task_id, e.to_string()); - guard.defuse(); - } - } - }); - - ( - StatusCode::ACCEPTED, - Json(serde_json::json!({"task_id": task_id})), - ).into_response() -} // --------------------------------------------------------------------------- // Handlers: Query & documents @@ -2785,38 +2628,6 @@ async fn handle_upsert( ).into_response() } -/// PATCH /api/indexes/{name}/documents/patch -/// -/// Not implemented — use upsert (PUT) for all document writes. -async fn handle_patch_documents( - State(_state): State, - AxumPath(name): AxumPath, - Json(_req): Json, -) -> impl IntoResponse { - ( - StatusCode::NOT_IMPLEMENTED, - Json(serde_json::json!({ - "error": format!("PATCH is not implemented for index '{}'; use PUT upsert instead", name) - })), - ) -} - -/// Sync filter values — not implemented. -/// -/// This endpoint is no longer supported. Use upsert (PUT) for all document writes. 
-async fn handle_filter_sync( - State(_state): State, - AxumPath(name): AxumPath, - Json(_req): Json, -) -> impl IntoResponse { - ( - StatusCode::NOT_IMPLEMENTED, - Json(serde_json::json!({ - "error": format!("filter_sync is not implemented for index '{}'; use PUT upsert instead", name) - })), - ) -} - async fn handle_delete_docs( State(state): State, AxumPath(name): AxumPath, diff --git a/src/sync/bulk_loader.rs b/src/sync/bulk_loader.rs index e9eac91..5224fc0 100644 --- a/src/sync/bulk_loader.rs +++ b/src/sync/bulk_loader.rs @@ -1,218 +1,25 @@ //! Bulk loader utilities: PG CSV download + ClickHouse metrics download. //! -//! The V1 in-process bulk load pipeline (run_bulk_load / run_bulk_load_copy) has been -//! removed. Use the single-pass V2 loader via the pg-sync binary instead. +//! The V1 in-process bulk load pipeline has been removed. +//! Use the config-driven dump processor via the pg-sync binary instead. //! //! Remaining functionality: -//! - `download_all_tables` / `download_single_table`: Stream PG tables to local CSVs +//! - `download_phase_csvs`: Stream phase CSVs from PG to local files +//! - `download_from_sync_config`: Download all phases from sync config //! - `download_metrics_from_clickhouse`: Fetch aggregate metrics from ClickHouse -//! - `finalize_from_bitmaps` / `scalars_to_json`: Docstore finalization helpers (used by tests) +//! - `clear_done_markers`: Clear stale .done markers at boot -use std::collections::HashMap; use std::time::Instant; -use roaring::RoaringBitmap; use sqlx::PgPool; -use super::loader::BitmapAccum; - -use super::copy_queries; - // --------------------------------------------------------------------------- -// Compact per-image scalar storage (replaces 512-byte arena slots) +// PG CSV download // --------------------------------------------------------------------------- -/// Compact per-image scalar data stored during CSV processing. 
-/// -/// Only stores fields needed for docstore finalization that cannot be -/// reconstructed from filter/sort bitmaps. Multi-value fields (tagIds, -/// toolIds, etc.) are reconstructed from their filter bitmaps. -/// -/// At ~80 bytes avg per image (including heap strings), 107M images ≈ 8.5 GB. -/// This replaces the 60GB memory-mapped SlotArena. -#[derive(Debug)] -struct ImageScalars { - url: Option>, // Box instead of String saves 8 bytes/entry (no capacity field) - hash: Option>, - nsfw_level: u8, - user_id: u64, - image_type: u8, // encoded via encode_image_type - sort_at: u64, // epoch seconds - poi: bool, // image-level poi (OR'd with resource_poi at finalization) - minor: bool, - has_meta: bool, - on_site: bool, - post_id: u64, - posted_to_id: u64, - availability: u8, // encoded via encode_availability - blocked_for: u8, // encoded via encode_blocked_for - published_at_ms: u64, // milliseconds -} - -/// Per-slot resource enrichment data, written by the resources stream. -/// Stored separately because it arrives from a different CSV file. -#[derive(Debug, Default)] -struct ResourceEnrichment { - base_model: u8, // encoded via encode_base_model - resource_poi: bool, -} - -/// Statistics from a completed bulk load. -#[derive(Debug)] -pub struct BulkLoadStats { - pub records_loaded: u64, - pub errors: u64, - pub elapsed: std::time::Duration, -} - -// --------------------------------------------------------------------------- -// Phase 1: Download tables to local CSV files -// --------------------------------------------------------------------------- - -/// Table descriptor for the download phase. 
-struct TableDownload { - name: &'static str, - file: &'static str, -} - -const TABLES: &[TableDownload] = &[ - TableDownload { name: "images", file: "images.csv" }, - TableDownload { name: "posts", file: "posts.csv" }, - TableDownload { name: "tags", file: "tags.csv" }, - TableDownload { name: "tools", file: "tools.csv" }, - TableDownload { name: "techniques", file: "techniques.csv" }, - TableDownload { name: "resources", file: "resources.csv" }, - TableDownload { name: "model_versions", file: "model_versions.csv" }, - TableDownload { name: "models", file: "models.csv" }, - TableDownload { name: "collection_items", file: "collection_items.csv" }, -]; - -/// Download a single named table from PG to a CSV file. -/// Public wrapper for use by backfill module. -pub async fn download_single_table( - pool: &PgPool, - stage_dir: &std::path::Path, - name: &'static str, - file: &'static str, -) -> Result { - let table = TableDownload { name, file }; - download_table(pool, stage_dir, &table).await -} - -/// Download a single table from PG to a CSV file on the PVC. -/// Returns the number of bytes written. -/// Skips if the .done marker already exists. 
-async fn download_table( - pool: &PgPool, - stage_dir: &std::path::Path, - table: &TableDownload, -) -> Result { - use futures_util::TryStreamExt; - use tokio::io::AsyncWriteExt; - - let csv_path = stage_dir.join(table.file); - let done_path = stage_dir.join(format!("{}.done", table.file)); - - // Skip if already downloaded - if done_path.exists() { - let size = std::fs::metadata(&csv_path).map(|m| m.len()).unwrap_or(0); - eprintln!(" {}: already downloaded ({:.1} MB), skipping", table.name, size as f64 / 1048576.0); - return Ok(size); - } - - // Get the COPY stream for this table - let mut stream = match table.name { - "images" => copy_queries::copy_images(pool).await, - "posts" => copy_queries::copy_posts(pool).await, - "tags" => copy_queries::copy_tags(pool).await, - "tools" => copy_queries::copy_tools(pool).await, - "techniques" => copy_queries::copy_techniques(pool).await, - "resources" => copy_queries::copy_resources(pool).await, - "model_versions" => copy_queries::copy_model_versions(pool).await, - "models" => copy_queries::copy_models(pool).await, - "collection_items" => copy_queries::copy_collection_items(pool).await, - _ => return Err(format!("unknown table: {}", table.name)), - }.map_err(|e| format!("{}: COPY start failed: {e}", table.name))?; - - // Stream to file - let file = tokio::fs::File::create(&csv_path) - .await - .map_err(|e| format!("{}: create file: {e}", table.name))?; - let mut writer = tokio::io::BufWriter::with_capacity(1024 * 1024, file); - let mut bytes_written = 0u64; - let start = Instant::now(); - - while let Some(chunk) = stream - .try_next() - .await - .map_err(|e| format!("{}: COPY stream: {e}", table.name))? 
- { - writer - .write_all(&chunk) - .await - .map_err(|e| format!("{}: write: {e}", table.name))?; - bytes_written += chunk.len() as u64; - } - writer.flush().await.map_err(|e| format!("{}: flush: {e}", table.name))?; - - // Write .done marker - std::fs::write(&done_path, b"ok") - .map_err(|e| format!("{}: write done marker: {e}", table.name))?; - - let elapsed = start.elapsed(); - eprintln!( - " {}: {:.1} MB in {:.1}s ({:.0} MB/s)", - table.name, - bytes_written as f64 / 1048576.0, - elapsed.as_secs_f64(), - bytes_written as f64 / 1048576.0 / elapsed.as_secs_f64().max(0.001), - ); - - Ok(bytes_written) -} - -/// Download all tables from PG to CSV files on the PVC. -/// Each table runs concurrently. Completed tables are skipped on retry. -pub async fn download_all_tables( - pool: &PgPool, - stage_dir: &std::path::Path, -) -> Result<(), String> { - std::fs::create_dir_all(stage_dir) - .map_err(|e| format!("create stage dir: {e}"))?; - - eprintln!("\n=== Phase 1: Downloading tables to {} ===", stage_dir.display()); - let start = Instant::now(); - - // Download all tables concurrently - let results = tokio::join!( - download_table(pool, stage_dir, &TABLES[0]), // images - download_table(pool, stage_dir, &TABLES[1]), // posts - download_table(pool, stage_dir, &TABLES[2]), // tags - download_table(pool, stage_dir, &TABLES[3]), // tools - download_table(pool, stage_dir, &TABLES[4]), // techniques - download_table(pool, stage_dir, &TABLES[5]), // resources - download_table(pool, stage_dir, &TABLES[6]), // model_versions - download_table(pool, stage_dir, &TABLES[7]), // models - ); - - // Check all results - let mut total_bytes = 0u64; - for (i, result) in [results.0, results.1, results.2, results.3, results.4, results.5, results.6, results.7].into_iter().enumerate() { - total_bytes += result.map_err(|e| format!("download {} failed: {e}", TABLES[i].name))?; - } - - eprintln!( - "Phase 1 complete: {:.1} GB in {:.1}s", - total_bytes as f64 / (1024.0 * 1024.0 * 1024.0), - 
start.elapsed().as_secs_f64(), - ); - - Ok(()) -} - /// Download CSVs using copy_query from sync config dump phases. /// -/// Config-driven replacement for download_all_tables — uses the exact COPY SQL +/// Config-driven replacement for the old download_all_tables — uses the exact COPY SQL /// from each DumpPhase (and its enrichment lookups) instead of hardcoded queries. /// This ensures the CSVs match what the dump processor expects. pub async fn download_from_sync_config( @@ -404,331 +211,6 @@ async fn download_copy_query( Ok(bytes_written) } -// --------------------------------------------------------------------------- -// Arena-free docstore finalization (used by V1 bulk loader, kept for tests) -// --------------------------------------------------------------------------- - -/// Block size for chunked bitmap reconstruction. -/// Aligned with roaring bitmap container boundaries (65,536 = 2^16). -const FINALIZE_CHUNK_SIZE: u32 = 65_536; - -/// Finalize alive slots to the docstore by reconstructing multi-value fields -/// from filter bitmaps and combining with stored scalars. -/// -/// Processes alive slots in 65K-block chunks aligned to roaring container -/// boundaries for efficient `bitmap.range()` iteration. -fn finalize_from_bitmaps( - _schema: &crate::config::DataSchema, - _alive: &RoaringBitmap, - _image_scalars: &HashMap, - _resource_enrichments: &HashMap, - _tag_bitmaps: &HashMap, - _tool_bitmaps: &HashMap, - _technique_bitmaps: &HashMap, - _mv_bitmaps: &HashMap, -) -> Result<(u64, u64), String> { - // TODO: Rewrite for DataSilo when V1 bulk loader is needed - Err("finalize_from_bitmaps: not yet ported to DataSilo".to_string()) -} - -// V2 dump pipeline (dump_processor.rs) handles doc finalization via DataSilo - -/// Convert compact ImageScalars + reconstructed multi-value fields to a -/// JSON document matching the Bitdex data schema. -/// -/// Produces the same output as `slot_data_to_json` in slot_arena.rs. 
-fn scalars_to_json( - slot: u32, - s: &ImageScalars, - enrichment: Option<&ResourceEnrichment>, - tag_ids: &[u32], - tool_ids: &[u32], - technique_ids: &[u32], - model_version_ids: &[u32], -) -> serde_json::Value { - use super::slot_arena::{decode_image_type, decode_availability, decode_base_model}; - - let base_model_enum = enrichment.map(|e| e.base_model).unwrap_or(0); - let resource_poi = enrichment.map(|e| e.resource_poi).unwrap_or(false); - let poi = s.poi || resource_poi; - - let mut doc = serde_json::json!({ - "id": slot as i64, - "nsfwLevel": s.nsfw_level as i64, - "userId": s.user_id as i64, - "postId": s.post_id as i64, - "postedToId": s.posted_to_id as i64, - "type": decode_image_type(s.image_type), - "baseModel": decode_base_model(base_model_enum), - "availability": decode_availability(s.availability), - "tagIds": tag_ids.iter().map(|&t| t as i64).collect::>(), - "modelVersionIds": model_version_ids.iter().map(|&t| t as i64).collect::>(), - "modelVersionIdsManual": serde_json::json!([]), - "toolIds": tool_ids.iter().map(|&t| t as i64).collect::>(), - "techniqueIds": technique_ids.iter().map(|&t| t as i64).collect::>(), - "reactionCount": 0i64, - "commentCount": 0i64, - "collectedCount": 0i64, - "sortAt": s.sort_at as i64, - "publishedAt": (s.published_at_ms / 1000) as i64, - }); - - if let Some(obj) = doc.as_object_mut() { - // Exists-boolean: isPublished = publishedAt is non-zero (matches outbox row_assembler) - if s.published_at_ms > 0 { - obj.insert("isPublished".into(), serde_json::json!(true)); - } - if s.has_meta { - obj.insert("hasMeta".into(), serde_json::json!(true)); - } - if s.on_site { - obj.insert("onSite".into(), serde_json::json!(true)); - } - if poi { - obj.insert("poi".into(), serde_json::json!(true)); - } - if s.minor { - obj.insert("minor".into(), serde_json::json!(true)); - } - if let Some(ref url) = s.url { - obj.insert("url".into(), serde_json::json!(url.as_ref())); - } - if let Some(ref hash) = s.hash { - 
obj.insert("hash".into(), serde_json::json!(hash.as_ref())); - } - if s.blocked_for > 0 { - obj.insert("blockedFor".into(), serde_json::json!("blocked")); - } - } - - doc -} - -/// AND all filter and sort bitmaps in an accumulator against the alive bitmap. -/// -/// Returns the number of bitmaps that were modified (had orphan bits stripped). -/// This enforces the clean bitmap invariant: filter bitmaps must be subsets of alive. -fn cleanup_orphan_bitmaps(accum: &mut BitmapAccum, alive: &RoaringBitmap) -> usize { - let mut cleaned = 0; - for value_map in accum.filter_maps.values_mut() { - for bitmap in value_map.values_mut() { - let before = bitmap.len(); - *bitmap &= alive; - if bitmap.len() < before { - cleaned += 1; - } - } - } - for bit_map in accum.sort_maps.values_mut() { - for bitmap in bit_map.values_mut() { - let before = bitmap.len(); - *bitmap &= alive; - if bitmap.len() < before { - cleaned += 1; - } - } - } - cleaned -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - - fn make_scalars(slot: u32) -> ImageScalars { - ImageScalars { - url: Some(format!("https://example.com/{slot}.jpg").into_boxed_str()), - hash: Some(format!("hash{slot}").into_boxed_str()), - nsfw_level: 1, - user_id: slot as u64 * 7, - image_type: 0, // "image" - sort_at: 1700000000 + slot as u64, - poi: false, - minor: false, - has_meta: true, - on_site: false, - post_id: 100 + slot as u64, - posted_to_id: 200 + slot as u64, - availability: 0, // "Public" - blocked_for: 0, - published_at_ms: 1700000000000 + slot as u64 * 1000, - } - } - - #[test] - fn test_scalars_to_json_basic() { - let scalars = make_scalars(42); - let json = scalars_to_json(42, &scalars, None, &[], &[], &[], &[]); - - let obj = json.as_object().unwrap(); - assert_eq!(obj["id"], 42); - assert_eq!(obj["nsfwLevel"], 1); - assert_eq!(obj["userId"], 42 * 
7); - assert_eq!(obj["type"], "image"); - assert_eq!(obj["url"], "https://example.com/42.jpg"); - assert_eq!(obj["hash"], "hash42"); - assert_eq!(obj["hasMeta"], true); - assert_eq!(obj["tagIds"].as_array().unwrap().len(), 0); - assert_eq!(obj["modelVersionIds"].as_array().unwrap().len(), 0); - } - - #[test] - fn test_scalars_to_json_with_multi_value() { - let scalars = make_scalars(10); - let tags = vec![100u32, 200, 300]; - let tools = vec![50u32]; - let techniques = vec![5u32, 6]; - let mvs = vec![999u32, 888]; - - let json = scalars_to_json(10, &scalars, None, &tags, &tools, &techniques, &mvs); - let obj = json.as_object().unwrap(); - - let tag_ids: Vec = obj["tagIds"].as_array().unwrap() - .iter().map(|v| v.as_i64().unwrap()).collect(); - assert_eq!(tag_ids, vec![100, 200, 300]); - - let tool_ids: Vec = obj["toolIds"].as_array().unwrap() - .iter().map(|v| v.as_i64().unwrap()).collect(); - assert_eq!(tool_ids, vec![50]); - - let mv_ids: Vec = obj["modelVersionIds"].as_array().unwrap() - .iter().map(|v| v.as_i64().unwrap()).collect(); - assert_eq!(mv_ids, vec![999, 888]); - } - - #[test] - fn test_scalars_to_json_with_enrichment() { - let scalars = make_scalars(5); - let enrichment = ResourceEnrichment { - base_model: 3, // SDXL 1.0 - resource_poi: true, - }; - - let json = scalars_to_json(5, &scalars, Some(&enrichment), &[], &[], &[], &[]); - let obj = json.as_object().unwrap(); - - assert_eq!(obj["baseModel"], "SDXL 1.0"); - assert_eq!(obj["poi"], true); // resource_poi OR'd with image poi - } - - #[test] - fn test_scalars_to_json_poi_or() { - // Image poi=true, resource_poi=false → poi=true - let mut scalars = make_scalars(1); - scalars.poi = true; - let json = scalars_to_json(1, &scalars, None, &[], &[], &[], &[]); - assert_eq!(json["poi"], true); - - // Image poi=false, resource_poi=true → poi=true - let scalars2 = make_scalars(2); - let enrichment = ResourceEnrichment { base_model: 0, resource_poi: true }; - let json2 = scalars_to_json(2, &scalars2, 
Some(&enrichment), &[], &[], &[], &[]); - assert_eq!(json2["poi"], true); - - // Image poi=false, resource_poi=false → no poi field - let scalars3 = make_scalars(3); - let json3 = scalars_to_json(3, &scalars3, None, &[], &[], &[], &[]); - assert!(json3.get("poi").is_none()); - } - - #[test] - fn test_scalars_to_json_blocked_for() { - let mut scalars = make_scalars(1); - scalars.blocked_for = 1; // some blocked_for value - let json = scalars_to_json(1, &scalars, None, &[], &[], &[], &[]); - assert_eq!(json["blockedFor"], "blocked"); - } - - #[test] - fn test_bitmap_reconstruction_single_chunk() { - // Simulate the bitmap reconstruction logic for a single chunk - let mut tag_bitmaps: HashMap = HashMap::new(); - - // Tag 100 is on slots 5 and 10 - let mut bm100 = RoaringBitmap::new(); - bm100.insert(5); - bm100.insert(10); - tag_bitmaps.insert(100, bm100); - - // Tag 200 is on slot 5 only - let mut bm200 = RoaringBitmap::new(); - bm200.insert(5); - tag_bitmaps.insert(200, bm200); - - // Tag 300 is on slot 10 only - let mut bm300 = RoaringBitmap::new(); - bm300.insert(10); - tag_bitmaps.insert(300, bm300); - - // Reconstruct for chunk 0..65536 - let chunk_start: u32 = 0; - let chunk_end: u32 = FINALIZE_CHUNK_SIZE; - let mut chunk_tags: Vec> = vec![Vec::new(); FINALIZE_CHUNK_SIZE as usize]; - - for (&tag_id, bm) in &tag_bitmaps { - for slot in bm.range(chunk_start..chunk_end) { - chunk_tags[(slot - chunk_start) as usize].push(tag_id as u32); - } - } - - // Slot 5 should have tags [100, 200] (order may vary) - let mut tags_5 = chunk_tags[5].clone(); - tags_5.sort(); - assert_eq!(tags_5, vec![100, 200]); - - // Slot 10 should have tags [100, 300] (order may vary) - let mut tags_10 = chunk_tags[10].clone(); - tags_10.sort(); - assert_eq!(tags_10, vec![100, 300]); - - // Slot 0 should have no tags - assert!(chunk_tags[0].is_empty()); - } - - #[test] - fn test_bitmap_reconstruction_cross_chunk() { - // Test that slots in different chunks are correctly handled - let mut 
tag_bitmaps: HashMap = HashMap::new(); - - // Tag 100 spans two chunks - let mut bm = RoaringBitmap::new(); - bm.insert(100); // chunk 0 - bm.insert(FINALIZE_CHUNK_SIZE + 50); // chunk 1 - tag_bitmaps.insert(100, bm); - - // Check chunk 0 - let mut chunk0: Vec> = vec![Vec::new(); FINALIZE_CHUNK_SIZE as usize]; - for (&tag_id, bm) in &tag_bitmaps { - for slot in bm.range(0..FINALIZE_CHUNK_SIZE) { - chunk0[(slot) as usize].push(tag_id as u32); - } - } - assert_eq!(chunk0[100], vec![100u32]); - - // Check chunk 1 - let chunk1_start = FINALIZE_CHUNK_SIZE; - let chunk1_end = FINALIZE_CHUNK_SIZE * 2; - let mut chunk1: Vec> = vec![Vec::new(); FINALIZE_CHUNK_SIZE as usize]; - for (&tag_id, bm) in &tag_bitmaps { - for slot in bm.range(chunk1_start..chunk1_end) { - chunk1[(slot - chunk1_start) as usize].push(tag_id as u32); - } - } - assert_eq!(chunk1[50], vec![100u32]); - } - - #[test] - fn test_resource_enrichment_default() { - let enrichment = ResourceEnrichment::default(); - assert_eq!(enrichment.base_model, 0); - assert!(!enrichment.resource_poi); - } -} - // --------------------------------------------------------------------------- // ClickHouse metrics download // --------------------------------------------------------------------------- diff --git a/src/sync/copy_queries.rs b/src/sync/copy_queries.rs deleted file mode 100644 index 6c6d285..0000000 --- a/src/sync/copy_queries.rs +++ /dev/null @@ -1,364 +0,0 @@ -//! PostgreSQL COPY TO STDOUT queries and CSV chunk parser for bulk loading. -//! -//! Each table is streamed independently with no JOINs. -//! -//! This is significantly faster than JOIN-based loading because: -//! - No per-row deserialization through sqlx's type system -//! - No intermediate `Vec` allocation per batch -//! - Streaming backpressure: we process as fast as we can consume -//! 
- No JOINs: each table streams at sequential scan speed - -use bytes::Bytes; -use futures_core::stream::BoxStream; -use sqlx::postgres::PgPoolCopyExt; -use sqlx::PgPool; - -// --------------------------------------------------------------------------- -// COPY query functions — one per table, no JOINs -// --------------------------------------------------------------------------- - -/// Stream Image table via COPY CSV (no JOINs). -/// -/// Columns (13): id, url, nsfwLevel, hash, flags, type, userId, blockedFor, -/// scannedAtSecs, createdAtSecs, postId, width, height -pub async fn copy_images( - pool: &PgPool, -) -> Result>, sqlx::Error> { - pool.copy_out_raw( - r#"COPY (SELECT id, url, "nsfwLevel", hash, flags, type::text, - "userId", "blockedFor", - extract(epoch from "scannedAt")::bigint, - extract(epoch from "createdAt")::bigint, - "postId", - width, height - FROM "Image" - ) TO STDOUT WITH (FORMAT csv)"#, - ) - .await -} - -/// Stream Post table via COPY CSV for enrichment. -/// -/// Columns (4): id, publishedAtSecs, availability, modelVersionId -pub async fn copy_posts( - pool: &PgPool, -) -> Result>, sqlx::Error> { - pool.copy_out_raw( - r#"COPY (SELECT id, - extract(epoch from "publishedAt")::bigint, - availability::text, - "modelVersionId" - FROM "Post" - ) TO STDOUT WITH (FORMAT csv)"#, - ) - .await -} - -/// Stream tags via COPY CSV (unordered). -/// -/// Columns (2): tagId, imageId -pub async fn copy_tags( - pool: &PgPool, -) -> Result>, sqlx::Error> { - pool.copy_out_raw( - r#"COPY (SELECT "tagId", "imageId" FROM "TagsOnImageDetails" WHERE disabled = false) TO STDOUT WITH (FORMAT csv)"#, - ) - .await -} - -/// Stream tools via COPY CSV (unordered). -/// -/// Columns (2): toolId, imageId -pub async fn copy_tools( - pool: &PgPool, -) -> Result>, sqlx::Error> { - pool.copy_out_raw( - r#"COPY (SELECT "toolId", "imageId" FROM "ImageTool") TO STDOUT WITH (FORMAT csv)"#, - ) - .await -} - -/// Stream techniques via COPY CSV (unordered). 
-/// -/// Columns (2): techniqueId, imageId -pub async fn copy_techniques( - pool: &PgPool, -) -> Result>, sqlx::Error> { - pool.copy_out_raw( - r#"COPY (SELECT "techniqueId", "imageId" FROM "ImageTechnique") TO STDOUT WITH (FORMAT csv)"#, - ) - .await -} - -/// Stream ImageResourceNew via COPY CSV (no JOINs). -/// -/// Columns (3): imageId, modelVersionId, detected -pub async fn copy_resources( - pool: &PgPool, -) -> Result>, sqlx::Error> { - pool.copy_out_raw( - r#"COPY (SELECT "imageId", "modelVersionId", detected FROM "ImageResourceNew") TO STDOUT WITH (FORMAT csv)"#, - ) - .await -} - -/// Stream ModelVersion table via COPY CSV for enrichment. -/// -/// Columns (3): id, baseModel, modelId -pub async fn copy_model_versions( - pool: &PgPool, -) -> Result>, sqlx::Error> { - pool.copy_out_raw( - r#"COPY (SELECT id, "baseModel", "modelId" FROM "ModelVersion") TO STDOUT WITH (FORMAT csv)"#, - ) - .await -} - -/// Stream CollectionItem via COPY CSV (accepted image collections only). -/// -/// Columns (2): collectionId, imageId -pub async fn copy_collection_items( - pool: &PgPool, -) -> Result>, sqlx::Error> { - pool.copy_out_raw( - r#"COPY (SELECT "collectionId", "imageId" FROM "CollectionItem" WHERE "imageId" IS NOT NULL AND status = 'ACCEPTED') TO STDOUT WITH (FORMAT csv)"#, - ) - .await -} - -/// Stream Model table via COPY CSV for enrichment. -/// -/// Columns (3): id, poi, type -pub async fn copy_models( - pool: &PgPool, -) -> Result>, sqlx::Error> { - pool.copy_out_raw( - r#"COPY (SELECT id, poi, type::text FROM "Model") TO STDOUT WITH (FORMAT csv)"#, - ) - .await -} - -// --------------------------------------------------------------------------- -// CSV chunk parser -// --------------------------------------------------------------------------- - -/// Incremental CSV parser that buffers across `Bytes` chunk boundaries. -/// -/// PostgreSQL's `COPY ... TO STDOUT WITH (FORMAT csv)` sends data in arbitrary -/// chunk sizes that may split CSV rows mid-line. 
This parser accumulates bytes -/// and yields only complete lines. -pub struct CopyParser { - buffer: Vec, -} - -impl CopyParser { - pub fn new() -> Self { - Self { - buffer: Vec::with_capacity(64 * 1024), - } - } - - /// Feed a chunk of bytes. Returns complete lines that can be parsed. - /// Retains any incomplete trailing line in the internal buffer. - pub fn feed(&mut self, chunk: &[u8]) -> Vec> { - self.buffer.extend_from_slice(chunk); - - let mut lines = Vec::new(); - let mut start = 0; - let mut in_quote = false; - - let buf = &self.buffer; - let len = buf.len(); - let mut i = 0; - - while i < len { - let b = buf[i]; - if b == b'"' { - in_quote = !in_quote; - } else if b == b'\n' && !in_quote { - // Complete line found (excluding the newline). - lines.push(buf[start..i].to_vec()); - start = i + 1; - } - i += 1; - } - - // Keep the incomplete trailing data for the next feed. - if start == len { - self.buffer.clear(); - } else if start > 0 { - // Shift remaining bytes to the front. - let remaining = self.buffer[start..].to_vec(); - self.buffer = remaining; - } - // If start == 0, the entire buffer is an incomplete line — keep as-is. - - lines - } -} - -// --------------------------------------------------------------------------- -// CSV field splitting -// --------------------------------------------------------------------------- - -/// Split a CSV line into fields, handling quoted fields. -/// -/// Rules (PostgreSQL CSV format): -/// - Fields separated by `,` -/// - Quoted fields start and end with `"` -/// - A literal `"` inside a quoted field is represented as `""` -/// - NULL is an empty unquoted field -fn split_csv_fields(line: &[u8]) -> Vec> { - let mut fields = Vec::new(); - let mut i = 0; - let len = line.len(); - - while i <= len { - if i == len { - fields.push(Vec::new()); - break; - } - - if line[i] == b'"' { - // Quoted field. 
- let mut field = Vec::new(); - i += 1; // skip opening quote - while i < len { - if line[i] == b'"' { - if i + 1 < len && line[i + 1] == b'"' { - field.push(b'"'); - i += 2; - } else { - i += 1; - break; - } - } else { - field.push(line[i]); - i += 1; - } - } - fields.push(field); - if i < len && line[i] == b',' { - i += 1; - } - } else { - // Unquoted field — scan until comma or end. - let start = i; - while i < len && line[i] != b',' { - i += 1; - } - fields.push(line[start..i].to_vec()); - if i < len { - i += 1; // skip comma - } else { - break; - } - } - } - - fields -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parser_basic_lines() { - let mut parser = CopyParser::new(); - let lines = parser.feed(b"100,hello,42\n200,world,99\n"); - assert_eq!(lines.len(), 2); - assert_eq!(lines[0], b"100,hello,42"); - assert_eq!(lines[1], b"200,world,99"); - } - - #[test] - fn test_parser_chunk_boundary() { - let mut parser = CopyParser::new(); - let lines1 = parser.feed(b"100,hello\n200,wor"); - assert_eq!(lines1.len(), 1); - assert_eq!(lines1[0], b"100,hello"); - let lines2 = parser.feed(b"ld\n"); - assert_eq!(lines2.len(), 1); - assert_eq!(lines2[0], b"200,world"); - } - - #[test] - fn test_parser_no_trailing_newline() { - let mut parser = CopyParser::new(); - let lines = parser.feed(b"100,hello\n200,world"); - assert_eq!(lines.len(), 1); - assert_eq!(lines[0], b"100,hello"); - let lines2 = parser.feed(b"\n"); - assert_eq!(lines2.len(), 1); - assert_eq!(lines2[0], b"200,world"); - } - - #[test] - fn test_parser_empty_fields_null() { - let mut parser = CopyParser::new(); - let lines = parser.feed(b"100,,42,,\n"); - assert_eq!(lines.len(), 1); - let fields = split_csv_fields(&lines[0]); - assert_eq!(fields.len(), 5); - assert_eq!(fields[0], b"100"); - assert!(fields[1].is_empty()); 
- assert_eq!(fields[2], b"42"); - assert!(fields[3].is_empty()); - assert!(fields[4].is_empty()); - } - - #[test] - fn test_parser_quoted_field_with_comma() { - let mut parser = CopyParser::new(); - let lines = parser.feed(b"100,\"hello,world\",42\n"); - assert_eq!(lines.len(), 1); - let fields = split_csv_fields(&lines[0]); - assert_eq!(fields.len(), 3); - assert_eq!(fields[1], b"hello,world"); - } - - #[test] - fn test_parser_quoted_field_with_escaped_quote() { - let mut parser = CopyParser::new(); - let lines = parser.feed(b"100,\"say \"\"hi\"\"\",42\n"); - assert_eq!(lines.len(), 1); - let fields = split_csv_fields(&lines[0]); - assert_eq!(fields[1], b"say \"hi\""); - } - - #[test] - fn test_parser_quoted_field_with_newline() { - let mut parser = CopyParser::new(); - let lines = parser.feed(b"100,\"line1\nline2\",42\n"); - assert_eq!(lines.len(), 1); - let fields = split_csv_fields(&lines[0]); - assert_eq!(fields[1], b"line1\nline2"); - } - - #[test] - fn test_split_csv_simple() { - let fields = split_csv_fields(b"a,b,c"); - assert_eq!(fields.len(), 3); - } - - #[test] - fn test_split_csv_trailing_comma() { - let fields = split_csv_fields(b"a,b,"); - assert_eq!(fields.len(), 3); - assert_eq!(fields[2], b""); - } - - #[test] - fn test_multiple_chunks_interleaved() { - let mut parser = CopyParser::new(); - let lines1 = parser.feed(b"1,a\n2,"); - assert_eq!(lines1.len(), 1); - let lines2 = parser.feed(b"b\n3,c\n"); - assert_eq!(lines2.len(), 2); - } -} diff --git a/src/sync/dump_enrichment.rs b/src/sync/dump_enrichment.rs index 5203759..fe8684d 100644 --- a/src/sync/dump_enrichment.rs +++ b/src/sync/dump_enrichment.rs @@ -25,10 +25,9 @@ use ahash::AHashMap as HashMap; use std::io::{self, BufRead, BufReader}; -use std::path::{Path, PathBuf}; +use std::path::PathBuf; use std::sync::Arc; -use crate::dictionary::FieldDictionary; use super::dump_expression::{ ColumnIndex, ComputedFieldDef, CsvRow, ExprValue, FilterExpression, }; @@ -68,28 +67,8 @@ pub struct LookupRow 
{ } impl LookupRow { - /// Get a column value by name. - pub fn get(&self, column: &str) -> Option<&str> { - let idx = self.col_index.get(column)?; - self.values.get(*idx)?.as_deref() - } - - /// Convert to CsvRow for expression evaluation. - pub fn to_csv_row(&self) -> CsvRow<'_> { - let mut row = CsvRow::new(); - for (name, &idx) in self.col_index.as_ref() { - let val = self.values.get(idx).and_then(|v| v.as_deref()); - row.insert(name.as_str(), val); - } - row - } - - /// Iterate over (column_name, value) pairs (non-null only). - pub fn iter_columns(&self) -> impl Iterator { - self.col_index.iter().filter_map(move |(name, &idx)| { - self.values.get(idx)?.as_deref().map(|v| (name.as_str(), v)) - }) - } + // No public methods needed — LookupRow is internal to EnrichmentTable. + // Accessed via indexed path (enrich_indexed_into_with_buf) only. } /// Mmap-backed dense offset index for enrichment lookups. @@ -405,61 +384,6 @@ impl EnrichmentTable { }) } - /// Look up a row by key value (HashMap path only). - /// Look up a row by key (HashMap path only — panics for Mmap-backed tables). - /// For Mmap tables, use enrich_indexed_into or enrich_key_into instead. - pub fn get(&self, key: i64) -> Option<&LookupRow> { - match &self.storage { - EnrichmentStorage::HashMap(data) => data.get(&key), - EnrichmentStorage::Mmap(_) => panic!("get() not supported for Mmap-backed tables — use enrich_indexed_into() or enrich_key_into()"), - } - } - - /// Get the nested child table (if any). - pub fn child(&self) -> Option<&EnrichmentTable> { - self.child.as_deref() - } - - /// Enrich a parent row using this lookup table and its config. 
- /// - /// This is the full enrichment resolution that handles the filter-on-nested pattern: - /// Resources → MV (by modelVersionId) → Model (by modelId) → if type='Checkpoint', set baseModel - pub fn enrich<'a>( - &self, - parent_row: &CsvRow<'a>, - config: &EnrichmentConfig, - ) -> EnrichedFields { - let mut result = EnrichedFields::default(); - - // Get join key from parent row - let join_value = match parent_row.get(config.join_on.as_str()) { - Some(Some(v)) if !v.is_empty() => *v, - _ => return result, - }; - - let join_key: i64 = match join_value.parse() { - Ok(k) => k, - Err(_) => return result, - }; - - self.enrich_key_into(join_key, config, &mut result); - result - } - - /// Enrich using indexed parent row (zero-allocation hot path for 107M+ rows). - /// - /// The parent row is `&[Option<&str>]` + `ColumnIndex` — no HashMap per row. - pub fn enrich_indexed( - &self, - parent_fields: &[Option<&str>], - parent_col_idx: &ColumnIndex, - config: &EnrichmentConfig, - ) -> EnrichedFields { - let mut result = EnrichedFields::default(); - self.enrich_indexed_into(parent_fields, parent_col_idx, config, &mut result); - result - } - /// Enrich into a pre-allocated buffer (avoids Vec reallocation across rows). pub fn enrich_indexed_into( &self, @@ -688,26 +612,6 @@ impl EnrichmentManager { Ok(()) } - /// Enrich a row using all loaded tables. - /// Returns combined enriched fields from all enrichment sources. - pub fn enrich_row<'a>(&self, row: &CsvRow<'a>) -> EnrichedFields { - let mut combined = EnrichedFields::default(); - for (table, config) in self.tables.values() { - let enriched = table.enrich(row, config); - combined.fields.extend(enriched.fields); - combined.computed.extend(enriched.computed); - } - combined - } - - /// Enrich a row using indexed fields (zero-allocation hot path). 
- pub fn enrich_row_indexed(&self, fields: &[Option<&str>], col_idx: &super::dump_expression::ColumnIndex) -> EnrichedFields { - let mut combined = EnrichedFields::default(); - let mut lookup_buf = Vec::new(); - self.enrich_row_indexed_into(fields, col_idx, &mut combined, &mut lookup_buf); - combined - } - /// Enrich a row into a pre-allocated buffer (reuse across rows). /// Avoids Vec reallocation — clear + refill. String allocs still per-row. /// `lookup_buf` is a reusable buffer for mmap-backed table lookups (avoids Vec alloc per row). @@ -719,119 +623,12 @@ impl EnrichmentManager { } } - /// Drop all tables to free memory. Call after the phase completes. - pub fn clear(&mut self) { - self.tables.clear(); - } - - /// Drop a specific table by join_on key. - pub fn drop_table(&mut self, join_on: &str) { - self.tables.remove(join_on); - } - - /// Total estimated memory across all loaded tables. - pub fn total_memory(&self) -> usize { - self.tables.values().map(|(t, _)| t.estimated_memory()).sum() - } - /// Number of loaded tables. pub fn table_count(&self) -> usize { self.tables.len() } } -// ---- Dictionary helpers ---- - -/// Resolve a string value through a FieldDictionary, returning the integer key. -/// -/// This is the clean API for 1.10/1.15#7: pass individual `&FieldDictionary` refs, -/// not a full `HashMap`. -pub fn resolve_dictionary_value(dict: &FieldDictionary, value: &str) -> i64 { - dict.get_or_insert(value) -} - -/// Resolve an ExprValue through a dictionary if it's a string. -/// Returns the bitmap key (i64) for the value. 
-pub fn resolve_expr_to_bitmap_key( - value: &ExprValue, - dict: Option<&FieldDictionary>, -) -> Option { - match value { - ExprValue::Int(n) => Some(*n as u64), - ExprValue::Bool(b) => Some(if *b { 1 } else { 0 }), - ExprValue::Str(s) => { - if let Some(d) = dict { - Some(d.get_or_insert(s) as u64) - } else { - // Try parsing as integer - s.parse::().ok() - } - } - ExprValue::Null => None, - } -} - -/// Collection of field dictionaries for LCS fields, keyed by field name. -/// -/// Thread-safe: FieldDictionary uses DashMap internally. -/// Share via `Arc` across threads. -pub struct DictionarySet { - dicts: HashMap>, -} - -impl DictionarySet { - /// Create a new set with dictionaries for the given field names. - pub fn new(field_names: &[&str]) -> Self { - let mut dicts = HashMap::new(); - for name in field_names { - dicts.insert(name.to_string(), Arc::new(FieldDictionary::new())); - } - Self { dicts } - } - - /// Create from existing dictionaries (e.g., loaded from disk). - pub fn from_existing(dicts: HashMap>) -> Self { - Self { dicts } - } - - /// Get a dictionary by field name. - pub fn get(&self, field: &str) -> Option<&Arc> { - self.dicts.get(field) - } - - /// Resolve a string value for a field, returning the bitmap key. - /// Returns None if the field has no dictionary (not an LCS field). - pub fn resolve(&self, field: &str, value: &str) -> Option { - self.dicts.get(field).map(|d| d.get_or_insert(value)) - } - - /// Persist all dirty dictionaries to disk. 
- pub fn persist_all(&self, dict_dir: &Path) -> io::Result<()> { - std::fs::create_dir_all(dict_dir)?; - for (name, dict) in &self.dicts { - let snapshot = dict.snapshot(); - let path = dict_dir.join(format!("{}.dict", name)); - let json = serde_json::to_string_pretty(&snapshot) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - // Atomic write - let tmp = path.with_extension("dict.tmp"); - std::fs::write(&tmp, &json)?; - std::fs::rename(&tmp, &path)?; - } - Ok(()) - } - - /// Get all dictionary names. - pub fn names(&self) -> Vec<&str> { - self.dicts.keys().map(|s| s.as_str()).collect() - } - - /// Iterate over all dictionaries. - pub fn iter(&self) -> impl Iterator)> { - self.dicts.iter().map(|(k, v)| (k.as_str(), v)) - } -} - // ---- CSV parsing helpers ---- /// Fast extract of a specific column as i64 from a comma-delimited byte line. @@ -937,15 +734,6 @@ pub fn parse_tsv_fields(line: &str) -> Vec<&str> { #[cfg(test)] mod tests { use super::*; - use std::io::Write; - use tempfile::TempDir; - - fn write_csv(dir: &Path, name: &str, content: &str) -> PathBuf { - let path = dir.join(name); - let mut f = std::fs::File::create(&path).unwrap(); - f.write_all(content.as_bytes()).unwrap(); - path - } // ---- CSV parser tests ---- @@ -973,331 +761,4 @@ mod tests { assert_eq!(fields, vec!["1", "hello", "42"]); } - // ---- EnrichmentTable tests ---- - - #[test] - fn test_load_simple_table() { - let dir = TempDir::new().unwrap(); - let csv = write_csv( - dir.path(), - "posts.csv", - "id,publishedAtSecs,availability\n100,1700000000,Public\n200,,Private\n300,1700001000,Public\n", - ); - - let config = EnrichmentConfig { - csv_path: csv, - key: "id".into(), - join_on: "postId".into(), - fields: vec![ - ("publishedAtSecs".into(), "publishedAt".into()), - ("availability".into(), "availability".into()), - ], - computed_fields: vec![], - filter: None, - child: None, - columns: vec![], - }; - - let table = EnrichmentTable::load(&config).unwrap(); - 
assert_eq!(table.row_count, 3); - - // Check row 100 - let row = table.get(100).unwrap(); - assert_eq!(row.get("publishedAtSecs").unwrap(), "1700000000"); - assert_eq!(row.get("availability").unwrap(), "Public"); - - // Check row 200 (null publishedAtSecs) - let row200 = table.get(200).unwrap(); - assert!(row200.get("publishedAtSecs").is_none()); // empty → absent - assert_eq!(row200.get("availability").unwrap(), "Private"); - } - - #[test] - fn test_single_level_enrichment() { - let dir = TempDir::new().unwrap(); - let csv = write_csv( - dir.path(), - "posts.csv", - "id,publishedAtSecs,availability\n100,1700000000,Public\n", - ); - - let config = EnrichmentConfig { - csv_path: csv, - key: "id".into(), - join_on: "postId".into(), - fields: vec![ - ("publishedAtSecs".into(), "publishedAt".into()), - ("availability".into(), "availability".into()), - ], - computed_fields: vec![ - ComputedFieldDef::parse("isPublished", "publishedAtSecs != null", None).unwrap(), - ComputedFieldDef::parse("postedToId", "lookup_key", None).unwrap(), - ], - filter: None, - child: None, - columns: vec![], - }; - - let table = EnrichmentTable::load(&config).unwrap(); - - // Simulate a parent row with postId=100 - let parent: CsvRow = vec![("postId", Some("100"))].into_iter().collect(); - - let enriched = table.enrich(&parent, &config); - - // Direct fields - assert_eq!(enriched.fields.len(), 2); - assert!(enriched.fields.contains(&("publishedAt".into(), "1700000000".into()))); - assert!(enriched.fields.contains(&("availability".into(), "Public".into()))); - - // Computed fields - assert_eq!(enriched.computed.len(), 2); - assert!(enriched - .computed - .contains(&("isPublished".into(), ExprValue::Bool(true)))); - assert!(enriched - .computed - .contains(&("postedToId".into(), ExprValue::Int(100)))); - } - - #[test] - fn test_enrichment_no_match() { - let dir = TempDir::new().unwrap(); - let csv = write_csv(dir.path(), "posts.csv", "id,name\n100,hello\n"); - - let config = EnrichmentConfig { - 
csv_path: csv, - key: "id".into(), - join_on: "postId".into(), - fields: vec![("name".into(), "name".into())], - computed_fields: vec![], - filter: None, - child: None, - columns: vec![], - }; - - let table = EnrichmentTable::load(&config).unwrap(); - let parent: CsvRow = vec![("postId", Some("999"))].into_iter().collect(); - let enriched = table.enrich(&parent, &config); - assert!(enriched.is_empty()); - } - - #[test] - fn test_enrichment_null_join_key() { - let dir = TempDir::new().unwrap(); - let csv = write_csv(dir.path(), "posts.csv", "id,name\n100,hello\n"); - - let config = EnrichmentConfig { - csv_path: csv, - key: "id".into(), - join_on: "postId".into(), - fields: vec![("name".into(), "name".into())], - computed_fields: vec![], - filter: None, - child: None, - columns: vec![], - }; - - let table = EnrichmentTable::load(&config).unwrap(); - - // Missing join key - let parent: CsvRow = HashMap::new(); - let enriched = table.enrich(&parent, &config); - assert!(enriched.is_empty()); - - // Null join key - let parent2: CsvRow = vec![("postId", None)].into_iter().collect(); - let enriched2 = table.enrich(&parent2, &config); - assert!(enriched2.is_empty()); - } - - #[test] - fn test_nested_enrichment_with_filter() { - let dir = TempDir::new().unwrap(); - - // Model versions CSV - let mv_csv = write_csv( - dir.path(), - "model_versions.csv", - "id,baseModel,modelId\n10,SDXL,1000\n20,SD 1.5,2000\n", - ); - - // Models CSV - let models_csv = write_csv( - dir.path(), - "models.csv", - "id,poi,type\n1000,false,Checkpoint\n2000,true,LORA\n", - ); - - // Config: Resources → MV (by modelVersionId) → Model (by modelId, filter: Checkpoint) - let config = EnrichmentConfig { - csv_path: mv_csv, - key: "id".into(), - join_on: "modelVersionId".into(), - fields: vec![("baseModel".into(), "baseModel".into())], - computed_fields: vec![], - filter: None, - columns: vec![], - child: Some(Box::new(EnrichmentConfig { - csv_path: models_csv, - key: "id".into(), - join_on: 
"modelId".into(), - fields: vec![("poi".into(), "poi".into())], - computed_fields: vec![], - filter: Some(FilterExpression::parse("type = 'Checkpoint'").unwrap()), - child: None, - columns: vec![], - })), - }; - - let table = EnrichmentTable::load(&config).unwrap(); - assert_eq!(table.row_count, 2); - assert!(table.child().is_some()); - assert_eq!(table.child().unwrap().row_count, 2); - - // Resource row with MV id=10 (Checkpoint model → filter passes) - let row1: CsvRow = vec![("modelVersionId", Some("10"))].into_iter().collect(); - let enriched1 = table.enrich(&row1, &config); - // baseModel from MV level - assert!(enriched1.fields.contains(&("baseModel".into(), "SDXL".into()))); - // poi from Model level (Checkpoint, filter passed) - assert!(enriched1.fields.contains(&("poi".into(), "false".into()))); - - // Resource row with MV id=20 (LORA model → filter fails) - let row2: CsvRow = vec![("modelVersionId", Some("20"))].into_iter().collect(); - let enriched2 = table.enrich(&row2, &config); - // baseModel from MV level (no filter on MV) - assert!(enriched2.fields.contains(&("baseModel".into(), "SD 1.5".into()))); - // poi NOT present — Model filter (type=Checkpoint) failed for LORA - assert!(!enriched2.fields.iter().any(|(k, _)| k == "poi")); - } - - // ---- EnrichmentManager tests ---- - - #[test] - fn test_manager_load_and_clear() { - let dir = TempDir::new().unwrap(); - let csv = write_csv(dir.path(), "posts.csv", "id,name\n100,hello\n"); - - let mut mgr = EnrichmentManager::new(); - assert_eq!(mgr.table_count(), 0); - - mgr.load(EnrichmentConfig { - csv_path: csv, - key: "id".into(), - join_on: "postId".into(), - fields: vec![("name".into(), "name".into())], - computed_fields: vec![], - filter: None, - child: None, - columns: vec![], - }) - .unwrap(); - - assert_eq!(mgr.table_count(), 1); - assert!(mgr.total_memory() > 0); - - mgr.clear(); - assert_eq!(mgr.table_count(), 0); - } - - #[test] - fn test_manager_enrich_row() { - let dir = TempDir::new().unwrap(); 
- let csv = write_csv( - dir.path(), - "posts.csv", - "id,availability\n100,Public\n200,Private\n", - ); - - let mut mgr = EnrichmentManager::new(); - mgr.load(EnrichmentConfig { - csv_path: csv, - key: "id".into(), - join_on: "postId".into(), - fields: vec![("availability".into(), "availability".into())], - computed_fields: vec![], - filter: None, - child: None, - columns: vec![], - }) - .unwrap(); - - let row: CsvRow = vec![("postId", Some("100"))].into_iter().collect(); - let enriched = mgr.enrich_row(&row); - assert_eq!(enriched.fields.len(), 1); - assert!(enriched.fields.contains(&("availability".into(), "Public".into()))); - } - - // ---- Dictionary tests ---- - - #[test] - fn test_resolve_dictionary_value() { - let dict = FieldDictionary::new(); - let key1 = resolve_dictionary_value(&dict, "Checkpoint"); - let key2 = resolve_dictionary_value(&dict, "LORA"); - let key3 = resolve_dictionary_value(&dict, "Checkpoint"); // same as key1 - assert_ne!(key1, key2); - assert_eq!(key1, key3); - } - - #[test] - fn test_resolve_expr_to_bitmap_key() { - let dict = FieldDictionary::new(); - - // Integer → direct - assert_eq!( - resolve_expr_to_bitmap_key(&ExprValue::Int(42), None), - Some(42) - ); - - // Bool → 0/1 - assert_eq!( - resolve_expr_to_bitmap_key(&ExprValue::Bool(true), None), - Some(1) - ); - - // String with dict → dictionary key - let key = resolve_expr_to_bitmap_key(&ExprValue::Str("Public".into()), Some(&dict)); - assert!(key.is_some()); - - // String without dict → try parse - assert_eq!( - resolve_expr_to_bitmap_key(&ExprValue::Str("42".into()), None), - Some(42) - ); - - // Null → None - assert_eq!(resolve_expr_to_bitmap_key(&ExprValue::Null, None), None); - } - - #[test] - fn test_dictionary_set() { - let set = DictionarySet::new(&["type", "availability", "baseModel"]); - assert_eq!(set.names().len(), 3); - - let key1 = set.resolve("type", "Checkpoint").unwrap(); - let key2 = set.resolve("type", "LORA").unwrap(); - assert_ne!(key1, key2); - - // 
Unknown field → None - assert!(set.resolve("unknown", "value").is_none()); - } - - #[test] - fn test_dictionary_set_persist() { - let dir = TempDir::new().unwrap(); - let dict_dir = dir.path().join("dictionaries"); - - let set = DictionarySet::new(&["type", "availability"]); - set.resolve("type", "Checkpoint"); - set.resolve("type", "LORA"); - set.resolve("availability", "Public"); - - set.persist_all(&dict_dir).unwrap(); - - // Check files exist - assert!(dict_dir.join("type.dict").exists()); - assert!(dict_dir.join("availability.dict").exists()); - } } diff --git a/src/sync/dump_expression.rs b/src/sync/dump_expression.rs index 2e5efc8..3d57589 100644 --- a/src/sync/dump_expression.rs +++ b/src/sync/dump_expression.rs @@ -49,14 +49,6 @@ impl ExprValue { } } - /// Coerce to string. - pub fn as_str_value(&self) -> Option<&str> { - match self { - ExprValue::Str(s) => Some(s.as_str()), - _ => None, - } - } - pub fn is_null(&self) -> bool { matches!(self, ExprValue::Null) } @@ -99,23 +91,10 @@ pub enum Expr { Max(Vec), } -/// Context for expression evaluation. -pub struct EvalContext<'a> { - /// The current CSV row being processed. - pub row: &'a CsvRow<'a>, - /// The enrichment join key value (for `lookup_key` expressions). - pub lookup_key: Option, -} - /// Column name → index mapping for zero-allocation row access. /// Build once from CSV headers, reuse for every row in the phase. pub type ColumnIndex = HashMap; -/// Build a ColumnIndex from CSV header names. -pub fn build_column_index(headers: &[&str]) -> ColumnIndex { - headers.iter().enumerate().map(|(i, &name)| (name.to_string(), i)).collect() -} - /// Zero-allocation evaluation context using column indices. /// The row is a slice of parsed fields — no HashMap per row. pub struct IndexedEvalContext<'a> { @@ -138,122 +117,6 @@ impl<'a> IndexedEvalContext<'a> { } impl Expr { - /// Evaluate the expression against a row context. 
- pub fn eval(&self, ctx: &EvalContext) -> ExprValue { - match self { - Expr::Column(name) => { - match ctx.row.get(name.as_str()) { - Some(Some(val)) if !val.is_empty() => { - // Try to parse as integer first, then keep as string - if let Ok(n) = val.parse::() { - ExprValue::Int(n) - } else if *val == "true" || *val == "t" { - ExprValue::Bool(true) - } else if *val == "false" || *val == "f" { - ExprValue::Bool(false) - } else { - ExprValue::Str(val.to_string()) - } - } - _ => ExprValue::Null, - } - } - Expr::IntLit(n) => ExprValue::Int(*n), - Expr::StrLit(s) => ExprValue::Str(s.clone()), - Expr::BoolLit(b) => ExprValue::Bool(*b), - Expr::NullLit => ExprValue::Null, - Expr::LookupKey => match ctx.lookup_key { - Some(k) => ExprValue::Int(k), - None => ExprValue::Null, - }, - - Expr::BitfieldExtract { expr, shift, mask } => { - let val = expr.eval(ctx); - match val.as_i64() { - Some(n) => ExprValue::Int((n >> shift) & (*mask as i64)), - None => ExprValue::Null, - } - } - - Expr::Eq(left, right) => { - let l = left.eval(ctx); - let r = right.eval(ctx); - // null != null (SQL semantics for filter context) - if l.is_null() && r.is_null() { - // Special case: `col != null` is handled by NotEq - // For `col = null`, we check if left is null - return ExprValue::Bool(true); - } - if l.is_null() || r.is_null() { - return ExprValue::Bool(false); - } - let result = match (&l, &r) { - (ExprValue::Int(a), ExprValue::Int(b)) => a == b, - (ExprValue::Str(a), ExprValue::Str(b)) => a == b, - (ExprValue::Bool(a), ExprValue::Bool(b)) => a == b, - // Cross-type: try i64 comparison - _ => l.as_i64() == r.as_i64(), - }; - ExprValue::Bool(result) - } - - Expr::NotEq(left, right) => { - let l = left.eval(ctx); - let r = right.eval(ctx); - // `col != null` means "col is not null" - if r.is_null() { - return ExprValue::Bool(!l.is_null()); - } - if l.is_null() { - return ExprValue::Bool(true); - } - let result = match (&l, &r) { - (ExprValue::Int(a), ExprValue::Int(b)) => a != b, - 
(ExprValue::Str(a), ExprValue::Str(b)) => a != b, - (ExprValue::Bool(a), ExprValue::Bool(b)) => a != b, - _ => l.as_i64() != r.as_i64(), - }; - ExprValue::Bool(result) - } - - Expr::And(left, right) => { - let l = left.eval(ctx); - if !l.as_bool() { - return ExprValue::Bool(false); - } - let r = right.eval(ctx); - ExprValue::Bool(r.as_bool()) - } - - Expr::Or(left, right) => { - let l = left.eval(ctx); - if l.as_bool() { - return ExprValue::Bool(true); - } - let r = right.eval(ctx); - ExprValue::Bool(r.as_bool()) - } - - Expr::Max(columns) => { - let mut max_val: Option = None; - for col in columns { - if let Some(Some(val)) = ctx.row.get(col.as_str()) { - if let Ok(n) = val.parse::() { - max_val = Some(match max_val { - Some(cur) => cur.max(n), - None => n, - }); - } - } - } - match max_val { - Some(n) => ExprValue::Int(n), - None => ExprValue::Null, - } - } - } - } - /// Evaluate against an indexed row context (zero-allocation per row). /// This is the hot-path method for 107M+ row processing. pub fn eval_indexed(&self, ctx: &IndexedEvalContext) -> ExprValue { @@ -643,12 +506,6 @@ impl FilterExpression { Ok(Self { expr, source: source.to_string() }) } - /// Evaluate the filter against a row. Returns true if the row passes. - pub fn eval(&self, row: &CsvRow, lookup_key: Option) -> bool { - let ctx = EvalContext { row, lookup_key }; - self.expr.eval(&ctx).as_bool() - } - /// Evaluate against an indexed row (zero-allocation hot path). #[inline] pub fn eval_indexed(&self, fields: &[Option<&str>], col_idx: &ColumnIndex, lookup_key: Option) -> bool { @@ -689,36 +546,6 @@ impl ComputedFieldDef { /// Returns `Some(value)` if the field should be set, `None` if it should be skipped. /// For conditional fields (value_column set), returns the value from that column /// only when the expression evaluates to true. 
- pub fn eval(&self, row: &CsvRow, lookup_key: Option) -> Option { - let ctx = EvalContext { row, lookup_key }; - - if let Some(ref value_col) = self.value_column { - // Conditional: expression is a filter, value comes from column - if self.expr.eval(&ctx).as_bool() { - match row.get(value_col.as_str()) { - Some(Some(val)) if !val.is_empty() => { - if let Ok(n) = val.parse::() { - Some(ExprValue::Int(n)) - } else { - Some(ExprValue::Str(val.to_string())) - } - } - _ => None, - } - } else { - None - } - } else { - // Standard: expression IS the value - let val = self.expr.eval(&ctx); - if val.is_null() { - None - } else { - Some(val) - } - } - } - /// Evaluate against an indexed row (zero-allocation hot path). pub fn eval_indexed(&self, fields: &[Option<&str>], col_idx: &ColumnIndex, lookup_key: Option) -> Option { let ctx = IndexedEvalContext { fields, col_idx, lookup_key }; @@ -841,203 +668,6 @@ mod tests { } } - // --- Evaluator tests --- - - #[test] - fn test_eval_identity() { - let expr = parse_expression("id").unwrap(); - let row = make_row(&[("id", "12345")]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Int(12345)); - } - - #[test] - fn test_eval_lookup_key() { - let expr = parse_expression("lookup_key").unwrap(); - let row = CsvRow::new(); - let ctx = EvalContext { row: &row, lookup_key: Some(42) }; - assert_eq!(expr.eval(&ctx), ExprValue::Int(42)); - } - - #[test] - fn test_eval_null_check_present() { - let expr = parse_expression("publishedAtSecs != null").unwrap(); - let row = make_row(&[("publishedAtSecs", "1700000000")]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Bool(true)); - } - - #[test] - fn test_eval_null_check_absent() { - let expr = parse_expression("publishedAtSecs != null").unwrap(); - let row = make_row_with_nulls(&[("publishedAtSecs", None)]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), 
ExprValue::Bool(false)); - } - - #[test] - fn test_eval_equality_string() { - let expr = parse_expression("type = 'Checkpoint'").unwrap(); - let row = make_row(&[("type", "Checkpoint")]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Bool(true)); - } - - #[test] - fn test_eval_equality_string_mismatch() { - let expr = parse_expression("type = 'Checkpoint'").unwrap(); - let row = make_row(&[("type", "LORA")]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Bool(false)); - } - - #[test] - fn test_eval_boolean_false() { - let expr = parse_expression("detected == false").unwrap(); - let row = make_row(&[("detected", "false")]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Bool(true)); - } - - #[test] - fn test_eval_boolean_true() { - let expr = parse_expression("detected == false").unwrap(); - let row = make_row(&[("detected", "true")]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Bool(false)); - } - - #[test] - fn test_eval_bitfield_set() { - // (flags >> 13) & 1 == 1 - let expr = parse_expression("(flags >> 13) & 1 == 1").unwrap(); - let flags = (1i64 << 13).to_string(); - let row = make_row(&[("flags", &flags)]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Bool(true)); - } - - #[test] - fn test_eval_bitfield_unset() { - let expr = parse_expression("(flags >> 13) & 1 == 1").unwrap(); - let row = make_row(&[("flags", "0")]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Bool(false)); - } - - #[test] - fn test_eval_compound_bitfield() { - // hasMeta: (flags >> 13) & 1 == 1 && (flags >> 2) & 1 == 0 - let expr = parse_expression("(flags >> 13) & 1 == 1 && (flags >> 2) & 1 == 0").unwrap(); - // bit 13 set, bit 2 NOT set → true - let flags = (1i64 << 
13).to_string(); - let row = make_row(&[("flags", &flags)]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Bool(true)); - - // bit 13 set, bit 2 ALSO set → false - let flags2 = ((1i64 << 13) | (1i64 << 2)).to_string(); - let row2 = make_row(&[("flags", &flags2)]); - let ctx2 = EvalContext { row: &row2, lookup_key: None }; - assert_eq!(expr.eval(&ctx2), ExprValue::Bool(false)); - } - - #[test] - fn test_eval_max() { - let expr = parse_expression("max(scannedAtSecs, createdAtSecs)").unwrap(); - let row = make_row(&[("scannedAtSecs", "1000"), ("createdAtSecs", "2000")]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Int(2000)); - } - - #[test] - fn test_eval_max_with_null() { - let expr = parse_expression("max(scannedAtSecs, createdAtSecs)").unwrap(); - let row = make_row_with_nulls(&[ - ("scannedAtSecs", None), - ("createdAtSecs", Some("2000")), - ]); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Int(2000)); - } - - // --- Filter expression tests --- - - #[test] - fn test_filter_disabled_tags() { - // (attributes >> 10) & 1 = 0 — skip disabled tags (filter returns true to include) - let filter = FilterExpression::parse("(attributes >> 10) & 1 = 0").unwrap(); - - // Not disabled (bit 10 not set) → include - let row = make_row(&[("attributes", "0")]); - assert!(filter.eval(&row, None)); - - // Disabled (bit 10 set) → exclude - let disabled = (1i64 << 10).to_string(); - let row2 = make_row(&[("attributes", &disabled)]); - assert!(!filter.eval(&row2, None)); - } - - // --- Computed field tests --- - - #[test] - fn test_computed_has_meta() { - let cf = ComputedFieldDef::parse("hasMeta", "(flags >> 13) & 1 == 1 && (flags >> 2) & 1 == 0", None).unwrap(); - let flags = (1i64 << 13).to_string(); - let row = make_row(&[("flags", &flags)]); - assert_eq!(cf.eval(&row, None), Some(ExprValue::Bool(true))); - } - - #[test] - fn 
test_computed_is_published() { - let cf = ComputedFieldDef::parse("isPublished", "publishedAtSecs != null", None).unwrap(); - let row = make_row(&[("publishedAtSecs", "1700000000")]); - assert_eq!(cf.eval(&row, None), Some(ExprValue::Bool(true))); - - let row2 = make_row_with_nulls(&[("publishedAtSecs", None)]); - // false is not null, so it should return Some(Bool(false)) - assert_eq!(cf.eval(&row2, None), Some(ExprValue::Bool(false))); - } - - #[test] - fn test_computed_posted_to_id() { - let cf = ComputedFieldDef::parse("postedToId", "lookup_key", None).unwrap(); - let row = CsvRow::new(); - assert_eq!(cf.eval(&row, Some(999)), Some(ExprValue::Int(999))); - } - - #[test] - fn test_computed_conditional_multi_value() { - // modelVersionIdsManual: detected == false, value = modelVersionId - let cf = ComputedFieldDef::parse( - "modelVersionIdsManual", - "detected == false", - Some("modelVersionId"), - ).unwrap(); - - // detected=false → include with modelVersionId value - let row = make_row(&[("detected", "false"), ("modelVersionId", "42")]); - assert_eq!(cf.eval(&row, None), Some(ExprValue::Int(42))); - - // detected=true → skip - let row2 = make_row(&[("detected", "true"), ("modelVersionId", "42")]); - assert_eq!(cf.eval(&row2, None), None); - } - - #[test] - fn test_computed_max_sort() { - let cf = ComputedFieldDef::parse("existedAt", "max(scannedAtSecs, createdAtSecs)", None).unwrap(); - let row = make_row(&[("scannedAtSecs", "100"), ("createdAtSecs", "200")]); - assert_eq!(cf.eval(&row, None), Some(ExprValue::Int(200))); - } - - #[test] - fn test_computed_identity() { - let cf = ComputedFieldDef::parse("id", "id", None).unwrap(); - let row = make_row(&[("id", "12345")]); - assert_eq!(cf.eval(&row, None), Some(ExprValue::Int(12345))); - } - // --- Error handling tests --- #[test] @@ -1054,12 +684,4 @@ mod tests { fn test_parse_unmatched_paren() { assert!(parse_expression("(flags >> 13").is_err()); } - - #[test] - fn test_eval_missing_column() { - let expr = 
parse_expression("missing_col").unwrap(); - let row = CsvRow::new(); - let ctx = EvalContext { row: &row, lookup_key: None }; - assert_eq!(expr.eval(&ctx), ExprValue::Null); - } } diff --git a/src/sync/dump_processor.rs b/src/sync/dump_processor.rs index 5132448..4fb0daa 100644 --- a/src/sync/dump_processor.rs +++ b/src/sync/dump_processor.rs @@ -545,29 +545,8 @@ impl<'a> ParsedRow<'a> { self.get_i64(slot_field).map(|v| v as u32) } - /// Convert to Nate's CsvRow format for expression/enrichment evaluation. - pub fn to_csv_row<'b>(&'b self) -> CsvRow<'b> { - let mut row = CsvRow::new(); - for (name, &idx) in self.col_index { - if let Some(bytes) = self.fields.get(idx) { - if bytes.is_empty() { - row.insert(name.as_str(), None); - } else { - let s = if bytes.len() >= 2 && bytes[0] == b'"' && bytes[bytes.len() - 1] == b'"' { - std::str::from_utf8(&bytes[1..bytes.len() - 1]).ok() - } else { - std::str::from_utf8(bytes).ok() - }; - row.insert(name.as_str(), s); - } - } - } - row - } - /// Build indexed fields for zero-allocation expression evaluation. /// Returns a Vec> aligned to the column index positions. - /// Much cheaper than to_csv_row() — no HashMap allocation. pub fn to_indexed_fields<'b>(&'b self) -> Vec> { self.fields .iter() diff --git a/src/sync/ingester.rs b/src/sync/ingester.rs index 8812797..be33083 100644 --- a/src/sync/ingester.rs +++ b/src/sync/ingester.rs @@ -1,15 +1,11 @@ //! Bitmap sink traits and implementations for document ingestion. //! -//! Two bitmap sinks: -//! - `CoalescerSink`: sends MutationOps to the write coalescer channel (online upserts) -//! - `AccumSink`: inserts directly into a BitmapAccum (bulk loading) +//! Provides `CoalescerSink`: sends MutationOps to the write coalescer channel (online upserts). +//! The AccumSink (bulk loading) has been removed along with the V1 bulk loader. 
use std::sync::Arc; -use roaring::RoaringBitmap; - use crate::error::Result; -use super::loader::BitmapAccum; use crate::mutation::{MutationOp, MutationSender}; /// Trait for sinking bitmap mutations during document ingestion. @@ -127,100 +123,3 @@ impl BitmapSink for CoalescerSink { } } -/// BitmapSink that inserts directly into a BitmapAccum. -/// Used by the bulk loading path where bitmaps are accumulated in-memory -/// and applied to staging in one shot. -pub struct AccumSink<'a> { - accum: &'a mut BitmapAccum, -} - -impl<'a> AccumSink<'a> { - #[allow(dead_code)] - pub(crate) fn new(accum: &'a mut BitmapAccum) -> Self { - Self { accum } - } -} - -impl<'a> BitmapSink for AccumSink<'a> { - fn filter_insert(&mut self, field: Arc, value: u64, slot: u32) { - let field_name: &str = &field; - if let Some(value_map) = self.accum.filter_maps.get_mut(field_name) { - value_map - .entry(value) - .or_insert_with(RoaringBitmap::new) - .insert(slot); - } - } - - fn filter_remove(&mut self, _field: Arc, _value: u64, _slot: u32) { - // Bulk loading never removes — this is a fresh insert path. - } - - fn sort_set(&mut self, field: Arc, bit_layer: usize, slot: u32) { - let field_name: &str = &field; - if let Some(layer_map) = self.accum.sort_maps.get_mut(field_name) { - layer_map - .entry(bit_layer) - .or_insert_with(RoaringBitmap::new) - .insert(slot); - } - } - - fn sort_clear(&mut self, _field: Arc, _bit_layer: usize, _slot: u32) { - // Bulk loading never clears sort layers. - } - - fn alive_insert(&mut self, slot: u32) { - self.accum.alive.insert(slot); - } - - fn alive_remove(&mut self, _slot: u32) { - // Bulk loading never removes alive bits. - } - - fn deferred_alive(&mut self, _slot: u32, _activate_at: u64) { - // In dump mode, deferred alive is a no-op for AccumSink. - // The slot is NOT added to the alive bitmap (skipped in the caller). - // The deferred alive map is built separately by the dump pipeline - // and applied to the engine after the dump completes. 
- } - - fn flush(&mut self) -> Result<()> { - Ok(()) // Accum is in-memory, nothing to flush. - } -} - - - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_accum_sink() { - let mut accum = BitmapAccum::new( - &["nsfwLevel".to_string()], - &[("reactionCount".to_string(), 32)], - ); - - { - let mut sink = AccumSink::new(&mut accum); - sink.filter_insert(Arc::from("nsfwLevel"), 1, 10); - sink.filter_insert(Arc::from("nsfwLevel"), 1, 20); - sink.filter_insert(Arc::from("nsfwLevel"), 2, 30); - sink.sort_set(Arc::from("reactionCount"), 0, 10); - sink.sort_set(Arc::from("reactionCount"), 1, 10); - sink.alive_insert(10); - sink.alive_insert(20); - sink.alive_insert(30); - } - - assert_eq!(accum.alive.len(), 3); - let nsfw_map = &accum.filter_maps["nsfwLevel"]; - assert_eq!(nsfw_map[&1].len(), 2); - assert_eq!(nsfw_map[&2].len(), 1); - let sort_map = &accum.sort_maps["reactionCount"]; - assert_eq!(sort_map[&0].len(), 1); - assert_eq!(sort_map[&1].len(), 1); - } -} diff --git a/src/sync/loader.rs b/src/sync/loader.rs deleted file mode 100644 index e057bf1..0000000 --- a/src/sync/loader.rs +++ /dev/null @@ -1,1855 +0,0 @@ -//! Generic NDJSON loader — converts arbitrary NDJSON files to engine Documents -//! using a DataSchema definition. -//! -//! Three-stage pipeline: -//! Stage 1 (reader thread): reads raw bytes from disk into blocks -//! Stage 2 (parse thread): rayon fold+reduce → bitmap maps + full docs (fused) -//! Stage 3 (main thread): apply bitmaps to staging + async docstore writes -//! -//! Key optimization: bitmaps are built directly from JSON during parse — no -//! intermediate Document allocation for the bitmap path. The old decompose/merge -//! pipeline in put_bulk_into is bypassed entirely. 
- -use std::collections::{HashMap, HashSet}; -use std::fs::File; -use std::io::Read as _; -use std::path::Path; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; -use std::thread; -use std::time::{Duration, Instant}; - -use rayon::prelude::*; -use roaring::RoaringBitmap; - -use crate::engine::ConcurrentEngine; -use crate::config::{DataSchema, FieldMapping, FieldValueType}; -use crate::dictionary::FieldDictionary; -use crate::mutation::{Document, FieldValue}; -use crate::query::Value; -#[cfg(test)] -use crate::silos::doc_format::StoredDoc; - -/// Statistics from a completed load operation. -#[derive(Debug, Clone)] -pub struct LoadStats { - pub records_loaded: u64, - pub elapsed: Duration, - pub errors_skipped: u64, -} - -/// Bitmap accumulator for rayon fold+reduce. -/// Each rayon task builds its own instance; reduce merges them with bitmap OR. -pub(crate) struct BitmapAccum { - pub(crate) filter_maps: HashMap>, - pub(crate) sort_maps: HashMap>, - pub(crate) alive: RoaringBitmap, - /// Pre-encoded msgpack bytes — encoding happens in the rayon fold so - /// BulkWriter does pure I/O with no rayon contention. - pub(crate) encoded_docs: Vec<(u32, Vec)>, - /// Deferred alive slots: (slot, activate_at_secs). These slots have - /// filter/sort bitmaps set but alive is NOT set — deferred until timestamp. 
- pub(crate) deferred_alive: Vec<(u32, u64)>, - pub(crate) count: usize, - pub(crate) errors: u64, -} - -impl BitmapAccum { - pub(crate) fn new(filter_names: &[String], sort_configs: &[(String, u8)]) -> Self { - let mut filter_maps = HashMap::with_capacity(filter_names.len()); - for name in filter_names { - filter_maps.insert(name.clone(), HashMap::new()); - } - let mut sort_maps = HashMap::with_capacity(sort_configs.len()); - for (name, bits) in sort_configs { - sort_maps.insert(name.clone(), HashMap::with_capacity(*bits as usize)); - } - BitmapAccum { - filter_maps, - sort_maps, - alive: RoaringBitmap::new(), - encoded_docs: Vec::new(), - deferred_alive: Vec::new(), - count: 0, - errors: 0, - } - } - - /// Save this accumulator to a checkpoint file for crash recovery. - /// - /// Format: [alive_len:u64][alive_bytes][filter_count:u64] - /// for each filter: [name_len:u64][name_bytes][value_count:u64] - /// for each value: [value:u64][bitmap_len:u64][bitmap_bytes] - /// [sort_count:u64] - /// for each sort: [name_len:u64][name_bytes][bit_count:u64] - /// for each bit: [bit:u64][bitmap_len:u64][bitmap_bytes] - #[allow(dead_code)] - pub(crate) fn save_checkpoint(&self, path: &std::path::Path) -> std::io::Result<()> { - let mut buf = Vec::with_capacity(64 * 1024 * 1024); - - // Alive bitmap - let alive_bytes = self.alive.serialized_size(); - buf.extend_from_slice(&(alive_bytes as u64).to_le_bytes()); - self.alive.serialize_into(&mut buf)?; - - // Filter maps - buf.extend_from_slice(&(self.filter_maps.len() as u64).to_le_bytes()); - for (name, value_map) in &self.filter_maps { - let name_bytes = name.as_bytes(); - buf.extend_from_slice(&(name_bytes.len() as u64).to_le_bytes()); - buf.extend_from_slice(name_bytes); - buf.extend_from_slice(&(value_map.len() as u64).to_le_bytes()); - for (&value, bitmap) in value_map { - buf.extend_from_slice(&value.to_le_bytes()); - let bm_size = bitmap.serialized_size(); - buf.extend_from_slice(&(bm_size as u64).to_le_bytes()); - 
bitmap.serialize_into(&mut buf)?; - } - } - - // Sort maps - buf.extend_from_slice(&(self.sort_maps.len() as u64).to_le_bytes()); - for (name, bit_map) in &self.sort_maps { - let name_bytes = name.as_bytes(); - buf.extend_from_slice(&(name_bytes.len() as u64).to_le_bytes()); - buf.extend_from_slice(name_bytes); - buf.extend_from_slice(&(bit_map.len() as u64).to_le_bytes()); - for (&bit, bitmap) in bit_map { - buf.extend_from_slice(&(bit as u64).to_le_bytes()); - let bm_size = bitmap.serialized_size(); - buf.extend_from_slice(&(bm_size as u64).to_le_bytes()); - bitmap.serialize_into(&mut buf)?; - } - } - - // Atomic write: write to temp file, then rename - let tmp = path.with_extension("tmp"); - std::fs::write(&tmp, &buf)?; - std::fs::rename(&tmp, path)?; - eprintln!( - "Checkpoint saved: {} ({:.1} MB)", - path.display(), - buf.len() as f64 / (1024.0 * 1024.0) - ); - Ok(()) - } - - /// Load an accumulator from a checkpoint file. - #[allow(dead_code)] - pub(crate) fn load_checkpoint(path: &std::path::Path) -> std::io::Result { - let data = std::fs::read(path)?; - let mut pos = 0; - - let read_u64 = |pos: &mut usize| -> u64 { - let val = u64::from_le_bytes(data[*pos..*pos + 8].try_into().unwrap()); - *pos += 8; - val - }; - - // Alive bitmap - let alive_len = read_u64(&mut pos) as usize; - let alive = RoaringBitmap::deserialize_from(&data[pos..pos + alive_len]) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - pos += alive_len; - - // Filter maps - let filter_count = read_u64(&mut pos) as usize; - let mut filter_maps = HashMap::with_capacity(filter_count); - for _ in 0..filter_count { - let name_len = read_u64(&mut pos) as usize; - let name = String::from_utf8_lossy(&data[pos..pos + name_len]).into_owned(); - pos += name_len; - let value_count = read_u64(&mut pos) as usize; - let mut value_map = HashMap::with_capacity(value_count); - for _ in 0..value_count { - let value = read_u64(&mut pos); - let bm_size = read_u64(&mut pos) as usize; - let 
bitmap = RoaringBitmap::deserialize_from(&data[pos..pos + bm_size]) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - pos += bm_size; - value_map.insert(value, bitmap); - } - filter_maps.insert(name, value_map); - } - - // Sort maps - let sort_count = read_u64(&mut pos) as usize; - let mut sort_maps = HashMap::with_capacity(sort_count); - for _ in 0..sort_count { - let name_len = read_u64(&mut pos) as usize; - let name = String::from_utf8_lossy(&data[pos..pos + name_len]).into_owned(); - pos += name_len; - let bit_count = read_u64(&mut pos) as usize; - let mut bit_map = HashMap::with_capacity(bit_count); - for _ in 0..bit_count { - let bit = read_u64(&mut pos) as usize; - let bm_size = read_u64(&mut pos) as usize; - let bitmap = RoaringBitmap::deserialize_from(&data[pos..pos + bm_size]) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; - pos += bm_size; - bit_map.insert(bit, bitmap); - } - sort_maps.insert(name, bit_map); - } - - eprintln!( - "Checkpoint loaded: {} ({:.1} MB, {} alive)", - path.display(), - data.len() as f64 / (1024.0 * 1024.0), - alive.len() - ); - - Ok(BitmapAccum { - filter_maps, - sort_maps, - alive, - encoded_docs: Vec::new(), - deferred_alive: Vec::new(), - count: 0, - errors: 0, - }) - } - - #[cfg(test)] - pub(crate) fn alive_len(&self) -> u64 { - self.alive.len() - } - - pub(crate) fn merge(mut self, other: Self) -> Self { - self.alive |= &other.alive; - for (field, value_map) in other.filter_maps { - let target = self.filter_maps.entry(field).or_default(); - for (value, bm) in value_map { - target - .entry(value) - .and_modify(|e| *e |= &bm) - .or_insert(bm); - } - } - for (field, bit_map) in other.sort_maps { - let target = self.sort_maps.entry(field).or_default(); - for (bit, bm) in bit_map { - target - .entry(bit) - .and_modify(|e| *e |= &bm) - .or_insert(bm); - } - } - self.encoded_docs.extend(other.encoded_docs); - self.deferred_alive.extend(other.deferred_alive); - self.count += 
other.count;
        self.errors += other.errors;
        self
    }
}

/// Load an NDJSON file into an engine using the given data schema.
///
/// Three-stage pipeline: a reader thread streams newline-aligned byte blocks,
/// a parse thread fans each block out over rayon (JSON → bitmap maps), and the
/// main thread applies the merged bitmaps to a staging snapshot.
///
/// - `engine`: target ConcurrentEngine (must already be constructed with the right config)
/// - `schema`: field mapping rules for converting raw JSON → Documents
/// - `path`: path to the NDJSON file
/// - `limit`: optional max records to load
/// - `threads`: number of threads (unused — rayon manages parallelism)
/// - `chunk_size`: number of full docs to accumulate before flushing docstore
/// - `docstore_batch_size`: unused
/// - `max_writer_threads`: max concurrent docstore writer threads (0 = unbounded)
/// - `progress`: atomic counter updated as records are loaded (for progress polling)
// NOTE(review): generic parameters were garbled in the reviewed copy; `Option<usize>`,
// `Arc<AtomicU64>` and the `Result` error type are reconstructed from usage — confirm
// against the original signature.
pub fn load_ndjson(
    engine: &ConcurrentEngine,
    schema: &DataSchema,
    path: &Path,
    limit: Option<usize>,
    _threads: usize,
    chunk_size: usize,
    _docstore_batch_size: usize,
    max_writer_threads: usize,
    progress: Arc<AtomicU64>,
) -> Result<LoadStats, String> {
    let record_limit = limit.unwrap_or(usize::MAX);
    let _chunk_size = chunk_size; // kept for API compat; docstore flushes per block now
    let read_batch_size: usize = 500_000;
    let target_batch_bytes = read_batch_size * 600;

    // Pre-build field lookup tables for direct bitmap extraction
    let config = engine.config();
    let filter_names: Vec<String> = config.filter_fields.iter().map(|f| f.name.clone()).collect();
    let sort_configs: Vec<(String, u8)> = config
        .sort_fields
        .iter()
        .map(|f| (f.name.clone(), f.bits))
        .collect();
    let filter_set: HashSet<String> = filter_names.iter().cloned().collect();
    let sort_bits: HashMap<String, u8> = sort_configs.iter().cloned().collect();

    // ---- Stage 1: Reader thread ----
    // Reads raw bytes from disk in large blocks, split on newline boundaries.
    let data_path_owned = path.to_owned();
    let (block_tx, block_rx) = std::sync::mpsc::sync_channel::<Vec<u8>>(2);

    let reader_handle = thread::spawn(move || {
        let file = File::open(&data_path_owned).expect("Failed to open data file");
        let mut reader = std::io::BufReader::with_capacity(16 * 1024 * 1024, file);
        let mut buf = vec![0u8; 4 * 1024 * 1024];
        let mut accum = Vec::<u8>::with_capacity(target_batch_bytes + 4 * 1024 * 1024);

        loop {
            // NOTE(review): read errors are treated as EOF here (best-effort load).
            let bytes_read = reader.read(&mut buf).unwrap_or(0);
            if bytes_read == 0 {
                if !accum.is_empty() {
                    let _ = block_tx.send(accum);
                }
                break;
            }
            accum.extend_from_slice(&buf[..bytes_read]);

            if accum.len() >= target_batch_bytes {
                if let Some(last_nl) = memrchr_newline(&accum) {
                    // Split on the last newline so each sent block holds whole lines.
                    // FIX: the previous code allocated a fresh Vec via `mem::replace`
                    // and then immediately overwrote `accum` with `remainder`, dropping
                    // the new allocation unused on every batch. Swap the remainder in
                    // directly instead — identical batch contents, one less allocation.
                    let remainder = accum[last_nl + 1..].to_vec();
                    accum.truncate(last_nl + 1);
                    let batch = std::mem::replace(&mut accum, remainder);
                    if block_tx.send(batch).is_err() {
                        break;
                    }
                }
            }
        }
    });

    // Register field names with the docstore field dictionary.
    // TODO: BitmapSilo (Phase 3) — replace with DataSilo BulkWriter when wired.
    let all_field_names: Vec<String> = schema
        .fields
        .iter()
        .map(|f| f.target.clone())
        .chain(std::iter::once("id".to_string()))
        .collect();
    // Set up field defaults for write-side elision before creating the BulkWriter
    engine.set_docstore_defaults(schema);
    engine.prepare_field_names(&all_field_names).expect("prepare_field_names");
    let bulk_writer = Arc::new(()); // TODO: BitmapSilo Phase 3 — stub, replace with DataSilo BulkWriter

    // ---- Stage 2: Fused parse + bitmap build + doc encode thread ----
    // Rayon fold+reduce: JSON → bitmap maps + pre-encoded msgpack bytes in one pass.
    // No intermediate Document for the bitmap path; encoding in-fold avoids rayon contention.
    let schema_ref = schema.clone();
    let filter_names_clone = filter_names.clone();
    let sort_configs_clone = sort_configs.clone();
    let filter_set_clone = filter_set;
    let sort_bits_clone = sort_bits;
    let parse_writer = Arc::clone(&bulk_writer);
    let (chunk_tx, chunk_rx) = std::sync::mpsc::sync_channel::<BitmapAccum>(2);

    // Check if there are LowCardinalityString fields; if so, get dictionaries from engine
    let has_lcs = schema.fields.iter().any(|f| f.value_type == FieldValueType::LowCardinalityString);
    let dicts_arc: Option<Arc<HashMap<String, FieldDictionary>>> = if has_lcs {
        Some(engine.dictionaries_arc())
    } else {
        None
    };

    let id_field = schema_ref.id_field.clone();
    let dicts_clone = dicts_arc;
    let parse_handle = thread::spawn(move || {
        let mut total_parsed: usize = 0;

        while let Ok(raw_block) = block_rx.recv() {
            if total_parsed >= record_limit {
                break;
            }

            // Invalid UTF-8 blocks are dropped wholesale (best-effort).
            let block_str = match std::str::from_utf8(&raw_block) {
                Ok(s) => s,
                Err(_) => continue,
            };

            let mut lines: Vec<&str> = block_str
                .split('\n')
                .map(|l| l.trim_end_matches('\r'))
                .filter(|l| !l.is_empty())
                .collect();

            // Respect limit
            let remaining = record_limit.saturating_sub(total_parsed);
            if lines.len() > remaining {
                lines.truncate(remaining);
            }

            let schema = &schema_ref;
            let f_names = &filter_names_clone;
            let s_configs = &sort_configs_clone;
            let f_set = &filter_set_clone;
            let s_bits = &sort_bits_clone;
            let writer = &parse_writer;
            let id_field_ref = &id_field;
            let dicts = dicts_clone.as_deref();

            // Rayon fold+reduce: each worker builds thread-local bitmap maps
            // AND encodes docs to msgpack bytes — all CPU work in one pass.
            // Slot = document ID (Postgres ID), not a sequential counter.
            let accum = lines
                .into_par_iter()
                .fold(
                    || BitmapAccum::new(f_names, s_configs),
                    |mut acc, line| {
                        match serde_json::from_str::<serde_json::Value>(line) {
                            Ok(json) => {
                                // Extract the document ID to use as the slot
                                let slot = match json
                                    .get(id_field_ref)
                                    .and_then(|v| v.as_u64().or_else(|| v.as_i64().map(|n| n as u64)))
                                {
                                    Some(id) => id as u32,
                                    None => {
                                        acc.errors += 1;
                                        return acc;
                                    }
                                };

                                // TODO: BitmapSilo (Phase 3) — encode doc via DataSilo BulkWriter.
                                // For now, skip doc encoding (bitmaps still built correctly).
                                let _ = writer; // suppress unused warning

                                // Build bitmaps directly from JSON
                                acc.alive.insert(slot);
                                extract_bitmaps_with_dicts(
                                    &json,
                                    schema,
                                    f_set,
                                    s_bits,
                                    slot,
                                    &mut acc.filter_maps,
                                    &mut acc.sort_maps,
                                    dicts,
                                );
                                acc.count += 1;
                            }
                            Err(_) => acc.errors += 1,
                        }
                        acc
                    },
                )
                .reduce(
                    || BitmapAccum::new(f_names, s_configs),
                    |a, b| a.merge(b),
                );

            total_parsed += accum.count;

            if chunk_tx.send(accum).is_err() {
                break;
            }
        }
    });

    // ---- Stage 3: Apply bitmaps + docstore (main thread) ----
    let mut staging = engine.clone_staging();
    let mut total_inserted: usize = 0;
    let mut total_errors: u64 = 0;
    let mut chunks_processed: usize = 0;
    let wall_start = Instant::now();

    let mut ds_handles: Vec<thread::JoinHandle<()>> = Vec::new();
    let writer_cap = if max_writer_threads == 0 { usize::MAX } else { max_writer_threads };

    while let Ok(chunk) = chunk_rx.recv() {
        total_errors += chunk.errors;
        let chunk_count = chunk.count;

        // Apply pre-built bitmaps directly to staging — no decompose/merge needed
        let t0 = Instant::now();
        ConcurrentEngine::apply_bitmap_maps(
            &mut staging,
            chunk.filter_maps,
            chunk.sort_maps,
            chunk.alive,
        );
        let apply_ms = t0.elapsed().as_secs_f64() * 1000.0;

        total_inserted += chunk_count;
        progress.store(total_inserted as u64, Ordering::Release);
        chunks_processed += 1;

        let elapsed = wall_start.elapsed();
        let rate = total_inserted as f64 / elapsed.as_secs_f64();
        eprintln!(
            " chunk {}: {} total ({:.0}/s) apply={:.1}ms",
            chunks_processed, total_inserted, rate, apply_ms
        );

        // Backpressure: wait for a writer to finish before spawning another.
        // Guarded by the len() check, so remove(0) cannot panic on an empty vec.
        if ds_handles.len() >= writer_cap {
            ds_handles.remove(0).join().unwrap();
        }

        // TODO: BitmapSilo (Phase 3) — write encoded docs via DataSilo BulkWriter.
        // For now, skip docstore writes (bitmaps applied correctly above).
        let _ = &bulk_writer; // suppress unused warning
    }

    // Wait for remaining threads
    parse_handle.join().unwrap();
    reader_handle.join().unwrap();
    for h in ds_handles {
        h.join().unwrap();
    }

    // Publish staging snapshot
    engine.publish_staging(staging);

    let elapsed = wall_start.elapsed();
    let rate = total_inserted as f64 / elapsed.as_secs_f64();
    eprintln!(
        "Loaded {} records in {:.1}s ({:.0}/s), errors skipped: {}",
        total_inserted,
        elapsed.as_secs_f64(),
        rate,
        total_errors
    );

    Ok(LoadStats {
        records_loaded: total_inserted as u64,
        elapsed,
        errors_skipped: total_errors,
    })
}

/// Extract bitmap entries directly from JSON into accumulator maps.
/// Skips intermediate Document creation for indexed fields.
#[allow(dead_code)] // Used by sync pipeline (feature-gated)
pub(crate) fn extract_bitmaps(
    json: &serde_json::Value,
    schema: &DataSchema,
    filter_set: &HashSet<String>,
    sort_bits: &HashMap<String, u8>,
    slot: u32,
    filter_maps: &mut HashMap<String, HashMap<u64, RoaringBitmap>>,
    sort_maps: &mut HashMap<String, HashMap<usize, RoaringBitmap>>,
) {
    extract_bitmaps_with_dicts(json, schema, filter_set, sort_bits, slot, filter_maps, sort_maps, None);
}

/// Extract bitmap entries directly from JSON into accumulator maps, with optional dictionaries.
-pub(crate) fn extract_bitmaps_with_dicts( - json: &serde_json::Value, - schema: &DataSchema, - filter_set: &HashSet, - sort_bits: &HashMap, - slot: u32, - filter_maps: &mut HashMap>, - sort_maps: &mut HashMap>, - dictionaries: Option<&HashMap>, -) { - for mapping in &schema.fields { - if mapping.doc_only { - continue; - } - - let is_filter = filter_set.contains(&mapping.target); - let s_bits = sort_bits.get(&mapping.target).copied(); - - if !is_filter && s_bits.is_none() { - continue; - } - - let (raw, apply_ms) = match mapping.resolve_raw(json) { - Some(pair) => pair, - None => { - // ExistsBoolean: field absent → false - if is_filter && matches!(mapping.value_type, FieldValueType::ExistsBoolean) { - if let Some(fm) = filter_maps.get_mut(&mapping.target) { - fm.entry(0) - .or_insert_with(RoaringBitmap::new) - .insert(slot); - } - } - continue; - } - }; - - if is_filter { - if let Some(fm) = filter_maps.get_mut(&mapping.target) { - let dict = dictionaries.and_then(|d| d.get(&mapping.target)); - extract_filter_value_with_dict(raw, mapping, slot, fm, apply_ms, dict); - } - } - - if let Some(bits) = s_bits { - if let Some(sm) = sort_maps.get_mut(&mapping.target) { - extract_sort_value(raw, mapping, slot, bits, sm, apply_ms); - } - } - } -} - -/// Extract a single filter value, with optional dictionary for LowCardinalityString. 
-pub(crate) fn extract_filter_value_with_dict( - raw: &serde_json::Value, - mapping: &FieldMapping, - slot: u32, - field_map: &mut HashMap, - ms_to_seconds: bool, - dictionary: Option<&FieldDictionary>, -) { - match mapping.value_type { - FieldValueType::Integer => { - if let Some(n) = extract_integer(raw, ms_to_seconds) { - field_map - .entry(n as u64) - .or_insert_with(RoaringBitmap::new) - .insert(slot); - } - } - FieldValueType::Boolean => { - if let Some(b) = raw.as_bool() { - field_map - .entry(if b { 1 } else { 0 }) - .or_insert_with(RoaringBitmap::new) - .insert(slot); - } - } - FieldValueType::MappedString => { - if let Some(s) = raw.as_str() { - let lookup = if mapping.case_sensitive { - std::borrow::Cow::Borrowed(s) - } else { - std::borrow::Cow::Owned(s.to_lowercase()) - }; - let n = mapping - .string_map - .as_ref() - .and_then(|m| m.get(lookup.as_ref()).copied()) - .unwrap_or(0); - field_map - .entry(n as u64) - .or_insert_with(RoaringBitmap::new) - .insert(slot); - } - } - FieldValueType::LowCardinalityString => { - if let Some(s) = raw.as_str() { - if let Some(dict) = dictionary { - let n = dict.get_or_insert(s); - field_map - .entry(n as u64) - .or_insert_with(RoaringBitmap::new) - .insert(slot); - } - // If no dictionary provided, skip silently (shouldn't happen in practice) - } - } - FieldValueType::IntegerArray => { - if let Some(arr) = raw.as_array() { - for v in arr { - if let Some(n) = v.as_i64().or_else(|| v.as_u64().map(|n| n as i64)) { - field_map - .entry(n as u64) - .or_insert_with(RoaringBitmap::new) - .insert(slot); - } - } - } - } - FieldValueType::ExistsBoolean => { - field_map - .entry(1) - .or_insert_with(RoaringBitmap::new) - .insert(slot); - } - FieldValueType::String => {} // String filter fields not supported in bitmap index - } -} - -/// Extract sort value from JSON and insert into bit-layer bitmap maps. 
-pub(crate) fn extract_sort_value( - raw: &serde_json::Value, - mapping: &FieldMapping, - slot: u32, - bits: u8, - bit_map: &mut HashMap, - ms_to_seconds: bool, -) { - let value = match mapping.value_type { - // Sort fields are stored as u32 — clamp negative values to 0 so they don't - // wrap around to u32::MAX and sort incorrectly. - FieldValueType::Integer => { - extract_integer(raw, ms_to_seconds).map(|n| n.max(0) as u32) - } - _ => None, - }; - if let Some(v) = value { - for bit in 0..(bits as usize) { - if (v >> bit) & 1 == 1 { - bit_map - .entry(bit) - .or_insert_with(RoaringBitmap::new) - .insert(slot); - } - } - } -} - -/// Extract an integer from a JSON value, optionally converting ms→seconds. -pub(crate) fn extract_integer(raw: &serde_json::Value, ms_to_seconds: bool) -> Option { - let n = raw - .as_i64() - .or_else(|| raw.as_u64().map(|n| n as i64)) - .or_else(|| raw.as_f64().map(|n| n as i64))?; - Some(if ms_to_seconds { - ((n / 1000) as u32) as i64 - } else { - n - }) -} - -/// Convert a raw JSON value to a StoredDoc using the DataSchema field mappings. -/// Used by tests to verify field mapping correctness. 
-#[cfg(test)] -fn json_to_stored_doc(json: &serde_json::Value, schema: &DataSchema) -> StoredDoc { - let mut fields = HashMap::new(); - - if let Some(id_val) = json.get(&schema.id_field) { - if let Some(n) = id_val.as_i64() { - fields.insert("id".to_string(), FieldValue::Single(Value::Integer(n))); - } else if let Some(n) = id_val.as_u64() { - fields.insert( - "id".to_string(), - FieldValue::Single(Value::Integer(n as i64)), - ); - } - } - - for mapping in &schema.fields { - if mapping.filter_only { - continue; - } - - let (raw, apply_ms) = match mapping.resolve_raw(json) { - Some(pair) => pair, - None => { - match mapping.value_type { - FieldValueType::ExistsBoolean => { - fields.insert( - mapping.target.clone(), - FieldValue::Single(Value::Bool(false)), - ); - } - _ => {} - } - continue; - } - }; - - if let Some(fv) = convert_field(raw, mapping, apply_ms) { - fields.insert(mapping.target.clone(), fv); - } - } - - StoredDoc { fields, schema_version: 0 } -} - -/// Convert a raw JSON object to a `Document` using the given `DataSchema`. -/// -/// Extracts the ID from `schema.id_field` and builds the Document's field map -/// using the schema's field mappings. Returns `(slot_id, Document)` or an error -/// if the ID field is missing or not an integer. -pub fn json_to_document( - json: &serde_json::Value, - schema: &DataSchema, -) -> Result<(u32, Document), String> { - json_to_document_with_dicts(json, schema, None) -} - -/// Convert a raw JSON object to a `Document`, with optional dictionaries for LowCardinalityString fields. 
-pub fn json_to_document_with_dicts( - json: &serde_json::Value, - schema: &DataSchema, - dictionaries: Option<&HashMap>, -) -> Result<(u32, Document), String> { - // Extract ID - let id_val = json - .get(&schema.id_field) - .ok_or_else(|| format!("Missing id field '{}'", schema.id_field))?; - let id = id_val - .as_u64() - .or_else(|| id_val.as_i64().map(|n| n as u64)) - .ok_or_else(|| format!("id field '{}' is not an integer", schema.id_field))?; - let slot = id as u32; - - let mut fields = HashMap::new(); - - // Store the ID in the document fields - fields.insert( - "id".to_string(), - FieldValue::Single(Value::Integer(id as i64)), - ); - - for mapping in &schema.fields { - // filter_only fields are bitmap-indexed only — skip docstore storage - if mapping.filter_only { - continue; - } - - let (raw, apply_ms) = match mapping.resolve_raw(json) { - Some(pair) => pair, - None => { - if matches!(mapping.value_type, FieldValueType::ExistsBoolean) { - fields.insert( - mapping.target.clone(), - FieldValue::Single(Value::Bool(false)), - ); - } - continue; - } - }; - - // Null source values: write explicit defaults so the V2 docstore - // LIFO scan doesn't find stale old values. For fields without a - // default, null is a schema violation → return error. 
- if raw.is_null() { - match mapping.value_type { - FieldValueType::ExistsBoolean => { - fields.insert(mapping.target.clone(), FieldValue::Single(Value::Bool(false))); - } - _ => { - if let Some(ref dv) = mapping.default_value { - let dict = dictionaries.and_then(|d| d.get(&mapping.target)); - if let Some(fv) = convert_field_with_dict(dv, mapping, false, dict) { - fields.insert(mapping.target.clone(), fv); - } - } else if !mapping.doc_only { - return Err(format!( - "field '{}' (source '{}') is null but has no default", - mapping.target, mapping.source - )); - } - } - } - continue; - } - - let dict = dictionaries.and_then(|d| d.get(&mapping.target)); - if let Some(fv) = convert_field_with_dict(raw, mapping, apply_ms, dict) { - fields.insert(mapping.target.clone(), fv); - } - } - - Ok((slot, Document { fields })) -} - -/// Apply computed sort field values to a document. -/// Call this after `json_to_document` when the engine config is available. -/// For each computed sort field, reads source field values from the document, -/// applies the computation (e.g., GREATEST), and inserts the result. -pub fn apply_computed_sort_fields(doc: &mut Document, sort_fields: &[crate::config::SortFieldConfig]) { - use crate::mutation::apply_computed_op; - - for sort_field in sort_fields { - if let Some(ref computed) = sort_field.computed { - let values: Vec = computed.source_fields.iter() - .filter_map(|f| { - doc.fields.get(f).and_then(|fv| match fv { - FieldValue::Single(Value::Integer(v)) => Some((*v).max(0) as u32), - _ => None, - }) - }) - .collect(); - if !values.is_empty() { - let result = apply_computed_op(&computed.op, &values); - doc.fields.insert( - sort_field.name.clone(), - FieldValue::Single(Value::Integer(result as i64)), - ); - } - } - } -} - -/// Convert a raw serde_json Value field to a FieldValue. 
#[allow(dead_code)] // Used by test helpers
fn convert_field(raw: &serde_json::Value, mapping: &FieldMapping, ms_to_seconds: bool) -> Option<FieldValue> {
    convert_field_with_dict(raw, mapping, ms_to_seconds, None)
}

/// Convert a raw serde_json Value field to a FieldValue, with optional dictionary.
///
/// Returns `None` when the raw value cannot be interpreted as the mapping's
/// declared type (wrong JSON type, empty array, etc.).
pub fn convert_field_with_dict(
    raw: &serde_json::Value,
    mapping: &FieldMapping,
    ms_to_seconds: bool,
    dictionary: Option<&FieldDictionary>,
) -> Option<FieldValue> {
    match mapping.value_type {
        FieldValueType::Integer => {
            // Accept i64, u64 or f64 JSON numbers; anything else is rejected.
            let n = if let Some(n) = raw.as_i64() {
                n
            } else if let Some(n) = raw.as_u64() {
                n as i64
            } else if let Some(n) = raw.as_f64() {
                n as i64
            } else {
                return None;
            };
            // ms→s conversion squeezes through u32 — same path as extract_integer.
            let n = if ms_to_seconds {
                ((n / 1000) as u32) as i64
            } else {
                n
            };
            Some(FieldValue::Single(Value::Integer(n)))
        }
        FieldValueType::Boolean => {
            let b = raw.as_bool()?;
            Some(FieldValue::Single(Value::Bool(b)))
        }
        FieldValueType::String => {
            let s = raw.as_str()?;
            Some(FieldValue::Single(Value::String(s.to_string())))
        }
        FieldValueType::MappedString => {
            let s = raw.as_str()?;
            let map = mapping.string_map.as_ref()?;
            let lookup = if mapping.case_sensitive {
                std::borrow::Cow::Borrowed(s)
            } else {
                std::borrow::Cow::Owned(s.to_lowercase())
            };
            // Strings missing from the map fall back to 0.
            let n = map.get(lookup.as_ref()).copied().unwrap_or(0);
            Some(FieldValue::Single(Value::Integer(n)))
        }
        FieldValueType::LowCardinalityString => {
            let s = raw.as_str()?;
            if let Some(dict) = dictionary {
                let n = dict.get_or_insert(s);
                Some(FieldValue::Single(Value::Integer(n)))
            } else {
                // Without a dictionary, store as 0 (unknown)
                Some(FieldValue::Single(Value::Integer(0)))
            }
        }
        FieldValueType::IntegerArray => {
            let arr = raw.as_array()?;
            if arr.is_empty() {
                return None; // empty array → treat field as absent
            }
            let values: Vec<Value> = arr
                .iter()
                .filter_map(|v| {
                    v.as_i64()
                        .or_else(|| v.as_u64().map(|n| n as i64))
                        .map(Value::Integer)
                })
                .collect();
            if values.is_empty() {
                None
            } else {
                Some(FieldValue::Multi(values))
            }
        }
        // Value resolved at all → "exists" is true.
        FieldValueType::ExistsBoolean => Some(FieldValue::Single(Value::Bool(true))),
    }
}

/// Byte offset of the last `\n` in `data`, if any.
fn memrchr_newline(data: &[u8]) -> Option<usize> {
    data.iter().rposition(|&b| b == b'\n')
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_json_to_stored_doc_integer() {
        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "count".into(),
                target: "count".into(),
                value_type: FieldValueType::Integer,
                fallback: None,
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };
        let json: serde_json::Value = serde_json::json!({"id": 42, "count": 100});
        let doc = json_to_stored_doc(&json, &schema);
        assert_eq!(
            doc.fields.get("id"),
            Some(&FieldValue::Single(Value::Integer(42)))
        );
        assert_eq!(
            doc.fields.get("count"),
            Some(&FieldValue::Single(Value::Integer(100)))
        );
    }

    #[test]
    fn test_json_to_stored_doc_fallback() {
        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "primary".into(),
                target: "val".into(),
                value_type: FieldValueType::Integer,
                fallback: Some("secondary".into()),
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };
        // "primary" missing → value resolved from the fallback source.
        let json: serde_json::Value = serde_json::json!({"id": 1, "secondary": 99});
        let doc = json_to_stored_doc(&json, &schema);
        assert_eq!(
            doc.fields.get("val"),
            Some(&FieldValue::Single(Value::Integer(99)))
        );
    }

    #[test]
    fn test_json_to_stored_doc_mapped_string() {
        let mut map = HashMap::new();
        map.insert("image".into(), 1);
        map.insert("video".into(), 2);

        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "type".into(),
                target: "type".into(),
                value_type: FieldValueType::MappedString,
                fallback: None,
                string_map: Some(map),
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };
        let json: serde_json::Value = serde_json::json!({"id": 1, "type": "image"});
        let doc = json_to_stored_doc(&json, &schema);
        assert_eq!(
            doc.fields.get("type"),
            Some(&FieldValue::Single(Value::Integer(1)))
        );
    }

    #[test]
    fn test_json_to_stored_doc_mapped_string_case_insensitive() {
        let mut map = HashMap::new();
        map.insert("Image".into(), 1);
        map.insert("Video".into(), 2);

        let mut schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "type".into(),
                target: "type".into(),
                value_type: FieldValueType::MappedString,
                fallback: None,
                string_map: Some(map),
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false, // default
                default_value: None,
                nullable: false,
            }],
        };
        schema.normalize_string_maps();

        // Uppercase input matches lowercase-normalized map key
        let json: serde_json::Value = serde_json::json!({"id": 1, "type": "IMAGE"});
        let doc = json_to_stored_doc(&json, &schema);
        assert_eq!(
            doc.fields.get("type"),
            Some(&FieldValue::Single(Value::Integer(1)))
        );

        // Mixed case input also matches
        let json2: serde_json::Value = serde_json::json!({"id": 2, "type": "Video"});
        let doc2 = json_to_stored_doc(&json2, &schema);
        assert_eq!(
            doc2.fields.get("type"),
            Some(&FieldValue::Single(Value::Integer(2)))
        );
    }

    #[test]
    fn test_json_to_stored_doc_mapped_string_case_sensitive() {
        let mut map = HashMap::new();
        map.insert("Image".into(), 1);
        map.insert("Video".into(), 2);

        let mut schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "type".into(),
                target: "type".into(),
                value_type: FieldValueType::MappedString,
                fallback: None,
                string_map: Some(map),
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: true,
                default_value: None,
                nullable: false,
            }],
        };
        schema.normalize_string_maps();

        // Exact case matches
        let json: serde_json::Value = serde_json::json!({"id": 1, "type": "Image"});
        let doc = json_to_stored_doc(&json, &schema);
        assert_eq!(
            doc.fields.get("type"),
            Some(&FieldValue::Single(Value::Integer(1)))
        );

        // Wrong case falls back to 0
        let json2: serde_json::Value = serde_json::json!({"id": 2, "type": "image"});
        let doc2 = json_to_stored_doc(&json2, &schema);
        assert_eq!(
            doc2.fields.get("type"),
            Some(&FieldValue::Single(Value::Integer(0)))
        );
    }

    #[test]
    fn test_json_to_stored_doc_boolean() {
        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "hasMeta".into(),
                target: "hasMeta".into(),
                value_type: FieldValueType::Boolean,
                fallback: None,
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };
        let json: serde_json::Value = serde_json::json!({"id": 1, "hasMeta": true});
        let doc = json_to_stored_doc(&json, &schema);
        assert_eq!(
            doc.fields.get("hasMeta"),
            Some(&FieldValue::Single(Value::Bool(true)))
        );
    }

    #[test]
    fn test_json_to_stored_doc_integer_array() {
        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "tagIds".into(),
                target: "tagIds".into(),
                value_type: FieldValueType::IntegerArray,
                fallback: None,
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };
        let json: serde_json::Value = serde_json::json!({"id": 1, "tagIds": [10, 20, 30]});
        let doc = json_to_stored_doc(&json, &schema);
        assert_eq!(
            doc.fields.get("tagIds"),
            Some(&FieldValue::Multi(vec![
                Value::Integer(10),
                Value::Integer(20),
                Value::Integer(30),
            ]))
        );
    }

    #[test]
    fn test_json_to_stored_doc_truncate_u32() {
        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "ts".into(),
                target: "ts".into(),
                value_type: FieldValueType::Integer,
                fallback: None,
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: true,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };
        // Millisecond timestamp → divide by 1000, then cast to u32
        let ms_val: i64 = 1_710_000_000_000; // March 2024 in ms
        let json: serde_json::Value = serde_json::json!({"id": 1, "ts": ms_val});
        let doc = json_to_stored_doc(&json, &schema);
        let expected = (ms_val / 1000) as i64; // 1_710_000_000 — valid seconds
        assert_eq!(
            doc.fields.get("ts"),
            Some(&FieldValue::Single(Value::Integer(expected)))
        );

    }

    #[test]
    fn test_ms_to_seconds_with_fallback() {
        // Mirrors the real civitai config: source=sortAtUnix (ms), fallback=sortAt (seconds)
        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "sortAtUnix".into(),
                target: "sortAt".into(),
                value_type: FieldValueType::Integer,
                fallback: Some("sortAt".into()),
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: true,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };

        // Case 1: sortAtUnix present (milliseconds) → divide by 1000
        let json1: serde_json::Value =
            serde_json::json!({"id": 1, "sortAtUnix": 1_684_867_905_000_i64});
        let doc1 = json_to_stored_doc(&json1, &schema);
        assert_eq!(
            doc1.fields.get("sortAt"),
            Some(&FieldValue::Single(Value::Integer(1_684_867_905))),
            "ms timestamp should be divided by 1000"
        );

        // Case 2: sortAtUnix missing, falls back to sortAt (seconds) → NO division
        let json2: serde_json::Value =
            serde_json::json!({"id": 2, "sortAt": 1_684_867_905_i64});
        let doc2 = json_to_stored_doc(&json2, &schema);
        assert_eq!(
            doc2.fields.get("sortAt"),
            Some(&FieldValue::Single(Value::Integer(1_684_867_905))),
            "fallback (seconds) should NOT be divided by 1000"
        );

        // Case 3: sortAtUnix present but null, falls back to sortAt (seconds)
        let json3: serde_json::Value =
            serde_json::json!({"id": 3, "sortAtUnix": null, "sortAt": 1_684_867_905_i64});
        let doc3 = json_to_stored_doc(&json3, &schema);
        assert_eq!(
            doc3.fields.get("sortAt"),
            Some(&FieldValue::Single(Value::Integer(1_684_867_905))),
            "null primary should fall back to seconds without division"
        );

        // Case 4: Both missing → field absent
        let json4: serde_json::Value = serde_json::json!({"id": 4});
        let doc4 = json_to_stored_doc(&json4, &schema);
        assert_eq!(
            doc4.fields.get("sortAt"),
            None,
            "both missing → field should be absent"
        );
    }

    #[test]
    fn test_ms_to_seconds_json_to_document() {
        // Same test through json_to_document (the production path for upserts)
        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "sortAtUnix".into(),
                target: "sortAt".into(),
                value_type: FieldValueType::Integer,
                fallback: Some("sortAt".into()),
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: true,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };

        // Primary (ms) → divided
        let json1 = serde_json::json!({"id": 100, "sortAtUnix": 1_684_867_905_000_i64});
        let (slot, doc1) = json_to_document(&json1, &schema).unwrap();
        assert_eq!(slot, 100);
        assert_eq!(
            doc1.fields.get("sortAt"),
            Some(&FieldValue::Single(Value::Integer(1_684_867_905)))
        );

        // Fallback (seconds) → not divided
        let json2 = serde_json::json!({"id": 200, "sortAt": 1_684_867_905_i64});
        let (slot2, doc2) = json_to_document(&json2, &schema).unwrap();
        assert_eq!(slot2, 200);
        assert_eq!(
            doc2.fields.get("sortAt"),
            Some(&FieldValue::Single(Value::Integer(1_684_867_905)))
        );
    }

    #[test]
    fn test_ms_to_seconds_extract_integer() {
        // Direct test of the extraction function
        let ms = serde_json::json!(1_684_867_905_000_i64);
        assert_eq!(extract_integer(&ms, true), Some(1_684_867_905));
        assert_eq!(extract_integer(&ms, false), Some(1_684_867_905_000));

        let sec = serde_json::json!(1_684_867_905_i64);
        assert_eq!(extract_integer(&sec, true), Some(1_684_867));
        assert_eq!(extract_integer(&sec, false), Some(1_684_867_905));
    }

    #[test]
    fn test_json_to_stored_doc_string() {
        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "url".into(),
                target: "url".into(),
                value_type: FieldValueType::String,
                fallback: None,
                string_map: None,
                doc_only: true,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };
        let json: serde_json::Value = serde_json::json!({"id": 1, "url": "http://example.com"});
        let doc = json_to_stored_doc(&json, &schema);
        assert_eq!(
            doc.fields.get("url"),
            Some(&FieldValue::Single(Value::String(
                "http://example.com".into()
            )))
        );
    }

    #[test]
    fn test_json_to_stored_doc_missing_field_skipped() {
        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "missing".into(),
                target: "val".into(),
                value_type: FieldValueType::Integer,
                fallback: None,
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };
        let json: serde_json::Value = serde_json::json!({"id": 1});
        let doc = json_to_stored_doc(&json, &schema);
        assert!(doc.fields.get("val").is_none());
    }

    #[test]
    fn test_json_to_stored_doc_null_field_skipped() {
        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "val".into(),
                target: "val".into(),
                value_type: FieldValueType::Integer,
                fallback: None,
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };
        let json: serde_json::Value = serde_json::json!({"id": 1, "val": null});
        let doc = json_to_stored_doc(&json, &schema);
        assert!(doc.fields.get("val").is_none());
    }

    #[test]
    fn test_json_to_stored_doc_empty_array_skipped() {
        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "tags".into(),
                target: "tags".into(),
                value_type: FieldValueType::IntegerArray,
                fallback: None,
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };
        let json: serde_json::Value = serde_json::json!({"id": 1, "tags": []});
        let doc = json_to_stored_doc(&json, &schema);
        assert!(doc.fields.get("tags").is_none());
    }

    // -----------------------------------------------------------------------
    // LowCardinalityString tests
    // -----------------------------------------------------------------------

    #[test]
    fn test_low_cardinality_string_auto_assignment() {
        use crate::dictionary::FieldDictionary;

        let dict = FieldDictionary::new();
        let mut dicts = HashMap::new();
        dicts.insert("baseModel".to_string(), dict);

        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "baseModel".into(),
                target: "baseModel".into(),
                value_type: FieldValueType::LowCardinalityString,
                fallback: None,
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };

        // First document — "SD 1.5" gets assigned a key
        let json1 = serde_json::json!({"id": 1, "baseModel": "SD 1.5"});
        let (slot1, doc1) = json_to_document_with_dicts(&json1, &schema, Some(&dicts)).unwrap();
        assert_eq!(slot1, 1);
        let k1 = match doc1.fields.get("baseModel") {
            Some(FieldValue::Single(Value::Integer(n))) => *n,
            _ => panic!("expected integer"),
        };
        assert!(k1 >= 1, "auto-assigned key should be >= 1");

        // Second document — same string gets same key
        let json2 = serde_json::json!({"id": 2, "baseModel": "SD 1.5"});
        let (_, doc2) = json_to_document_with_dicts(&json2, &schema, Some(&dicts)).unwrap();
        let k2 = match doc2.fields.get("baseModel") {
            Some(FieldValue::Single(Value::Integer(n))) => *n,
            _ => panic!("expected integer"),
        };
        assert_eq!(k1, k2, "same string should get same key");

        // Third document — different string gets different key
        let json3 = serde_json::json!({"id": 3, "baseModel": "SDXL 1.0"});
        let (_, doc3) = json_to_document_with_dicts(&json3, &schema, Some(&dicts)).unwrap();
        let k3 = match doc3.fields.get("baseModel") {
            Some(FieldValue::Single(Value::Integer(n))) => *n,
            _ => panic!("expected integer"),
        };
        assert_ne!(k1, k3, "different string should get different key");
    }

    #[test]
    fn test_low_cardinality_string_case_insensitive() {
        use crate::dictionary::FieldDictionary;

        let dict = FieldDictionary::new();
        let mut dicts = HashMap::new();
        dicts.insert("type".to_string(), dict);

        let schema = DataSchema {
            id_field: "id".into(),
            schema_version: 1,
            fields: vec![FieldMapping {
                source: "type".into(),
                target: "type".into(),
                value_type: FieldValueType::LowCardinalityString,
                fallback: None,
                string_map: None,
                doc_only: false,
                filter_only: false,
                ms_to_seconds: false,
                truncate_u32: false,
                case_sensitive: false,
                default_value: None,
                nullable: false,
            }],
        };

        let json1 = serde_json::json!({"id": 1, "type": "Image"});
        let (_, doc1) = json_to_document_with_dicts(&json1, &schema, Some(&dicts)).unwrap();
        let k1 = match doc1.fields.get("type") {
            Some(FieldValue::Single(Value::Integer(n))) => *n,
            _ => panic!("expected integer"),
        };

        // Different casing should get same key
        let json2 = serde_json::json!({"id": 2, "type": "IMAGE"});
        let (_, doc2) = json_to_document_with_dicts(&json2, &schema, Some(&dicts)).unwrap();
        let k2 = match doc2.fields.get("type") {
            Some(FieldValue::Single(Value::Integer(n))) => *n,
            _ => panic!("expected integer"),
        };
        assert_eq!(k1, k2, "case-insensitive: same key for different casing");

        // Original casing preserved in dictionary
        let dict = dicts.get("type").unwrap();
        let snap = dict.snapshot();
        assert_eq!(snap.originals.get("image"), Some(&"Image".to_string()));
    }

    #[test]
    fn test_low_cardinality_string_extract_filter_value() {
        use crate::dictionary::FieldDictionary;

        let dict = FieldDictionary::new();
        let mapping = FieldMapping {
            source: "color".into(),
            target: "color".into(),
            value_type: FieldValueType::LowCardinalityString,
            fallback: None,
            string_map: None,
            doc_only: false,
            filter_only: false,
            ms_to_seconds: false,
            truncate_u32: false,
            case_sensitive: false,
            default_value: None,
            nullable: false,
        };

        let mut field_map: HashMap<u64, RoaringBitmap> = HashMap::new();

        let raw1 = serde_json::json!("Red");
        extract_filter_value_with_dict(&raw1, &mapping, 100, &mut field_map, false, Some(&dict));

        let raw2 = serde_json::json!("Blue");
        extract_filter_value_with_dict(&raw2, &mapping, 200, &mut field_map, false, Some(&dict));

        let raw3 = serde_json::json!("red"); // same as "Red" (case insensitive)
        extract_filter_value_with_dict(&raw3, &mapping, 300, &mut field_map, false, Some(&dict));

        // "Red" and "red" should have the same key
        let red_key = dict.get("Red").unwrap() as u64;
        let blue_key = dict.get("Blue").unwrap() as u64;
        assert_ne!(red_key, blue_key);

        let red_bm = field_map.get(&red_key).unwrap();
        assert!(red_bm.contains(100));
        assert!(red_bm.contains(300)); // "red" maps to same key as "Red"
        assert!(!red_bm.contains(200));

        let blue_bm = field_map.get(&blue_key).unwrap();
        assert!(blue_bm.contains(200));
        assert!(!blue_bm.contains(100));
    }

    #[test]
    fn test_low_cardinality_string_dictionary_persistence() {
        use crate::dictionary::{FieldDictionary, save_dictionary, load_dictionary};

        let dict = FieldDictionary::new();
        dict.get_or_insert("Alpha");
        dict.get_or_insert("Beta");
        dict.get_or_insert("Gamma");

        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("test_field.dict");

        let snap = dict.snapshot();
        save_dictionary(&snap, &path).unwrap();

        let loaded_snap = load_dictionary(&path).unwrap().unwrap();
        let dict2 = FieldDictionary::from_snapshot(&loaded_snap);

        // Same mappings after reload
        assert_eq!(dict2.get("alpha"), dict.get("alpha"));
        assert_eq!(dict2.get("beta"), dict.get("beta"));
        assert_eq!(dict2.get("gamma"), dict.get("gamma"));

        // Original casing preserved
        assert_eq!(loaded_snap.originals.get("alpha"), Some(&"Alpha".to_string()));
    }
}

#[cfg(test)]
mod checkpoint_tests {
    use super::*;

    #[test]
    fn test_checkpoint_roundtrip() {
        let filter_names: Vec<String> = vec!["nsfwLevel", "userId", "tagIds"]
            .into_iter().map(String::from).collect();
        let sort_configs: Vec<(String, u8)> = vec![("sortAt".to_string(), 32), ("id".to_string(), 32)];

        let mut accum = BitmapAccum::new(&filter_names, &sort_configs);

        // Add alive bits
        for i in [100u32, 200, 300, 50000] {
            accum.alive.insert(i);
        }

        // Add filter values
        if let Some(fm) =
accum.filter_maps.get_mut("nsfwLevel") { - fm.entry(1).or_insert_with(RoaringBitmap::new).insert(100); - fm.entry(1).or_insert_with(RoaringBitmap::new).insert(200); - fm.entry(8).or_insert_with(RoaringBitmap::new).insert(300); - } - if let Some(fm) = accum.filter_maps.get_mut("userId") { - fm.entry(42).or_insert_with(RoaringBitmap::new).insert(100); - fm.entry(42).or_insert_with(RoaringBitmap::new).insert(300); - fm.entry(99).or_insert_with(RoaringBitmap::new).insert(200); - } - if let Some(fm) = accum.filter_maps.get_mut("tagIds") { - fm.entry(1000).or_insert_with(RoaringBitmap::new).insert(100); - fm.entry(1000).or_insert_with(RoaringBitmap::new).insert(200); - fm.entry(2000).or_insert_with(RoaringBitmap::new).insert(300); - } - - // Add sort bits (sortAt = 1700000000 for slot 100) - let val: u32 = 1700000000; - if let Some(sm) = accum.sort_maps.get_mut("sortAt") { - for bit in 0..32usize { - if (val >> bit) & 1 == 1 { - sm.entry(bit).or_insert_with(RoaringBitmap::new).insert(100); - } - } - } - - // Save checkpoint - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("test.ckpt"); - accum.save_checkpoint(&path).unwrap(); - - // Load checkpoint - let loaded = BitmapAccum::load_checkpoint(&path).unwrap(); - - // Verify alive - assert_eq!(loaded.alive.len(), 4); - assert!(loaded.alive.contains(100)); - assert!(loaded.alive.contains(200)); - assert!(loaded.alive.contains(300)); - assert!(loaded.alive.contains(50000)); - - // Verify filters - let nsfw = loaded.filter_maps.get("nsfwLevel").unwrap(); - assert_eq!(nsfw.get(&1).unwrap().len(), 2); - assert_eq!(nsfw.get(&8).unwrap().len(), 1); - - let users = loaded.filter_maps.get("userId").unwrap(); - assert_eq!(users.get(&42).unwrap().len(), 2); - assert_eq!(users.get(&99).unwrap().len(), 1); - - let tags = loaded.filter_maps.get("tagIds").unwrap(); - assert_eq!(tags.get(&1000).unwrap().len(), 2); - assert_eq!(tags.get(&2000).unwrap().len(), 1); - - // Verify sort bits - let sort_at = 
loaded.sort_maps.get("sortAt").unwrap(); - // Reconstruct the value from bits - let mut reconstructed: u32 = 0; - for bit in 0..32usize { - if let Some(bm) = sort_at.get(&bit) { - if bm.contains(100) { - reconstructed |= 1 << bit; - } - } - } - assert_eq!(reconstructed, 1700000000); - } - - #[test] - fn test_checkpoint_empty_accum() { - let filter_names: Vec = vec!["field1".to_string()]; - let sort_configs: Vec<(String, u8)> = vec![("sort1".to_string(), 16)]; - - let accum = BitmapAccum::new(&filter_names, &sort_configs); - - let dir = tempfile::tempdir().unwrap(); - let path = dir.path().join("empty.ckpt"); - accum.save_checkpoint(&path).unwrap(); - - let loaded = BitmapAccum::load_checkpoint(&path).unwrap(); - assert_eq!(loaded.alive.len(), 0); - assert!(loaded.filter_maps.get("field1").unwrap().is_empty()); - assert!(loaded.sort_maps.get("sort1").unwrap().is_empty()); - } - - #[test] - fn test_filter_only_excluded_from_document() { - // filter_only fields should be bitmap-indexed but NOT stored in the Document - let schema = DataSchema { - id_field: "id".into(), - schema_version: 1, - fields: vec![ - FieldMapping { - source: "tagIds".into(), - target: "tagIds".into(), - value_type: FieldValueType::IntegerArray, - fallback: None, - string_map: None, - doc_only: false, - filter_only: false, - ms_to_seconds: false, - truncate_u32: false, - case_sensitive: false, - default_value: None, - nullable: false, - }, - FieldMapping { - source: "collectionIds".into(), - target: "collectionIds".into(), - value_type: FieldValueType::IntegerArray, - fallback: None, - string_map: None, - doc_only: false, - filter_only: true, - ms_to_seconds: false, - truncate_u32: false, - case_sensitive: false, - default_value: None, - nullable: false, - }, - ], - }; - - let json = serde_json::json!({ - "id": 42, - "tagIds": [10, 20], - "collectionIds": [100, 200] - }); - - // Document should have tagIds but NOT collectionIds - let (slot, doc) = json_to_document(&json, &schema).unwrap(); - 
assert_eq!(slot, 42); - assert!(doc.fields.contains_key("tagIds"), "tagIds should be in Document"); - assert!(!doc.fields.contains_key("collectionIds"), "filter_only field should be excluded from Document"); - - // StoredDoc should also exclude filter_only fields - let stored = json_to_stored_doc(&json, &schema); - assert!(stored.fields.contains_key("tagIds")); - assert!(!stored.fields.contains_key("collectionIds")); - } - - #[test] - fn test_filter_only_still_indexed_in_bitmaps() { - // filter_only fields should still be bitmap-indexed - let schema = DataSchema { - id_field: "id".into(), - schema_version: 1, - fields: vec![FieldMapping { - source: "collectionIds".into(), - target: "collectionIds".into(), - value_type: FieldValueType::IntegerArray, - fallback: None, - string_map: None, - doc_only: false, - filter_only: true, - ms_to_seconds: false, - truncate_u32: false, - case_sensitive: false, - default_value: None, - nullable: false, - }], - }; - - let json = serde_json::json!({ - "id": 42, - "collectionIds": [100, 200] - }); - - let filter_set: HashSet = ["collectionIds".to_string()].into(); - let sort_bits: HashMap = HashMap::new(); - let mut filter_maps: HashMap> = HashMap::new(); - filter_maps.insert("collectionIds".to_string(), HashMap::new()); - let mut sort_maps: HashMap> = HashMap::new(); - - extract_bitmaps(&json, &schema, &filter_set, &sort_bits, 42, &mut filter_maps, &mut sort_maps); - - let coll_map = filter_maps.get("collectionIds").unwrap(); - assert!(coll_map.get(&100).unwrap().contains(42), "slot 42 should be in bitmap for collectionId 100"); - assert!(coll_map.get(&200).unwrap().contains(42), "slot 42 should be in bitmap for collectionId 200"); - } - - #[test] - fn test_filter_only_and_doc_only_mutually_exclusive() { - let schema = DataSchema { - id_field: "id".into(), - schema_version: 1, - fields: vec![FieldMapping { - source: "x".into(), - target: "x".into(), - value_type: FieldValueType::Integer, - fallback: None, - string_map: None, - 
doc_only: true, - filter_only: true, - ms_to_seconds: false, - truncate_u32: false, - case_sensitive: false, - default_value: None, - nullable: false, - }], - }; - assert!(schema.validate().is_err(), "doc_only + filter_only should fail validation"); - } -} diff --git a/src/sync/mod.rs b/src/sync/mod.rs index db1d0d3..a0ab68a 100644 --- a/src/sync/mod.rs +++ b/src/sync/mod.rs @@ -5,19 +5,15 @@ pub mod bitdex_client; pub mod bulk_loader; pub mod config; -pub mod copy_queries; pub mod dump; pub mod dump_enrichment; pub mod dump_expression; pub mod dump_processor; pub mod ingester; -pub mod loader; pub mod metrics_poller; pub mod op_dedup; pub mod ops; pub mod ops_poller; pub mod trigger_gen; -pub mod progress; pub mod queries; -pub mod slot_arena; pub mod sync_config; diff --git a/src/sync/progress.rs b/src/sync/progress.rs deleted file mode 100644 index 5ddd8c0..0000000 --- a/src/sync/progress.rs +++ /dev/null @@ -1,113 +0,0 @@ -//! Shared load progress state and HTTP endpoint for monitoring bulk loads. - -use std::sync::atomic::{AtomicU8, AtomicU64, Ordering}; -use std::sync::Arc; -use std::time::Instant; - -use axum::extract::State; -use axum::routing::get; -use axum::{Json, Router}; -use serde_json::json; - -/// Shared progress state for bulk load monitoring. -/// All fields are atomic for concurrent access from multiple stream tasks. 
-pub struct LoadProgress { - /// Load phase: 0=setup, 1=streaming, 2=cleanup, 3=applying, 4=finalizing, 5=saving, 6=done - pub phase: AtomicU8, - /// Wall clock start time - start_time: Instant, - /// Per-stream row counters - pub image_rows: AtomicU64, - pub tag_rows: AtomicU64, - pub tool_rows: AtomicU64, - pub technique_rows: AtomicU64, - pub resource_rows: AtomicU64, - /// Number of streams that have completed (out of 5) - pub streams_done: AtomicU8, -} - -impl LoadProgress { - pub fn new() -> Self { - Self { - phase: AtomicU8::new(0), - start_time: Instant::now(), - image_rows: AtomicU64::new(0), - tag_rows: AtomicU64::new(0), - tool_rows: AtomicU64::new(0), - technique_rows: AtomicU64::new(0), - resource_rows: AtomicU64::new(0), - streams_done: AtomicU8::new(0), - } - } - - pub fn elapsed_secs(&self) -> f64 { - self.start_time.elapsed().as_secs_f64() - } - - pub fn set_phase(&self, phase: u8) { - self.phase.store(phase, Ordering::Release); - } -} - -async fn status_handler(State(progress): State>) -> Json { - let elapsed = progress.elapsed_secs(); - let phase = progress.phase.load(Ordering::Acquire); - let images = progress.image_rows.load(Ordering::Relaxed); - let tags = progress.tag_rows.load(Ordering::Relaxed); - let tools = progress.tool_rows.load(Ordering::Relaxed); - let techniques = progress.technique_rows.load(Ordering::Relaxed); - let resources = progress.resource_rows.load(Ordering::Relaxed); - let done = progress.streams_done.load(Ordering::Relaxed); - - let phase_name = match phase { - 0 => "setup", - 1 => "streaming", - 2 => "cleanup", - 3 => "applying", - 4 => "finalizing", - 5 => "saving", - 6 => "done", - _ => "unknown", - }; - - Json(json!({ - "phase": phase_name, - "elapsed_secs": (elapsed * 10.0).round() / 10.0, - "streams_done": done, - "streams": { - "images": { "rows": images, "rate": if elapsed > 0.0 { (images as f64 / elapsed).round() } else { 0.0 } }, - "tags": { "rows": tags, "rate": if elapsed > 0.0 { (tags as f64 / elapsed).round() 
} else { 0.0 } }, - "tools": { "rows": tools, "rate": if elapsed > 0.0 { (tools as f64 / elapsed).round() } else { 0.0 } }, - "techniques": { "rows": techniques, "rate": if elapsed > 0.0 { (techniques as f64 / elapsed).round() } else { 0.0 } }, - "resources": { "rows": resources, "rate": if elapsed > 0.0 { (resources as f64 / elapsed).round() } else { 0.0 } }, - } - })) -} - -/// Spawn the progress HTTP server in a background tokio task. -/// Returns the shutdown sender — send `()` to gracefully stop the server. -pub fn spawn_progress_server( - port: u16, - progress: Arc, -) -> tokio::sync::oneshot::Sender<()> { - let (tx, rx) = tokio::sync::oneshot::channel::<()>(); - - tokio::spawn(async move { - let app = Router::new() - .route("/status", get(status_handler)) - .with_state(progress); - - let addr = std::net::SocketAddr::from(([0, 0, 0, 0], port)); - let listener = tokio::net::TcpListener::bind(addr).await.unwrap(); - eprintln!("Progress server listening on {addr}"); - - axum::serve(listener, app) - .with_graceful_shutdown(async { - rx.await.ok(); - }) - .await - .ok(); - }); - - tx -} diff --git a/src/sync/queries.rs b/src/sync/queries.rs index 7e2b169..cebe73d 100644 --- a/src/sync/queries.rs +++ b/src/sync/queries.rs @@ -1,5 +1,4 @@ -use chrono::{DateTime, Utc}; -use sqlx::{FromRow, PgPool}; +use sqlx::PgPool; // --------------------------------------------------------------------------- // Setup SQL — creates BitdexOutbox table + all triggers @@ -307,101 +306,6 @@ pub async fn get_max_ops_id(pool: &PgPool) -> Result { Ok(row.0.unwrap_or(0)) } -// --------------------------------------------------------------------------- -// Row types -// --------------------------------------------------------------------------- - -#[derive(Debug, FromRow)] -pub struct ImageRow { - pub id: i64, - #[sqlx(rename = "postId")] - pub post_id: i64, - pub url: Option, - #[sqlx(rename = "nsfwLevel")] - pub nsfw_level: Option, - pub hash: Option, - #[sqlx(rename = "hideMeta")] - pub 
hide_meta: Option, - #[sqlx(rename = "type")] - pub image_type: Option, - #[sqlx(rename = "userId")] - pub user_id: Option, - pub minor: Option, - pub poi: Option, - #[sqlx(rename = "blockedFor")] - pub blocked_for: Option, - #[sqlx(rename = "scannedAt")] - pub scanned_at: Option>, - #[sqlx(rename = "createdAt")] - pub created_at: Option>, - pub meta: Option, - #[sqlx(rename = "publishedAt")] - pub published_at: Option>, - pub availability: Option, - #[sqlx(rename = "postedToId")] - pub posted_to_id: Option, - #[sqlx(rename = "sortAt")] - pub sort_at: Option>, - pub width: Option, - pub height: Option, -} - -#[derive(Debug, FromRow)] -pub struct TagRow { - #[sqlx(rename = "imageId")] - pub image_id: i32, - #[sqlx(rename = "tagId")] - pub tag_id: i32, -} - -#[derive(Debug, FromRow)] -pub struct ToolRow { - #[sqlx(rename = "imageId")] - pub image_id: i32, - #[sqlx(rename = "toolId")] - pub tool_id: i32, -} - -#[derive(Debug, FromRow)] -pub struct TechniqueRow { - #[sqlx(rename = "imageId")] - pub image_id: i32, - #[sqlx(rename = "techniqueId")] - pub technique_id: i32, -} - -#[derive(Debug, FromRow)] -pub struct ResourceRow { - #[sqlx(rename = "imageId")] - pub image_id: i32, - #[sqlx(rename = "baseModel")] - pub base_model: Option, - #[sqlx(rename = "modelVersionIds")] - pub model_version_ids: Vec, - #[sqlx(rename = "modelVersionIdsManual")] - pub model_version_ids_manual: Vec, - #[sqlx(rename = "resourcePoi")] - pub resource_poi: Option, -} - -#[derive(Debug, FromRow)] -pub struct OutboxRow { - pub id: i64, - pub entity_id: i64, - pub event: String, -} - -#[derive(Debug, FromRow)] -pub struct MetricRow { - pub id: i64, - #[sqlx(rename = "reactionCount")] - pub reaction_count: i64, - #[sqlx(rename = "commentCount")] - pub comment_count: i64, - #[sqlx(rename = "collectedCount")] - pub collected_count: i64, -} - // --------------------------------------------------------------------------- // Query functions // 
--------------------------------------------------------------------------- @@ -431,158 +335,6 @@ async fn check_triggers_exist(pool: &PgPool) -> Result { Ok(row.0 >= 9) } -/// Get the max image ID for range-based bulk loading. -pub async fn get_max_image_id(pool: &PgPool) -> Result { - let row: (i64,) = sqlx::query_as("SELECT COALESCE(MAX(id)::int8, 0) FROM \"Image\"") - .fetch_one(pool) - .await?; - Ok(row.0) -} - -/// Fetch images by ID range (for bulk loading). -pub async fn fetch_images_by_range( - pool: &PgPool, - start: i64, - end: i64, -) -> Result, sqlx::Error> { - sqlx::query_as::<_, ImageRow>( - r#"SELECT i.id::int8, i."postId"::int8, i.url, i."nsfwLevel", i.hash, - i."hideMeta", i.type::text, i."userId"::int8, - i.minor, i.poi, i."blockedFor", i."scannedAt"::timestamptz, i."createdAt"::timestamptz, - i.meta, - p."publishedAt"::timestamptz, p.availability::text, p."modelVersionId"::int8 as "postedToId", - GREATEST(p."publishedAt", i."scannedAt", i."createdAt")::timestamptz as "sortAt" - FROM "Image" i - JOIN "Post" p ON p.id = i."postId" - WHERE i.id >= $1 AND i.id < $2"#, - ) - .bind(start) - .bind(end) - .fetch_all(pool) - .await -} - -/// Fetch images by ID list (for sync/streaming). -pub async fn fetch_images_by_ids( - pool: &PgPool, - ids: &[i64], -) -> Result, sqlx::Error> { - sqlx::query_as::<_, ImageRow>( - r#"SELECT i.id::int8, i."postId"::int8, i.url, i."nsfwLevel", i.hash, - i."hideMeta", i.type::text, i."userId"::int8, - i.minor, i.poi, i."blockedFor", i."scannedAt"::timestamptz, i."createdAt"::timestamptz, - i.meta, - p."publishedAt"::timestamptz, p.availability::text, p."modelVersionId"::int8 as "postedToId", - GREATEST(p."publishedAt", i."scannedAt", i."createdAt")::timestamptz as "sortAt", - i.width, i.height - FROM "Image" i - JOIN "Post" p ON p.id = i."postId" - WHERE i.id = ANY($1)"#, - ) - .bind(ids) - .fetch_all(pool) - .await -} - -/// Fetch tags for a batch of image IDs. 
-pub async fn fetch_tags(pool: &PgPool, image_ids: &[i64]) -> Result, sqlx::Error> { - sqlx::query_as::<_, TagRow>( - r#"SELECT "imageId", "tagId" FROM "TagsOnImageDetails" - WHERE "imageId" = ANY($1) AND disabled = false"#, - ) - .bind(image_ids) - .fetch_all(pool) - .await -} - -/// Fetch tools for a batch of image IDs. -pub async fn fetch_tools(pool: &PgPool, image_ids: &[i64]) -> Result, sqlx::Error> { - sqlx::query_as::<_, ToolRow>( - r#"SELECT "imageId", "toolId" FROM "ImageTool" WHERE "imageId" = ANY($1)"#, - ) - .bind(image_ids) - .fetch_all(pool) - .await -} - -/// Fetch techniques for a batch of image IDs. -pub async fn fetch_techniques( - pool: &PgPool, - image_ids: &[i64], -) -> Result, sqlx::Error> { - sqlx::query_as::<_, TechniqueRow>( - r#"SELECT "imageId", "techniqueId" FROM "ImageTechnique" WHERE "imageId" = ANY($1)"#, - ) - .bind(image_ids) - .fetch_all(pool) - .await -} - -/// Fetch resources + model versions for a batch of image IDs. -pub async fn fetch_resources( - pool: &PgPool, - image_ids: &[i64], -) -> Result, sqlx::Error> { - sqlx::query_as::<_, ResourceRow>( - r#"SELECT ir."imageId", - string_agg(CASE WHEN m.type = 'Checkpoint' THEN mv."baseModel" ELSE NULL END, '') as "baseModel", - coalesce(array_agg(mv.id::int8) FILTER (WHERE ir.detected), '{}') as "modelVersionIds", - coalesce(array_agg(mv.id::int8) FILTER (WHERE NOT ir.detected), '{}') as "modelVersionIdsManual", - bool_or(m.poi) as "resourcePoi" - FROM "ImageResourceNew" ir - JOIN "ModelVersion" mv ON ir."modelVersionId" = mv.id - JOIN "Model" m ON mv."modelId" = m.id - WHERE ir."imageId" = ANY($1) - GROUP BY ir."imageId""#, - ) - .bind(image_ids) - .fetch_all(pool) - .await -} - -/// Row type for CollectionItem enrichment. -#[derive(Debug, FromRow)] -pub struct CollectionItemRow { - #[sqlx(rename = "imageId")] - pub image_id: i64, - #[sqlx(rename = "collectionId")] - pub collection_id: i64, -} - -/// Fetch accepted collection memberships for a batch of image IDs. 
-pub async fn fetch_collections( - pool: &PgPool, - image_ids: &[i64], -) -> Result, sqlx::Error> { - sqlx::query_as::<_, CollectionItemRow>( - r#"SELECT "imageId"::int8, "collectionId"::int8 FROM "CollectionItem" - WHERE "imageId" = ANY($1) AND status = 'ACCEPTED'"#, - ) - .bind(image_ids) - .fetch_all(pool) - .await -} - -// V1 poll_outbox and delete_outbox removed — V2 uses ops_poller with BitdexOps table. - -/// Poll outbox rows after a cursor position (FIFO — oldest first). -pub async fn poll_outbox_from_cursor( - pool: &PgPool, - cursor: i64, - limit: i64, -) -> Result, sqlx::Error> { - sqlx::query_as::<_, OutboxRow>( - r#"SELECT id, entity_id, event FROM "BitdexOutbox" - WHERE id > $1 - ORDER BY id ASC - LIMIT $2"#, - ) - .bind(cursor) - .bind(limit) - .fetch_all(pool) - .await -} - /// Report a replica's cursor to PG for outbox cleanup tracking. pub async fn upsert_cursor( pool: &PgPool, @@ -611,177 +363,3 @@ pub async fn get_max_outbox_id(pool: &PgPool) -> Result { .await?; Ok(row.0.unwrap_or(0)) } - -// --------------------------------------------------------------------------- -// Streaming bulk queries — table-at-a-time loading -// --------------------------------------------------------------------------- - -/// Row type for streaming tags ordered by tagId (for bitmap-efficient insertion). -#[derive(Debug, FromRow)] -pub struct StreamTagRow { - #[sqlx(rename = "tagId")] - pub tag_id: i64, - #[sqlx(rename = "imageId")] - pub image_id: i64, -} - -/// Row type for streaming resources (one row per imageId, pre-aggregated). -#[derive(Debug, FromRow)] -pub struct StreamResourceRow { - #[sqlx(rename = "imageId")] - pub image_id: i64, - #[sqlx(rename = "baseModel")] - pub base_model: Option, - #[sqlx(rename = "modelVersionIds")] - pub model_version_ids: Vec, - #[sqlx(rename = "modelVersionIdsManual")] - pub model_version_ids_manual: Vec, - #[sqlx(rename = "resourcePoi")] - pub resource_poi: Option, -} - -/// Get max tag ID for range iteration. 
-pub async fn get_max_tag_id(pool: &PgPool) -> Result { - let row: (i64,) = sqlx::query_as( - r#"SELECT COALESCE(MAX("tagId")::int8, 0) FROM "TagsOnImageDetails""#, - ) - .fetch_one(pool) - .await?; - Ok(row.0) -} - -/// Fetch tags by tagId range, ordered by tagId then imageId. -/// This produces bitmap-optimal ordering: all images for one tagId together. -pub async fn fetch_tags_by_tag_range( - pool: &PgPool, - start: i64, - end: i64, -) -> Result, sqlx::Error> { - sqlx::query_as::<_, StreamTagRow>( - r#"SELECT "tagId", "imageId" FROM "TagsOnImageDetails" - WHERE "tagId" >= $1 AND "tagId" < $2 - AND disabled = false - ORDER BY "tagId", "imageId""#, - ) - .bind(start) - .bind(end) - .fetch_all(pool) - .await -} - -/// Get max tool ID for range iteration. -pub async fn get_max_tool_id(pool: &PgPool) -> Result { - let row: (i64,) = sqlx::query_as( - r#"SELECT COALESCE(MAX("toolId")::int8, 0) FROM "ImageTool""#, - ) - .fetch_one(pool) - .await?; - Ok(row.0) -} - -/// Fetch tools by toolId range, ordered by toolId then imageId. -pub async fn fetch_tools_by_tool_range( - pool: &PgPool, - start: i64, - end: i64, -) -> Result, sqlx::Error> { - sqlx::query_as::<_, ToolRow>( - r#"SELECT "imageId", "toolId" FROM "ImageTool" - WHERE "toolId" >= $1 AND "toolId" < $2 - ORDER BY "toolId", "imageId""#, - ) - .bind(start) - .bind(end) - .fetch_all(pool) - .await -} - -/// Get max technique ID for range iteration. -pub async fn get_max_technique_id(pool: &PgPool) -> Result { - let row: (i64,) = sqlx::query_as( - r#"SELECT COALESCE(MAX("techniqueId")::int8, 0) FROM "ImageTechnique""#, - ) - .fetch_one(pool) - .await?; - Ok(row.0) -} - -/// Fetch techniques by techniqueId range, ordered by techniqueId then imageId. 
-pub async fn fetch_techniques_by_technique_range( - pool: &PgPool, - start: i64, - end: i64, -) -> Result, sqlx::Error> { - sqlx::query_as::<_, TechniqueRow>( - r#"SELECT "imageId", "techniqueId" FROM "ImageTechnique" - WHERE "techniqueId" >= $1 AND "techniqueId" < $2 - ORDER BY "techniqueId", "imageId""#, - ) - .bind(start) - .bind(end) - .fetch_all(pool) - .await -} - -/// Fetch resources by imageId range (pre-aggregated per imageId). -pub async fn fetch_resources_by_range( - pool: &PgPool, - start: i64, - end: i64, -) -> Result, sqlx::Error> { - sqlx::query_as::<_, StreamResourceRow>( - r#"SELECT ir."imageId", - string_agg(CASE WHEN m.type = 'Checkpoint' THEN mv."baseModel" ELSE NULL END, '') as "baseModel", - coalesce(array_agg(mv.id) FILTER (WHERE ir.detected), '{}') as "modelVersionIds", - coalesce(array_agg(mv.id) FILTER (WHERE NOT ir.detected), '{}') as "modelVersionIdsManual", - bool_or(m.poi) as "resourcePoi" - FROM "ImageResourceNew" ir - JOIN "ModelVersion" mv ON ir."modelVersionId" = mv.id - JOIN "Model" m ON mv."modelId" = m.id - WHERE ir."imageId" >= $1 AND ir."imageId" < $2 - GROUP BY ir."imageId""#, - ) - .bind(start) - .bind(end) - .fetch_all(pool) - .await -} - -/// Row type for streaming collection items ordered by collectionId. -#[derive(Debug, FromRow)] -pub struct StreamCollectionRow { - #[sqlx(rename = "collectionId")] - pub collection_id: i64, - #[sqlx(rename = "imageId")] - pub image_id: i64, -} - -/// Get max collection ID for range iteration. -pub async fn get_max_collection_id(pool: &PgPool) -> Result { - let row: (i32,) = sqlx::query_as( - r#"SELECT COALESCE(MAX("collectionId"), 0) FROM "CollectionItem" WHERE "imageId" IS NOT NULL"#, - ) - .fetch_one(pool) - .await?; - Ok(row.0) -} - -/// Fetch collection items by collectionId range, ordered by collectionId then imageId. -/// Filters on imageId IS NOT NULL (image collections only) and status = 'ACCEPTED'. 
-pub async fn fetch_collections_by_range( - pool: &PgPool, - start: i32, - end: i32, -) -> Result, sqlx::Error> { - sqlx::query_as::<_, StreamCollectionRow>( - r#"SELECT "collectionId", "imageId" FROM "CollectionItem" - WHERE "collectionId" >= $1 AND "collectionId" < $2 - AND "imageId" IS NOT NULL - AND status = 'ACCEPTED' - ORDER BY "collectionId", "imageId""#, - ) - .bind(start) - .bind(end) - .fetch_all(pool) - .await -} diff --git a/src/sync/slot_arena.rs b/src/sync/slot_arena.rs deleted file mode 100644 index 2ee48ec..0000000 --- a/src/sync/slot_arena.rs +++ /dev/null @@ -1,1016 +0,0 @@ -//! Pre-allocated memory-mapped slot arena for bulk loading. -//! -//! Each document gets a fixed 512-byte slot in a memory-mapped file. -//! Multiple table streams write to different field offsets concurrently. -//! After all streams complete, a finalization pass reads populated slots, -//! serializes to msgpack, and writes to the docstore. -//! -//! Slot layout (512 bytes): -//! ```text -//! Offset Size Field -//! ------ ---- ----- -//! 0 8 present_mask (AtomicU64 LE) -//! 8 8 image_id (u64 LE) -//! 16 1 nsfw_level (u8) -//! 17 8 user_id (u64 LE) -//! 25 1 image_type_enum (u8: 0=image, 1=video, 2=audio) -//! 26 8 sort_at (u64 LE, unix seconds) -//! 34 1 poi (u8 bool) -//! 35 1 minor (u8 bool) -//! 36 80 url ([u8; 80], first byte = length, rest = UTF-8) -//! 116 40 hash ([u8; 40], first byte = length) -//! 156 1 tag_count (u8, max 48 inline) -//! 157 192 tag_ids ([u32; 48] LE) -//! 349 1 mv_count (u8, max 8 inline) -//! 350 32 model_version_ids ([u32; 8] LE) -//! 382 1 tool_count (u8, max 8) -//! 383 32 tool_ids ([u32; 8] LE) -//! 415 1 technique_count (u8, max 4) -//! 416 16 technique_ids ([u32; 4] LE) -//! 432 1 has_meta (u8 bool) -//! 433 1 on_site (u8 bool) -//! 434 8 post_id (u64 LE) -//! 442 8 posted_to_id (u64 LE) -//! 450 1 availability_enum (u8: 0=Public, 1=Private, 2=Unsearchable) -//! 451 1 blocked_for_enum (u8: 0=none, 1=CSAM, 2=TOS, ...) -//! 
452 4 reaction_count (u32 LE) -//! 456 4 comment_count (u32 LE) -//! 460 4 collected_count (u32 LE) -//! 464 1 mv_manual_count (u8, max 8) -//! 465 32 model_version_ids_manual ([u32; 8] LE) -//! 497 1 base_model_enum (u8) -//! 498 1 resource_poi (u8 bool, OR'd with image poi) -//! 499 8 published_at_unix (u64 LE, milliseconds) -//! 507 5 _padding -//! --- --- -//! 512 total -//! ``` - -use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Mutex; - -use memmap2::MmapMut; -use roaring::RoaringBitmap; - -use crate::config::DataSchema; -use crate::error::Result; - -// --------------------------------------------------------------------------- -// Constants — slot layout -// --------------------------------------------------------------------------- - -pub const SLOT_SIZE: usize = 512; - -// Field offsets -pub const OFF_PRESENT: usize = 0; -pub const OFF_IMAGE_ID: usize = 8; -pub const OFF_NSFW: usize = 16; -pub const OFF_USER_ID: usize = 17; -pub const OFF_IMAGE_TYPE: usize = 25; -pub const OFF_SORT_AT: usize = 26; -pub const OFF_POI: usize = 34; -pub const OFF_MINOR: usize = 35; -pub const OFF_URL: usize = 36; -pub const OFF_HASH: usize = 116; -pub const OFF_TAG_COUNT: usize = 156; -pub const OFF_TAG_IDS: usize = 157; -pub const OFF_MV_COUNT: usize = 349; -pub const OFF_MV_IDS: usize = 350; -pub const OFF_TOOL_COUNT: usize = 382; -pub const OFF_TOOL_IDS: usize = 383; -pub const OFF_TECH_COUNT: usize = 415; -pub const OFF_TECH_IDS: usize = 416; -pub const OFF_HAS_META: usize = 432; -pub const OFF_ON_SITE: usize = 433; -pub const OFF_POST_ID: usize = 434; -pub const OFF_POSTED_TO_ID: usize = 442; -pub const OFF_AVAILABILITY: usize = 450; -pub const OFF_BLOCKED_FOR: usize = 451; -pub const OFF_REACTION_COUNT: usize = 452; -pub const OFF_COMMENT_COUNT: usize = 456; -pub const OFF_COLLECTED_COUNT: usize = 460; -pub const OFF_MV_MANUAL_COUNT: usize = 464; -pub const OFF_MV_MANUAL_IDS: usize = 465; -pub const OFF_BASE_MODEL: usize = 
497; -pub const OFF_RESOURCE_POI: usize = 498; -pub const OFF_PUBLISHED_AT: usize = 499; - -// Inline capacity limits -pub const MAX_INLINE_TAGS: usize = 48; -pub const MAX_INLINE_MVS: usize = 8; -pub const MAX_INLINE_TOOLS: usize = 8; -pub const MAX_INLINE_TECHNIQUES: usize = 4; -pub const MAX_URL_LEN: usize = 79; // first byte = length -pub const MAX_HASH_LEN: usize = 39; - -// Present mask bits -pub const MASK_IMAGE_ID: u64 = 1 << 0; -pub const MASK_NSFW: u64 = 1 << 1; -pub const MASK_USER_ID: u64 = 1 << 2; -pub const MASK_IMAGE_TYPE: u64 = 1 << 3; -pub const MASK_SORT_AT: u64 = 1 << 4; -pub const MASK_POI: u64 = 1 << 5; -pub const MASK_MINOR: u64 = 1 << 6; -pub const MASK_URL: u64 = 1 << 7; -pub const MASK_HASH: u64 = 1 << 8; -pub const MASK_TAGS: u64 = 1 << 9; -pub const MASK_MV: u64 = 1 << 10; -pub const MASK_TOOLS: u64 = 1 << 11; -pub const MASK_TECHNIQUES: u64 = 1 << 12; -pub const MASK_HAS_META: u64 = 1 << 13; -pub const MASK_ON_SITE: u64 = 1 << 14; -pub const MASK_POST_ID: u64 = 1 << 15; -pub const MASK_POSTED_TO_ID: u64 = 1 << 16; -pub const MASK_AVAILABILITY: u64 = 1 << 17; -pub const MASK_BLOCKED_FOR: u64 = 1 << 18; -pub const MASK_METRICS: u64 = 1 << 19; -pub const MASK_MV_MANUAL: u64 = 1 << 20; -pub const MASK_BASE_MODEL: u64 = 1 << 21; -pub const MASK_RESOURCE_POI: u64 = 1 << 22; -pub const MASK_PUBLISHED_AT: u64 = 1 << 23; - -// --------------------------------------------------------------------------- -// Overflow for fields that exceed inline capacity -// --------------------------------------------------------------------------- - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum OverflowField { - Tags, - ModelVersionIds, - ModelVersionIdsManual, - Tools, - Techniques, -} - -#[derive(Debug)] -pub(crate) struct OverflowEntry { - pub(crate) slot: u32, - pub(crate) field: OverflowField, - pub(crate) data: Vec, -} - -// --------------------------------------------------------------------------- -// SlotData — assembled from slot + overflow 
for finalization -// --------------------------------------------------------------------------- - -/// Complete data read from a slot, including overflow. -#[derive(Debug)] -pub struct SlotData { - pub image_id: u64, - pub nsfw_level: u8, - pub user_id: u64, - pub image_type: u8, - pub sort_at: u64, - pub poi: bool, - pub minor: bool, - pub url: Option, - pub hash: Option, - pub tag_ids: Vec, - pub model_version_ids: Vec, - pub model_version_ids_manual: Vec, - pub tool_ids: Vec, - pub technique_ids: Vec, - pub has_meta: bool, - pub on_site: bool, - pub post_id: u64, - pub posted_to_id: u64, - pub availability: u8, - pub blocked_for: u8, - pub reaction_count: u32, - pub comment_count: u32, - pub collected_count: u32, - pub base_model: u8, - pub resource_poi: bool, - pub published_at_unix: u64, -} - -// --------------------------------------------------------------------------- -// SlotArena -// --------------------------------------------------------------------------- - -/// Memory-mapped slot arena for bulk loading. -/// -/// Pre-allocates a file with `max_slot * 512` bytes. Each table stream writes -/// to different field offsets concurrently using atomic present_mask updates. -/// After all streams finish, `finalize_to_docstore` reads populated slots and -/// writes compressed docstore shards. -pub struct SlotArena { - mmap: MmapMut, - overflow: Mutex>, - max_slot: u32, - arena_path: PathBuf, - _file: std::fs::File, // keep file handle alive for the mmap -} - -impl SlotArena { - /// Create a new slot arena backed by a memory-mapped file. - /// - /// The file is pre-allocated to `(max_slot + 1) * SLOT_SIZE` bytes and - /// zero-initialized. 
- pub fn new(max_slot: u32, path: &Path) -> Result { - let file_size = (max_slot as u64 + 1) * SLOT_SIZE as u64; - let file = std::fs::OpenOptions::new() - .read(true) - .write(true) - .create(true) - .truncate(true) - .open(path) - .map_err(|e| crate::error::BitdexError::Storage( - format!("SlotArena: create file {}: {e}", path.display()), - ))?; - file.set_len(file_size).map_err(|e| { - crate::error::BitdexError::Storage(format!("SlotArena: set_len {file_size}: {e}")) - })?; - - let mmap = unsafe { - MmapMut::map_mut(&file).map_err(|e| { - crate::error::BitdexError::Storage(format!("SlotArena: mmap: {e}")) - })? - }; - // Random hint: write phase has each rayon thread writing to arbitrary slot - // offsets determined by document ID — access pattern is uniformly scattered. - #[cfg(unix)] let _ = mmap.advise(memmap2::Advice::Random); - - eprintln!( - "SlotArena: allocated {} MB for {} slots at {}", - file_size / (1024 * 1024), - max_slot + 1, - path.display() - ); - - Ok(SlotArena { - mmap, - overflow: Mutex::new(Vec::new()), - max_slot, - arena_path: path.to_path_buf(), - _file: file, - }) - } - - /// Report memory usage: (arena_bytes, overflow_bytes). - pub fn memory_usage(&self) -> (usize, usize) { - let arena = (self.max_slot as usize + 1) * SLOT_SIZE; - let overflow_entries = self.overflow.lock().unwrap(); - let overflow: usize = overflow_entries - .iter() - .map(|e| e.data.len() * 4 + std::mem::size_of::()) - .sum(); - (arena, overflow) - } - - // ---- Atomic present_mask helpers ---- - - /// Atomically OR bits into the present_mask for a slot. - /// Safe for concurrent access from multiple table streams. - fn or_present_mask(&self, slot: u32, bits: u64) { - let base = slot as usize * SLOT_SIZE + OFF_PRESENT; - // SAFETY: We're treating the 8 bytes at the present_mask offset as an AtomicU64. - // The mmap is aligned to page boundaries (4KB), and each slot is 512 bytes, - // so the 8-byte present_mask is always naturally aligned. 
- unsafe { - let ptr = self.mmap.as_ptr().add(base) as *const AtomicU64; - (*ptr).fetch_or(bits, Ordering::Release); - } - } - - // ---- Low-level write helpers ---- - - #[inline] - fn slot_base(&self, slot: u32) -> usize { - slot as usize * SLOT_SIZE - } - - #[inline] - fn write_u64(&self, offset: usize, val: u64) { - // SAFETY: We need &self (not &mut self) for concurrent access. - // Different streams write to non-overlapping field offsets within each slot. - unsafe { - let ptr = self.mmap.as_ptr().add(offset) as *mut u8; - std::ptr::copy_nonoverlapping(val.to_le_bytes().as_ptr(), ptr, 8); - } - } - - #[inline] - fn write_u32(&self, offset: usize, val: u32) { - unsafe { - let ptr = self.mmap.as_ptr().add(offset) as *mut u8; - std::ptr::copy_nonoverlapping(val.to_le_bytes().as_ptr(), ptr, 4); - } - } - - #[inline] - fn write_u8(&self, offset: usize, val: u8) { - unsafe { - let ptr = self.mmap.as_ptr().add(offset) as *mut u8; - *ptr = val; - } - } - - #[inline] - fn write_inline_str(&self, offset: usize, max_len: usize, s: &[u8]) { - let len = s.len().min(max_len); - self.write_u8(offset, len as u8); - if len > 0 { - unsafe { - let ptr = self.mmap.as_ptr().add(offset + 1) as *mut u8; - std::ptr::copy_nonoverlapping(s.as_ptr(), ptr, len); - } - } - } - - // ---- Read helpers (for finalization) ---- - - #[inline] - fn read_u64(&self, offset: usize) -> u64 { - let bytes: [u8; 8] = self.mmap[offset..offset + 8].try_into().unwrap(); - u64::from_le_bytes(bytes) - } - - #[inline] - fn read_u32(&self, offset: usize) -> u32 { - let bytes: [u8; 4] = self.mmap[offset..offset + 4].try_into().unwrap(); - u32::from_le_bytes(bytes) - } - - #[inline] - fn read_u8(&self, offset: usize) -> u8 { - self.mmap[offset] - } - - fn read_inline_str(&self, offset: usize) -> Option { - let len = self.read_u8(offset) as usize; - if len == 0 { - return None; - } - let bytes = &self.mmap[offset + 1..offset + 1 + len]; - Some(String::from_utf8_lossy(bytes).into_owned()) - } - - // ---- Public 
write methods (called by table streams) ---- - - /// Write Image+Post scalar fields to a slot. - /// - /// Called by the image stream. Sets all scalar fields and their present bits. - pub fn write_scalars( - &self, - slot: u32, - image_id: u64, - nsfw_level: u8, - user_id: u64, - image_type: u8, - sort_at: u64, - poi: bool, - minor: bool, - url: Option<&[u8]>, - hash: Option<&[u8]>, - has_meta: bool, - on_site: bool, - post_id: u64, - posted_to_id: u64, - availability: u8, - blocked_for: u8, - published_at_unix: u64, - ) { - let base = self.slot_base(slot); - - self.write_u64(base + OFF_IMAGE_ID, image_id); - self.write_u8(base + OFF_NSFW, nsfw_level); - self.write_u64(base + OFF_USER_ID, user_id); - self.write_u8(base + OFF_IMAGE_TYPE, image_type); - self.write_u64(base + OFF_SORT_AT, sort_at); - self.write_u8(base + OFF_POI, poi as u8); - self.write_u8(base + OFF_MINOR, minor as u8); - - let mut mask = MASK_IMAGE_ID | MASK_NSFW | MASK_USER_ID | MASK_IMAGE_TYPE - | MASK_SORT_AT | MASK_POI | MASK_MINOR | MASK_POST_ID | MASK_POSTED_TO_ID - | MASK_AVAILABILITY | MASK_PUBLISHED_AT; - - if let Some(url_bytes) = url { - self.write_inline_str(base + OFF_URL, MAX_URL_LEN, url_bytes); - mask |= MASK_URL; - } - if let Some(hash_bytes) = hash { - self.write_inline_str(base + OFF_HASH, MAX_HASH_LEN, hash_bytes); - mask |= MASK_HASH; - } - - self.write_u8(base + OFF_HAS_META, has_meta as u8); - self.write_u8(base + OFF_ON_SITE, on_site as u8); - self.write_u64(base + OFF_POST_ID, post_id); - self.write_u64(base + OFF_POSTED_TO_ID, posted_to_id); - self.write_u8(base + OFF_AVAILABILITY, availability); - self.write_u8(base + OFF_BLOCKED_FOR, blocked_for); - self.write_u64(base + OFF_PUBLISHED_AT, published_at_unix); - - if has_meta { mask |= MASK_HAS_META; } - if on_site { mask |= MASK_ON_SITE; } - if blocked_for > 0 { mask |= MASK_BLOCKED_FOR; } - - // Metrics default to 0 — set mask so finalization knows they're present - self.write_u32(base + OFF_REACTION_COUNT, 0); - 
self.write_u32(base + OFF_COMMENT_COUNT, 0); - self.write_u32(base + OFF_COLLECTED_COUNT, 0); - mask |= MASK_METRICS; - - self.or_present_mask(slot, mask); - } - - /// Append tag IDs to a slot. Inline up to 48, overflow the rest. - /// - /// Called by the tag stream. Tags arrive ordered by tagId, so the same - /// slot may be written to multiple times as different tagIds are processed. - /// Each call appends to the existing tag list in the slot. - pub fn write_tags(&self, slot: u32, tag_ids: &[u32]) { - if tag_ids.is_empty() { - return; - } - let base = self.slot_base(slot); - - // Read current count atomically-enough (single byte, no torn reads) - let current_count = self.read_u8(base + OFF_TAG_COUNT) as usize; - let remaining_inline = MAX_INLINE_TAGS.saturating_sub(current_count); - - // Write as many as fit inline - let inline_count = tag_ids.len().min(remaining_inline); - for (i, &tag_id) in tag_ids[..inline_count].iter().enumerate() { - self.write_u32(base + OFF_TAG_IDS + (current_count + i) * 4, tag_id); - } - self.write_u8(base + OFF_TAG_COUNT, (current_count + inline_count) as u8); - - // Overflow the rest - if inline_count < tag_ids.len() { - let overflow_data: Vec = tag_ids[inline_count..].to_vec(); - self.overflow.lock().unwrap().push(OverflowEntry { - slot, - field: OverflowField::Tags, - data: overflow_data, - }); - } - - self.or_present_mask(slot, MASK_TAGS); - } - - /// Write model version IDs to a slot. Inline up to 8, overflow the rest. 
- pub fn write_model_version_ids(&self, slot: u32, mv_ids: &[u32]) { - if mv_ids.is_empty() { - return; - } - let base = self.slot_base(slot); - let current = self.read_u8(base + OFF_MV_COUNT) as usize; - let remaining = MAX_INLINE_MVS.saturating_sub(current); - let inline_n = mv_ids.len().min(remaining); - - for (i, &id) in mv_ids[..inline_n].iter().enumerate() { - self.write_u32(base + OFF_MV_IDS + (current + i) * 4, id); - } - self.write_u8(base + OFF_MV_COUNT, (current + inline_n) as u8); - - if inline_n < mv_ids.len() { - self.overflow.lock().unwrap().push(OverflowEntry { - slot, - field: OverflowField::ModelVersionIds, - data: mv_ids[inline_n..].to_vec(), - }); - } - - self.or_present_mask(slot, MASK_MV); - } - - /// Write manual model version IDs to a slot. - pub fn write_model_version_ids_manual(&self, slot: u32, mv_ids: &[u32]) { - if mv_ids.is_empty() { - return; - } - let base = self.slot_base(slot); - let current = self.read_u8(base + OFF_MV_MANUAL_COUNT) as usize; - let remaining = MAX_INLINE_MVS.saturating_sub(current); - let inline_n = mv_ids.len().min(remaining); - - for (i, &id) in mv_ids[..inline_n].iter().enumerate() { - self.write_u32(base + OFF_MV_MANUAL_IDS + (current + i) * 4, id); - } - self.write_u8(base + OFF_MV_MANUAL_COUNT, (current + inline_n) as u8); - - if inline_n < mv_ids.len() { - self.overflow.lock().unwrap().push(OverflowEntry { - slot, - field: OverflowField::ModelVersionIdsManual, - data: mv_ids[inline_n..].to_vec(), - }); - } - - self.or_present_mask(slot, MASK_MV_MANUAL); - } - - /// Write tool IDs to a slot. 
- pub fn write_tools(&self, slot: u32, tool_ids: &[u32]) { - if tool_ids.is_empty() { - return; - } - let base = self.slot_base(slot); - let current = self.read_u8(base + OFF_TOOL_COUNT) as usize; - let remaining = MAX_INLINE_TOOLS.saturating_sub(current); - let inline_n = tool_ids.len().min(remaining); - - for (i, &id) in tool_ids[..inline_n].iter().enumerate() { - self.write_u32(base + OFF_TOOL_IDS + (current + i) * 4, id); - } - self.write_u8(base + OFF_TOOL_COUNT, (current + inline_n) as u8); - - if inline_n < tool_ids.len() { - self.overflow.lock().unwrap().push(OverflowEntry { - slot, - field: OverflowField::Tools, - data: tool_ids[inline_n..].to_vec(), - }); - } - - self.or_present_mask(slot, MASK_TOOLS); - } - - /// Write technique IDs to a slot. - pub fn write_techniques(&self, slot: u32, technique_ids: &[u32]) { - if technique_ids.is_empty() { - return; - } - let base = self.slot_base(slot); - let current = self.read_u8(base + OFF_TECH_COUNT) as usize; - let remaining = MAX_INLINE_TECHNIQUES.saturating_sub(current); - let inline_n = technique_ids.len().min(remaining); - - for (i, &id) in technique_ids[..inline_n].iter().enumerate() { - self.write_u32(base + OFF_TECH_IDS + (current + i) * 4, id); - } - self.write_u8(base + OFF_TECH_COUNT, (current + inline_n) as u8); - - if inline_n < technique_ids.len() { - self.overflow.lock().unwrap().push(OverflowEntry { - slot, - field: OverflowField::Techniques, - data: technique_ids[inline_n..].to_vec(), - }); - } - - self.or_present_mask(slot, MASK_TECHNIQUES); - } - - /// Write base model enum to a slot. - pub fn write_base_model(&self, slot: u32, base_model: u8) { - let base = self.slot_base(slot); - self.write_u8(base + OFF_BASE_MODEL, base_model); - self.or_present_mask(slot, MASK_BASE_MODEL); - } - - /// OR resource_poi into the slot's poi field. - /// - /// Image stream sets poi from Image.poi. - /// Resource stream OR's in resource_poi if true (idempotent). 
- pub fn set_resource_poi(&self, slot: u32) { - let base = self.slot_base(slot); - self.write_u8(base + OFF_RESOURCE_POI, 1); - self.or_present_mask(slot, MASK_RESOURCE_POI); - } - - // ---- Read methods (for finalization) ---- - - /// Read a complete slot, merging any overflow data. - /// - /// Returns `None` if the slot has no present bits set (never written). - pub(crate) fn read_slot(&self, slot: u32, overflow_map: &std::collections::HashMap>) -> Option { - let base = self.slot_base(slot); - let mask = self.read_u64(base + OFF_PRESENT); - - if mask == 0 { - return None; - } - - // Read inline tag IDs - let tag_count = self.read_u8(base + OFF_TAG_COUNT) as usize; - let mut tag_ids: Vec = (0..tag_count) - .map(|i| self.read_u32(base + OFF_TAG_IDS + i * 4)) - .collect(); - - // Read inline MV IDs - let mv_count = self.read_u8(base + OFF_MV_COUNT) as usize; - let mut model_version_ids: Vec = (0..mv_count) - .map(|i| self.read_u32(base + OFF_MV_IDS + i * 4)) - .collect(); - - // Read inline manual MV IDs - let mv_manual_count = self.read_u8(base + OFF_MV_MANUAL_COUNT) as usize; - let mut model_version_ids_manual: Vec = (0..mv_manual_count) - .map(|i| self.read_u32(base + OFF_MV_MANUAL_IDS + i * 4)) - .collect(); - - // Read inline tool IDs - let tool_count = self.read_u8(base + OFF_TOOL_COUNT) as usize; - let mut tool_ids: Vec = (0..tool_count) - .map(|i| self.read_u32(base + OFF_TOOL_IDS + i * 4)) - .collect(); - - // Read inline technique IDs - let tech_count = self.read_u8(base + OFF_TECH_COUNT) as usize; - let mut technique_ids: Vec = (0..tech_count) - .map(|i| self.read_u32(base + OFF_TECH_IDS + i * 4)) - .collect(); - - // Merge overflow - if let Some(entries) = overflow_map.get(&slot) { - for entry in entries { - match entry.field { - OverflowField::Tags => tag_ids.extend_from_slice(&entry.data), - OverflowField::ModelVersionIds => model_version_ids.extend_from_slice(&entry.data), - OverflowField::ModelVersionIdsManual => 
model_version_ids_manual.extend_from_slice(&entry.data), - OverflowField::Tools => tool_ids.extend_from_slice(&entry.data), - OverflowField::Techniques => technique_ids.extend_from_slice(&entry.data), - } - } - } - - // Resolve poi: image poi OR resource_poi - let image_poi = self.read_u8(base + OFF_POI) != 0; - let resource_poi = self.read_u8(base + OFF_RESOURCE_POI) != 0; - - Some(SlotData { - image_id: self.read_u64(base + OFF_IMAGE_ID), - nsfw_level: self.read_u8(base + OFF_NSFW), - user_id: self.read_u64(base + OFF_USER_ID), - image_type: self.read_u8(base + OFF_IMAGE_TYPE), - sort_at: self.read_u64(base + OFF_SORT_AT), - poi: image_poi || resource_poi, - minor: self.read_u8(base + OFF_MINOR) != 0, - url: self.read_inline_str(base + OFF_URL), - hash: self.read_inline_str(base + OFF_HASH), - tag_ids, - model_version_ids, - model_version_ids_manual, - tool_ids, - technique_ids, - has_meta: self.read_u8(base + OFF_HAS_META) != 0, - on_site: self.read_u8(base + OFF_ON_SITE) != 0, - post_id: self.read_u64(base + OFF_POST_ID), - posted_to_id: self.read_u64(base + OFF_POSTED_TO_ID), - availability: self.read_u8(base + OFF_AVAILABILITY), - blocked_for: self.read_u8(base + OFF_BLOCKED_FOR), - reaction_count: self.read_u32(base + OFF_REACTION_COUNT), - comment_count: self.read_u32(base + OFF_COMMENT_COUNT), - collected_count: self.read_u32(base + OFF_COLLECTED_COUNT), - base_model: self.read_u8(base + OFF_BASE_MODEL), - resource_poi, - published_at_unix: self.read_u64(base + OFF_PUBLISHED_AT), - }) - } - - /// Finalize all populated slots to the docstore. 
- /// TODO: Rewrite for DataSilo ParallelWriter - pub fn finalize_to_docstore( - &self, - _schema: &DataSchema, - _alive: &RoaringBitmap, - ) -> Result<(u64, u64)> { - // TODO(madvise): when implemented, switch hint to Sequential before the - // 0..max_slot scan: `let _ = self.mmap.advise(memmap2::Advice::Sequential);` - Err(crate::error::BitdexError::Storage( - "finalize_to_docstore: not yet ported to DataSilo".to_string() - )) - } - - /// Clean up the arena file. - pub fn cleanup(self) -> std::io::Result<()> { - // DONTNEED before drop: immediately reclaims RSS on Linux (up to ~54 GB at - // 107M slots) before the OS-level munmap completes. - #[cfg(target_os = "linux")] - let _ = unsafe { self.mmap.unchecked_advise(memmap2::UncheckedAdvice::DontNeed) }; - // (On non-Linux Unix, the drop/munmap itself frees pages promptly enough.) - drop(self.mmap); - drop(self._file); - std::fs::remove_file(&self.arena_path) - } -} - -// --------------------------------------------------------------------------- -// Enum encoding helpers -// --------------------------------------------------------------------------- - -/// Encode image type string to enum byte. -pub fn encode_image_type(s: Option<&str>) -> u8 { - match s { - Some("video") => 1, - Some("audio") => 2, - _ => 0, // "image" or unknown - } -} - -/// Decode image type enum byte to string. -pub fn decode_image_type(v: u8) -> &'static str { - match v { - 1 => "video", - 2 => "audio", - _ => "image", - } -} - -/// Encode availability string to enum byte. -pub fn encode_availability(s: Option<&str>) -> u8 { - match s { - Some("Private") => 1, - Some("Unsearchable") => 2, - _ => 0, // "Public" or unknown - } -} - -/// Decode availability enum byte to string. -pub fn decode_availability(v: u8) -> &'static str { - match v { - 1 => "Private", - 2 => "Unsearchable", - _ => "Public", - } -} - -/// Encode blocked_for string to enum byte. 
-pub fn encode_blocked_for(s: Option<&str>) -> u8 { - match s { - None => 0, - Some("") => 0, - Some(_) => 1, // any non-empty value - } -} - -// Well-known base model strings → enum byte (for filter bitmaps). -// Not exhaustive — new base models get 0 (unknown). -pub fn encode_base_model(s: Option<&str>) -> u8 { - match s { - None | Some("") => 0, - Some("SD 1.5") => 1, - Some("SD 2.1") => 2, - Some("SDXL 1.0") => 3, - Some("Pony") => 4, - Some("Flux.1 D") => 5, - Some("Flux.1 S") => 6, - Some("SD 3.5 Large") => 7, - Some("Illustrious") => 8, - Some("Hunyuan 1") => 9, - Some("SD 3.5 Medium") => 10, - Some(_) => 255, // known but unmapped - } -} - -pub fn decode_base_model(v: u8) -> &'static str { - match v { - 0 => "", - 1 => "SD 1.5", - 2 => "SD 2.1", - 3 => "SDXL 1.0", - 4 => "Pony", - 5 => "Flux.1 D", - 6 => "Flux.1 S", - 7 => "SD 3.5 Large", - 8 => "Illustrious", - 9 => "Hunyuan 1", - 10 => "SD 3.5 Medium", - _ => "Other", - } -} - -// --------------------------------------------------------------------------- -// SlotData → JSON for docstore encoding -// --------------------------------------------------------------------------- - -/// Convert SlotData to a serde_json::Value matching the Bitdex data schema. -/// Used during finalization to produce docstore-compatible documents. 
-fn slot_data_to_json(slot: &SlotData) -> serde_json::Value { - let mut doc = serde_json::json!({ - "id": slot.image_id as i64, - "nsfwLevel": slot.nsfw_level as i64, - "userId": slot.user_id as i64, - "postId": slot.post_id as i64, - "postedToId": slot.posted_to_id as i64, - "type": decode_image_type(slot.image_type), - "baseModel": decode_base_model(slot.base_model), - "availability": decode_availability(slot.availability), - "tagIds": slot.tag_ids.iter().map(|&t| t as i64).collect::>(), - "modelVersionIds": slot.model_version_ids.iter().map(|&t| t as i64).collect::>(), - "modelVersionIdsManual": slot.model_version_ids_manual.iter().map(|&t| t as i64).collect::>(), - "toolIds": slot.tool_ids.iter().map(|&t| t as i64).collect::>(), - "techniqueIds": slot.technique_ids.iter().map(|&t| t as i64).collect::>(), - "reactionCount": slot.reaction_count as i64, - "commentCount": slot.comment_count as i64, - "collectedCount": slot.collected_count as i64, - "sortAt": slot.sort_at as i64, - "publishedAt": (slot.published_at_unix / 1000) as i64, - }); - - if let Some(obj) = doc.as_object_mut() { - // Exists-boolean: isPublished = publishedAt is non-zero (matches outbox row_assembler) - if slot.published_at_unix > 0 { - obj.insert("isPublished".into(), serde_json::json!(true)); - } - if slot.has_meta { - obj.insert("hasMeta".into(), serde_json::json!(true)); - } - if slot.on_site { - obj.insert("onSite".into(), serde_json::json!(true)); - } - if slot.poi { - obj.insert("poi".into(), serde_json::json!(true)); - } - if slot.minor { - obj.insert("minor".into(), serde_json::json!(true)); - } - if let Some(ref url) = slot.url { - obj.insert("url".into(), serde_json::json!(url)); - } - if let Some(ref hash) = slot.hash { - obj.insert("hash".into(), serde_json::json!(hash)); - } - if slot.blocked_for > 0 { - obj.insert("blockedFor".into(), serde_json::json!("blocked")); - } - } - - doc -} - -// --------------------------------------------------------------------------- -// Tests -// 
--------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - use tempfile::tempdir; - - #[test] - fn test_write_read_scalars() { - let dir = tempdir().unwrap(); - let arena = SlotArena::new(100, &dir.path().join("slots.bin")).unwrap(); - - arena.write_scalars( - 42, 12345, 16, 999, 0, 1700000000, true, false, - Some(b"https://example.com/img.jpg"), - Some(b"abc123hash"), - true, false, - 100, 200, 0, 0, 1700000000000, - ); - - let overflow = std::collections::HashMap::new(); - let slot = arena.read_slot(42, &overflow).unwrap(); - - assert_eq!(slot.image_id, 12345); - assert_eq!(slot.nsfw_level, 16); - assert_eq!(slot.user_id, 999); - assert_eq!(slot.image_type, 0); - assert_eq!(slot.sort_at, 1700000000); - assert!(slot.poi); - assert!(!slot.minor); - assert_eq!(slot.url.as_deref(), Some("https://example.com/img.jpg")); - assert_eq!(slot.hash.as_deref(), Some("abc123hash")); - assert!(slot.has_meta); - assert!(!slot.on_site); - assert_eq!(slot.post_id, 100); - assert_eq!(slot.posted_to_id, 200); - assert_eq!(slot.availability, 0); - assert_eq!(slot.published_at_unix, 1700000000000); - } - - #[test] - fn test_write_tags_inline() { - let dir = tempdir().unwrap(); - let arena = SlotArena::new(10, &dir.path().join("slots.bin")).unwrap(); - - let tags: Vec = (100..110).collect(); - arena.write_tags(5, &tags); - - let overflow = std::collections::HashMap::new(); - let slot = arena.read_slot(5, &overflow).unwrap(); - assert_eq!(slot.tag_ids, tags); - } - - #[test] - fn test_write_tags_overflow() { - let dir = tempdir().unwrap(); - let arena = SlotArena::new(10, &dir.path().join("slots.bin")).unwrap(); - - // Write 60 tags — 48 inline + 12 overflow - let tags: Vec = (100..160).collect(); - arena.write_tags(3, &tags); - - // Build overflow map - let overflow_entries = arena.overflow.lock().unwrap(); - let mut overflow_map: std::collections::HashMap> = - std::collections::HashMap::new(); - for entry in 
overflow_entries.iter() { - overflow_map.entry(entry.slot).or_default().push(entry); - } - - let slot = arena.read_slot(3, &overflow_map).unwrap(); - assert_eq!(slot.tag_ids.len(), 60); - assert_eq!(slot.tag_ids, tags); - } - - #[test] - fn test_write_tags_incremental() { - let dir = tempdir().unwrap(); - let arena = SlotArena::new(10, &dir.path().join("slots.bin")).unwrap(); - - // Write tags incrementally (simulating tag stream) - arena.write_tags(2, &[100, 101, 102]); - arena.write_tags(2, &[200, 201]); - - let overflow = std::collections::HashMap::new(); - let slot = arena.read_slot(2, &overflow).unwrap(); - assert_eq!(slot.tag_ids, vec![100, 101, 102, 200, 201]); - } - - #[test] - fn test_concurrent_field_writes() { - let dir = tempdir().unwrap(); - let arena = SlotArena::new(1000, &dir.path().join("slots.bin")).unwrap(); - - std::thread::scope(|s| { - // Thread 1: write scalars - s.spawn(|| { - for slot in 0..1000u32 { - arena.write_scalars( - slot, slot as u64, 16, slot as u64 * 7, 0, - 1700000000, false, false, - Some(b"url"), Some(b"hash"), - false, false, 100, 0, 0, 0, 0, - ); - } - }); - - // Thread 2: write tags - s.spawn(|| { - for slot in 0..1000u32 { - arena.write_tags(slot, &[slot + 1000, slot + 2000]); - } - }); - - // Thread 3: write tools - s.spawn(|| { - for slot in 0..1000u32 { - arena.write_tools(slot, &[slot + 5000]); - } - }); - }); - - // Verify all fields present - let overflow = std::collections::HashMap::new(); - for slot in 0..1000u32 { - let data = arena.read_slot(slot, &overflow).unwrap(); - assert_eq!(data.image_id, slot as u64); - assert_eq!(data.tag_ids.len(), 2); - assert_eq!(data.tool_ids.len(), 1); - } - } - - #[test] - fn test_present_mask_atomic() { - let dir = tempdir().unwrap(); - let arena = SlotArena::new(10, &dir.path().join("slots.bin")).unwrap(); - - // Multiple threads OR-ing different bits into the same slot - std::thread::scope(|s| { - for bit in 0..20u64 { - let arena_ref = &arena; - s.spawn(move || { - 
arena_ref.or_present_mask(5, 1 << bit); - }); - } - }); - - let base = 5 * SLOT_SIZE + OFF_PRESENT; - let mask = arena.read_u64(base); - // All 20 bits should be set - for bit in 0..20u64 { - assert!(mask & (1 << bit) != 0, "bit {bit} not set"); - } - } - - #[test] - fn test_empty_slot_returns_none() { - let dir = tempdir().unwrap(); - let arena = SlotArena::new(10, &dir.path().join("slots.bin")).unwrap(); - - let overflow = std::collections::HashMap::new(); - assert!(arena.read_slot(7, &overflow).is_none()); - } - - #[test] - fn test_memory_usage_reporting() { - let dir = tempdir().unwrap(); - let arena = SlotArena::new(100, &dir.path().join("slots.bin")).unwrap(); - - let (arena_bytes, overflow_bytes) = arena.memory_usage(); - assert_eq!(arena_bytes, 101 * SLOT_SIZE); - assert_eq!(overflow_bytes, 0); - - // Add some overflow - arena.write_tags(1, &vec![0u32; 60]); // 48 inline + 12 overflow - let (_, overflow_bytes) = arena.memory_usage(); - assert!(overflow_bytes > 0); - } -}