#!/usr/bin/env python3
"""Compress a Gaussian Splat PLY file to SOG format with the 3dgsconverter CLI."""

import argparse
import os
import subprocess
import sys


def _discard_output(output_path):
    """Best-effort removal of output_path; an already-absent file is not an error."""
    try:
        os.remove(output_path)
    except FileNotFoundError:
        pass
    except OSError as e:
        print(f"Could not remove {output_path}: {e}", file=sys.stderr)


def compress_to_sog(input_path, output_path):
    """
    Compress a Gaussian Splat PLY file to SOG format using 3dgsconverter.

    The converter is invoked as an external command
    (``3dgsconverter -i <input> -o <output> -f sog``) and its stdout/stderr
    are echoed to this process's stdout/stderr.

    Whenever this function returns non-zero after the input has been
    validated, ``output_path`` does not exist: a stale file from an earlier
    run is removed before the converter starts, and a partial or empty file
    left by a failed conversion is removed afterwards. This matters for any
    downstream step that decides what to publish purely by checking whether
    the output file exists.

    Args:
        input_path: Path of the PLY file to compress.
        output_path: Path where the SOG file is written.

    Returns:
        0 on success; the converter's exit code if it fails; 1 for every
        other failure (missing input, identical input/output paths,
        converter not installed, missing or empty output, unexpected error).
    """
    if not os.path.exists(input_path):
        print(f"Input file not found: {input_path}", file=sys.stderr)
        return 1

    # Guard the stale-output cleanup below against deleting the input itself.
    if os.path.realpath(input_path) == os.path.realpath(output_path):
        print(f"Input and output paths must differ: {input_path}", file=sys.stderr)
        return 1

    # Drop any output left over from a previous run so a failed conversion
    # can never be mistaken for a fresh result.
    _discard_output(output_path)

    cmd = [
        "3dgsconverter",
        "-i", str(input_path),
        "-o", str(output_path),
        "-f", "sog",
    ]

    print(f"Compressing to SOG: {input_path} -> {output_path}")

    # Only the launch is wrapped here so that FileNotFoundError unambiguously
    # means "the converter executable is missing".
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except FileNotFoundError:
        print("3dgsconverter not found. Install with: pip install 3dgsconverter", file=sys.stderr)
        return 1
    except Exception as e:
        print(f"SOG compression error: {e}", file=sys.stderr)
        _discard_output(output_path)
        return 1

    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)

    if result.returncode != 0:
        print(f"3dgsconverter failed with exit code: {result.returncode}", file=sys.stderr)
        # The converter may have written a truncated file before failing.
        _discard_output(output_path)
        return result.returncode

    if not os.path.exists(output_path):
        print(f"Output file was not created: {output_path}", file=sys.stderr)
        return 1

    try:
        input_size = os.path.getsize(input_path)
        output_size = os.path.getsize(output_path)
    except OSError as e:
        print(f"SOG compression error: {e}", file=sys.stderr)
        _discard_output(output_path)
        return 1

    if output_size == 0:
        # An empty file is not a usable SOG archive; treat it as a failure.
        print(f"Output file is empty: {output_path}", file=sys.stderr)
        _discard_output(output_path)
        return 1

    ratio = input_size / output_size
    print(f"SOG compression complete: {input_size} bytes -> {output_size} bytes ({ratio:.1f}x reduction)")

    return 0


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Compress Gaussian Splat PLY to SOG format")
    parser.add_argument("--input", required=True, help="Input PLY file path")
    parser.add_argument("--output", required=True, help="Output SOG file path")
    args = parser.parse_args()

    sys.exit(compress_to_sog(args.input, args.output))
The compressed `.sog` file is uploaded alongside the existing `.splat` file, giving the domain-viewer a ~15-20x smaller file to load while preserving the lossless `.splat` as a backup. + +## Motivation + +The splatter server currently outputs a raw `.splat` file (hundreds of MB for large domains). The domain-viewer's Spark WASM engine already supports the SOG compressed format (`PCSOGSZIP`), mapping `data_type: splat_data_sog` to automatic decompression on the client. Adding SOG compression reduces file transfer sizes by ~15-20x, dramatically improving viewer load times with minimal visual quality loss. + +## Design + +### Pipeline Change + +Current: +``` +extract_mp4 -> ns-process-data -> ns-train -> ns-export -> rotate_ply -> convert_ply2splat -> upload .splat +``` + +New: +``` +extract_mp4 -> ns-process-data -> ns-train -> ns-export -> rotate_ply -> convert_ply2splat -> compress_sog -> upload .sog -> upload .splat +``` + +SOG is uploaded first (smaller file, faster upload, viewer can display sooner). The lossless `.splat` follows as a backup. + +### Files to Create + +#### `compress_sog.py` +New Python script following the same pattern as `convert_ply2splat.py`: +- CLI args: `--input` (PLY path), `--output` (SOG output path) +- Uses the `3dgsconverter` command-line tool (invoked via `subprocess`) to convert PLY to SOG format +- Prints compression stats (input size, output size, ratio) +- Exit code 0 on success, non-zero on failure + +### Files to Modify + +#### `run.py` +Add step after `convert_ply2splat.py`: +```python +# Compress to SOG (best-effort -- don't fail the pipeline if this fails) +sog_exit_code = run_python_script("compress_sog.py", + "--input", args.job_root_path / "refined/splatter/splat_rot.ply", + "--output", args.job_root_path / "refined/splatter/splat_rot.sog") +if sog_exit_code != 0: + logger.warning("SOG compression failed; .splat file is still available") +``` + +SOG compression failure is non-fatal. The `.splat` is already produced and the pipeline exits successfully. The SOG result is kept in its own `sog_exit_code` variable so that the final `sys.exit(exit_code)` still reports the status of the `.splat` conversion rather than the SOG step. 
+ +#### `server/rust/runner/src/lib.rs` +After the existing upload block, add SOG upload logic: +- Check if `refined/splatter/splat_rot.sog` exists in the workspace +- If present, upload as `data_type: "splat_data_sog"` with name `refined_splat_sog_{suffix}` +- Upload SOG **before** the existing `.splat` upload (SOG first so the viewer gets usable data sooner) +- If SOG file is missing (compression failed), skip silently and proceed to `.splat` upload + +#### `Dockerfile` +Add `3dgsconverter` to the pip install line: +```dockerfile +RUN python3 -m pip install --no-cache-dir ply2splat 3dgsconverter +``` + +Also copy the new `compress_sog.py` script: +```dockerfile +COPY run.py extract_mp4.py convert_ply2splat.py rotate_ply.py compress_sog.py /app/ +``` + +### Error Handling + +SOG compression is best-effort throughout the entire stack: + +| Layer | Behavior on SOG failure | +|-------|------------------------| +| `compress_sog.py` | Returns non-zero exit code | +| `run.py` | Logs warning, does NOT exit -- pipeline continues | +| Rust runner | Checks if `.sog` file exists before upload; skips if missing | +| Domain viewer | Falls back to `splat_data` if no `splat_data_sog` exists | + +The existing `.splat` pipeline is never affected by SOG failures. + +### Upload Order + +1. Upload `.sog` as `splat_data_sog` (smaller, faster, viewer can start loading) +2. Upload `.splat` as `splat_data` (lossless backup) + +If the process is interrupted after step 1, the viewer still has a fully viewable domain. + +### Viewer Integration + +No viewer changes required. The domain-viewer already: +1. Scans data list for items with `data_type` matching `splat_data_sog` +2. Maps to `SplatFileType.PCSOGSZIP` via `dataTypeToSplatFileType()` in `hooks/useRefinementSplat.ts:38-48` +3. Passes format to Spark WASM engine which handles decompression +4. 
Falls back to `splat_data` items if no SOG exists + +### Compression Characteristics + +SOG compression is lossy for higher spherical harmonics bands (SH degrees 1-3) but lossless for: +- Positions (x, y, z) +- Rotations (quaternions) +- Scales +- Base color (SH degree 0) +- Opacity + +Visual impact is negligible for indoor phone scans where base color dominates. + +### Output Files + +After the pipeline completes, the workspace contains: +``` +refined/splatter/ + splat.ply # raw nerfstudio export + splat_rot.ply # coordinate-transformed PLY + splat_rot.splat # lossless binary format + splat_rot.sog # SOG compressed (new) +``` + +### Testing + +- Run the pipeline locally with sample data and verify `.sog` is produced +- Verify `.sog` loads correctly in the domain-viewer +- Verify pipeline completes successfully when `3dgsconverter` is unavailable (fallback) +- Compare file sizes between `.splat` and `.sog` outputs diff --git a/run.py b/run.py index e49d601..46dc66a 100644 --- a/run.py +++ b/run.py @@ -197,11 +197,18 @@ def run_cmd(cmd: list): sys.exit(exit_code) logger.info("Converting Splat") - exit_code = run_python_script("convert_ply2splat.py", + exit_code = run_python_script("convert_ply2splat.py", "--input", args.job_root_path / "refined/splatter/splat_rot.ply", "--output", args.job_root_path / "refined/splatter/splat_rot.splat") if exit_code != 0: logger.error("failed to convert splat .ply to .splat") sys.exit(exit_code) - + + logger.info("Compressing Splat to SOG") + sog_exit_code = run_python_script("compress_sog.py", + "--input", args.job_root_path / "refined/splatter/splat_rot.ply", + "--output", args.job_root_path / "refined/splatter/splat_rot.sog") + if sog_exit_code != 0: + logger.warning("SOG compression failed; .splat file is still available") + sys.exit(exit_code) \ No newline at end of file diff --git a/server/rust/runner/src/lib.rs b/server/rust/runner/src/lib.rs index 217ceec..182bb17 100644 --- a/server/rust/runner/src/lib.rs +++ 
b/server/rust/runner/src/lib.rs @@ -738,12 +738,16 @@ impl compute_runner_api::Runner for HelloRunner { })) .await; - // Upload splat_rot.splat if it exists. + // Upload outputs: SOG (compressed) first, then .splat (lossless backup). ensure_task_not_cancelled(&ctx, "before upload").await?; - let splat_rel = PathBuf::from("refined") + let splat_abs = job_root + .join("refined") .join("splatter") .join("splat_rot.splat"); - let splat_abs = job_root.join(&splat_rel); + let sog_abs = job_root + .join("refined") + .join("splatter") + .join("splat_rot.sog"); if !splat_abs.exists() { return Err(anyhow!("expected output missing: {}", splat_abs.display())); } @@ -761,15 +765,61 @@ impl compute_runner_api::Runner for HelloRunner { "refined_splat".to_string() }; + let sog_upload_key = + if let Some(suffix) = refined_suffix.as_deref().filter(|s| !s.is_empty()) { + if suffix.starts_with('_') { + format!("refined_splat_sog{suffix}") + } else { + format!("refined_splat_sog_{suffix}") + } + } else { + "refined_splat_sog".to_string() + }; + ctx.ctrl .progress(json!({ - "pct": 90, + "pct": 85, "stage": "upload", "status": "starting", - "artifact": upload_key.as_str(), })) .await?; + // Upload SOG first (smaller file, viewer can start loading sooner). 
+ if sog_abs.exists() { + ensure_task_not_cancelled(&ctx, "uploading sog").await?; + match ctx + .output + .put_domain_artifact(compute_runner_api::runner::DomainArtifactRequest { + rel_path: sog_upload_key.as_str(), + name: sog_upload_key.as_str(), + data_type: "splat_data_sog", + existing_id: None, + content: compute_runner_api::runner::DomainArtifactContent::File(&sog_abs), + }) + .await + { + Ok(_) => { + info!(artifact = %sog_upload_key, "uploaded SOG compressed splat"); + let _ = ctx + .ctrl + .log_event(json!({ + "level": "info", + "stage": "upload", + "message": "SOG compressed splat uploaded", + "uploaded": sog_upload_key.as_str(), + })) + .await; + } + Err(err) => { + warn!(error = %err, "failed to upload SOG; falling back to .splat only"); + } + } + } else { + info!("SOG file not found; skipping compressed upload"); + } + + // Upload lossless .splat as backup. + ensure_task_not_cancelled(&ctx, "uploading splat").await?; ctx.output .put_domain_artifact(compute_runner_api::runner::DomainArtifactRequest { rel_path: upload_key.as_str(), @@ -786,8 +836,8 @@ impl compute_runner_api::Runner for HelloRunner { "pct": 95, "stage": "upload", "status": "completed", - "uploaded": upload_key.as_str(), - "splat_path": splat_abs.display().to_string(), + "uploaded_sog": sog_abs.exists(), + "uploaded_splat": upload_key.as_str(), })) .await?; let _ = ctx