Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions bin/nixbox
Original file line number Diff line number Diff line change
Expand Up @@ -312,11 +312,13 @@ NFTEOF

# Raise FD limit before launching virtiofsd and cloud-hypervisor (#18).
raise_nofile 524288
local virtiofsd_bin
virtiofsd_bin=$(ensure_virtiofsd_cap)

# --- Start virtiofsd for nix-store share (required by microvm config) ---
log "==> Starting virtiofsd for nix-store..."
local nix_store_sock="$run_dir/nixbox-virtiofs-nix-store.sock"
virtiofsd --socket-path="$nix_store_sock" --shared-dir="/nix/store" --sandbox=none --translate-uid="map:1000:$(id -u):1" --translate-gid="map:100:$(id -g):1" --cache=auto 2>"$run_dir/virtiofsd-nix-store.log" &
"$virtiofsd_bin" --socket-path="$nix_store_sock" --shared-dir="/nix/store" --sandbox=none --translate-uid="map:1000:$(id -u):1" --translate-gid="map:100:$(id -g):1" --cache=auto --inode-file-handles=mandatory 2>"$run_dir/virtiofsd-nix-store.log" &
echo "$!" > "$state_dir/virtiofsd_nix_store_pid"
for _ in $(seq 1 10); do [ -S "$nix_store_sock" ] && break; sleep 0.2; done
[ -S "$nix_store_sock" ] || die "virtiofsd socket for nix-store did not appear"
Expand All @@ -334,7 +336,7 @@ NFTEOF
[ ! -d "$src" ] && die "Mount source does not exist: $src"

local virtiofs_sock="$run_dir/virtiofs-${i}.sock"
virtiofsd --socket-path="$virtiofs_sock" --shared-dir="$src" --sandbox=none --translate-uid="map:1000:$(id -u):1" --translate-gid="map:100:$(id -g):1" --cache=auto 2>"$run_dir/virtiofsd-${i}.log" &
"$virtiofsd_bin" --socket-path="$virtiofs_sock" --shared-dir="$src" --sandbox=none --translate-uid="map:1000:$(id -u):1" --translate-gid="map:100:$(id -g):1" --cache=auto --inode-file-handles=mandatory 2>"$run_dir/virtiofsd-${i}.log" &
echo "$!" > "$state_dir/virtiofsd_${i}_pid"
for _ in $(seq 1 10); do [ -S "$virtiofs_sock" ] && break; sleep 0.2; done
[ -S "$virtiofs_sock" ] || die "virtiofsd socket did not appear for mount $i"
Expand Down Expand Up @@ -543,13 +545,16 @@ do_mount() {

local virtiofs_sock="$run_dir/virtiofs-${mount_idx}.sock"
raise_nofile 524288
virtiofsd \
local virtiofsd_bin
virtiofsd_bin=$(ensure_virtiofsd_cap)
"$virtiofsd_bin" \
--socket-path="$virtiofs_sock" \
--shared-dir="$MOUNT_SOURCE" \
--sandbox=none \
--translate-uid="map:1000:$(id -u):1" \
--translate-gid="map:100:$(id -g):1" \
--cache=auto 2>"$run_dir/virtiofsd-${mount_idx}.log" &
--cache=auto \
--inode-file-handles=mandatory 2>"$run_dir/virtiofsd-${mount_idx}.log" &
echo "$!" > "$state_dir/virtiofsd_${mount_idx}_pid"

for _ in $(seq 1 10); do
Expand Down Expand Up @@ -653,7 +658,7 @@ do_list() {
# ---------------------------------------------------------------------------

ensure_setup() {
for cmd in nix jq dnsmasq nft mke2fs virtiofsd; do
for cmd in nix jq dnsmasq nft mke2fs virtiofsd setcap getcap; do
command -v "$cmd" &>/dev/null || die "$cmd not found. Install it first."
done
[ -e /dev/kvm ] || die "/dev/kvm not found. KVM is required."
Expand Down Expand Up @@ -856,7 +861,7 @@ cmd_doctor() {
local errors=0

log "==> Checking prerequisites..."
for cmd in nix jq dnsmasq nft mke2fs virtiofsd; do
for cmd in nix jq dnsmasq nft mke2fs virtiofsd setcap getcap; do
if command -v "$cmd" &>/dev/null; then
log_sub "$cmd: OK"
else
Expand All @@ -872,6 +877,13 @@ cmd_doctor() {
errors=$((errors + 1))
fi

local virtiofsd_wrapper="${XDG_DATA_HOME:-$HOME/.local/share}/nixbox/bin/virtiofsd"
if [ -x "$virtiofsd_wrapper" ] && getcap "$virtiofsd_wrapper" 2>/dev/null | grep -q 'cap_dac_read_search'; then
log_sub "virtiofsd cap: OK ($virtiofsd_wrapper)"
else
log_sub "virtiofsd cap: NOT INSTALLED (will install on first 'nixbox up'; needs sudo)"
fi

echo ""
log "==> Checking project config..."
if NIXBOX_DIR="$(find_nixbox_dir 2>/dev/null)"; then
Expand Down
37 changes: 37 additions & 0 deletions docs/decisions/016-virtiofsd-file-handles-capability.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# 016: virtiofsd file-handle mode via setcap'd wrapper

**Date:** 2026-04-27
**Status:** accepted

## Problem

ADR-015 keeps churning caches off virtiofs, but long-lived shares (source trees) still leak FDs under sustained access. `virtiofsd --cache=auto` retains an O_PATH FD per cached inode; with mandatory file-handle mode the cache holds opaque handles instead, freeing the FD slot until the next I/O. This is the only mitigation that addresses the underlying accumulation rather than its symptoms (#18).

`--inode-file-handles=mandatory` makes virtiofsd reopen cached inodes with `open_by_handle_at(2)`, which requires `CAP_DAC_READ_SEARCH` (`name_to_handle_at(2)`, which produces the handles, needs no privilege). Without it, virtiofsd refuses to start (`Refusing to use (mandatory) file handles, as they do not appear safe to use`). Today the daemon runs as the host user (UID 1000) under `--sandbox=none` (ADR-001) with no special privileges, so the call returns `EPERM`.

Four privilege models were considered:

1. **Run as root, `--sandbox=none`.** Rejected: with no sandbox there is no per-request `setresuid`, so guest-created files end up owned by root on the host filesystem. Regresses ADR-002.
2. **Run as root, `--sandbox=namespace`.** Rejected: requires the daemon to run as root for one syscall path, and the interaction between namespace credential switching and `--translate-uid` is unverified — no upside over option 4, and any deviation regresses ADR-002. (ADR-001 only proves the *non-root* failure mode, so it doesn't apply here.)
3. **Run as root, `--sandbox=chroot`.** Same root-EUID-on-create problem as option 1.
4. **Grant only `CAP_DAC_READ_SEARCH` on the binary.** Daemon stays at UID 1000; ADR-001 and ADR-002 hold; only the one capability needed by `open_by_handle_at(2)` is added.

## Decision

Option 4. `lib/functions.bash::ensure_virtiofsd_cap` keeps a setcap'd copy of virtiofsd at `${XDG_DATA_HOME:-~/.local/share}/nixbox/bin/virtiofsd` and returns its path; `do_create` and `do_mount` invoke that path with `--inode-file-handles=mandatory`.

`mandatory` over `prefer` because silent fallback to FD-mode would re-introduce the leak with no signal — the failure mode of #18 only surfaces after a long session, exactly the kind of degradation that hides until something OOMs.

A copy under `$XDG_DATA_HOME` rather than `setcap` on the in-store binary, because the host is not NixOS: `/nix/store` is read-only and GC-collectable, `security.wrappers` doesn't apply, and host-package wrappers (`/usr/local/bin`) drift independently of the bundled CLI. A nixbox-managed copy is reinstalled automatically when the source binary realpath changes (e.g., after `nixbox update`), keyed by a sidecar marker file.

`cmd_doctor` reports wrapper status; `nixbox up` triggers (re)install on demand. The first run prompts for sudo (one-time per virtiofsd version), parallel to the existing `sudo prlimit` path in `raise_nofile`.

## Consequences

- The FD ceiling is no longer a function of cache size. Source-tree shares are stable across long sessions.
- **Memory replaces FDs as the long-run ceiling.** Each cached inode now holds an opaque handle in virtiofsd's address space instead of an O_PATH FD. Empirically, ~8 KB per `/nix/store` inode and ~60 KB per workspace inode of virtiofsd RSS. A 524k-inode cache is no longer hitting `RLIMIT_NOFILE`, but it is GBs of resident memory.
- One additional sudo prompt on first `nixbox up` after install or after a virtiofsd version bump.
- **`/proc/<virtiofsd-pid>/fd` is now root-owned.** Executing a binary that carries file capabilities clears the process's "dumpable" attribute (see `PR_SET_DUMPABLE` in `prctl(2)`), so debugging tools (`lsof -p`, `ls /proc/<pid>/fd`) need sudo. They silently return empty results without it.
- ADR-001 (sandbox=none) and ADR-002 (uid/gid translate) preserved unchanged.
- The granted capability is scoped: `CAP_DAC_READ_SEARCH` bypasses DAC for read/search only, and only matters for paths the daemon already opens via `--shared-dir`. Other DAC checks remain.
- Wrapper drift: invoking nixbox under a non-canonical PATH that resolves a different virtiofsd causes a reinstall. Acceptable — the CLI is a Nix wrapper with a deterministic PATH, so the canonical path is stable.
1 change: 1 addition & 0 deletions docs/decisions/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ Each file: `NNN-short-title.md` with sections **Problem**, **Decision**, **Conse
| [011](011-guest-setup-scripts.md) | Guest setup via user-provided scripts | 2026-03-24 | accepted |
| [012](012-per-workspace-nixbox-directory.md) | Per-workspace `.nixbox/` directory | 2026-03-24 | accepted |
| [013](013-plugin-env-transparency.md) | Plugins must not inject env vars | 2026-03-24 | accepted |
| [016](016-virtiofsd-file-handles-capability.md) | virtiofsd file-handle mode via setcap'd wrapper | 2026-04-27 | accepted |
38 changes: 38 additions & 0 deletions lib/functions.bash
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,44 @@ raise_nofile() {
|| die "Failed to raise NOFILE soft limit to $target after raising hard limit"
}

# Resolves to a virtiofsd binary that has CAP_DAC_READ_SEARCH set, required for
# --inode-file-handles=mandatory (#18). The capability cannot live on the
# /nix/store path (read-only and GC-collectable), so a copy is kept under
# $XDG_DATA_HOME/nixbox/bin and re-installed when the source binary changes
# (e.g. after `nixbox update`). One sudo prompt per (re)install.
#
# Globals:   XDG_DATA_HOME, HOME (read)
# Arguments: none
# Outputs:   the wrapper path on stdout (and nothing else, so callers can
#            capture it with $(...)); the install notice goes to stderr
# Returns:   0 on success; every failure path calls `die`
ensure_virtiofsd_cap() {
  local data_dir="${XDG_DATA_HOME:-$HOME/.local/share}/nixbox"
  local wrapper="$data_dir/bin/virtiofsd"
  # Sidecar file recording the realpath of the binary the wrapper was copied
  # from; a mismatch means the source changed and the copy is stale.
  local marker="$data_dir/bin/.virtiofsd.src"

  local src
  src=$(command -v virtiofsd 2>/dev/null) \
    || die "virtiofsd not found in PATH"
  src=$(realpath "$src") \
    || die "Failed to resolve realpath of virtiofsd"

  # Fast path: the wrapper exists, was copied from the current source, and
  # still carries the capability. getcap's stderr is silenced on purpose: any
  # failure there simply falls through to a reinstall.
  if [ -x "$wrapper" ] \
    && [ -f "$marker" ] \
    && [ "$(<"$marker")" = "$src" ] \
    && getcap "$wrapper" 2>/dev/null | grep -q 'cap_dac_read_search'; then
    printf '%s\n' "$wrapper"
    return 0
  fi

  log "==> Installing setcap'd virtiofsd at $wrapper (requires sudo)..." >&2
  mkdir -p "$data_dir/bin" \
    || die "Failed to create $data_dir/bin"
  chmod 700 "$data_dir/bin" \
    || die "Failed to chmod 700 $data_dir/bin"
  # --remove-destination unlinks any existing wrapper first instead of
  # writing into it in place.
  cp --remove-destination "$src" "$wrapper" \
    || die "Failed to copy virtiofsd to $wrapper"
  sudo setcap cap_dac_read_search=ep "$wrapper" \
    || die "Failed to set cap_dac_read_search on $wrapper. --inode-file-handles=mandatory cannot work without it."
  # Record the source only after the capability is in place. A failed write
  # must not pass silently: without the marker every later call would
  # reinstall and prompt for sudo again.
  printf '%s\n' "$src" > "$marker" \
    || die "Failed to write $marker"
  printf '%s\n' "$wrapper"
}

# ---------------------------------------------------------------------------
# Network derivation (pure — depends only on slot + name)
# ---------------------------------------------------------------------------
Expand Down
Loading