From 6d52ff4b3d90f5959af26827c94308aa3c7505f3 Mon Sep 17 00:00:00 2001 From: Razvan Laurus Date: Fri, 24 Apr 2026 13:00:18 +0200 Subject: [PATCH 1/3] fix(virtiofs): raise virtiofsd FD limit to 524288 65536 was already being pinned by sbt workloads (see #18): observed 64,943/65,536 on the coursier share with virtiofsd surfacing host EMFILE as guest ENFILE. 524288 matches modern systemd user-session defaults and gives ~8x headroom over the observed peak. This is a short-term mitigation. virtiofsd with --cache=auto still accumulates backing-file FDs monotonically; the durable fix is to stop virtiofs-mounting hot caches (coursier, ivy2, npm, cargo, ...) and move them to guest-native paths. Refs #18. Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/nixbox | 9 ++++++--- tests/run-e2e-tests.sh | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/bin/nixbox b/bin/nixbox index 24f1c9c..7868051 100755 --- a/bin/nixbox +++ b/bin/nixbox @@ -310,8 +310,11 @@ NFTEOF ;; esac - # Raise FD limit before launching virtiofsd and cloud-hypervisor - ulimit -n 65536 + # Raise FD limit before launching virtiofsd and cloud-hypervisor. + # virtiofsd with --cache=auto monotonically accumulates backing-file FDs; + # hot caches (coursier, ivy2) can pin thousands per share. 64k was observed + # to exhaust under sbt workloads (see #18). + ulimit -n 524288 # --- Start virtiofsd for nix-store share (required by microvm config) --- log "==> Starting virtiofsd for nix-store..." 
@@ -542,7 +545,7 @@ do_mount() { done local virtiofs_sock="$run_dir/virtiofs-${mount_idx}.sock" - ulimit -n 65536 + ulimit -n 524288 virtiofsd \ --socket-path="$virtiofs_sock" \ --shared-dir="$MOUNT_SOURCE" \ diff --git a/tests/run-e2e-tests.sh b/tests/run-e2e-tests.sh index bd77147..6bb1d35 100755 --- a/tests/run-e2e-tests.sh +++ b/tests/run-e2e-tests.sh @@ -55,10 +55,10 @@ for pidfile in .nixbox/state/virtiofsd_*_pid; do pid=$(cat "$pidfile") tag=$(basename "$pidfile" | sed 's/virtiofsd_//;s/_pid//') max_fds=$(awk '/^Max open files/{print $4}' "/proc/$pid/limits") - if [ "$max_fds" -ge 65536 ]; then + if [ "$max_fds" -ge 524288 ]; then echo " ok: virtiofsd ($tag) has $max_fds max FDs" else - echo " FAIL: virtiofsd ($tag) has $max_fds max FDs, expected >= 65536" + echo " FAIL: virtiofsd ($tag) has $max_fds max FDs, expected >= 524288" exit 1 fi done From bad42a10411bc0b32a4ac22c33250840a7753fe4 Mon Sep 17 00:00:00 2001 From: Razvan Laurus Date: Fri, 24 Apr 2026 13:07:21 +0200 Subject: [PATCH 2/3] fix(virtiofs): raise NOFILE hard limit via sudo prlimit when needed CI (and any locked-down shell with a session hard limit below 524288) hit 'ulimit: cannot modify limit: Operation not permitted'. bash's ulimit -n sets both soft and hard, so target > hard fails with EPERM even when root could raise it. Add a raise_nofile helper that tries ulimit first, falls back to 'sudo prlimit --pid $$ --nofile=N:N' to raise this shell process's hard limit, then sets the soft limit. sudo is already required by nixbox up (for nftables/tap) and is passwordless on GitHub Actions runners. Co-Authored-By: Claude Opus 4.7 (1M context) --- bin/nixbox | 9 +++------ lib/functions.bash | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/bin/nixbox b/bin/nixbox index 7868051..6830a7b 100755 --- a/bin/nixbox +++ b/bin/nixbox @@ -310,11 +310,8 @@ NFTEOF ;; esac - # Raise FD limit before launching virtiofsd and cloud-hypervisor. 
- # virtiofsd with --cache=auto monotonically accumulates backing-file FDs; - # hot caches (coursier, ivy2) can pin thousands per share. 64k was observed - # to exhaust under sbt workloads (see #18). - ulimit -n 524288 + # Raise FD limit before launching virtiofsd and cloud-hypervisor (#18). + raise_nofile 524288 # --- Start virtiofsd for nix-store share (required by microvm config) --- log "==> Starting virtiofsd for nix-store..." @@ -545,7 +542,7 @@ do_mount() { done local virtiofs_sock="$run_dir/virtiofs-${mount_idx}.sock" - ulimit -n 524288 + raise_nofile 524288 virtiofsd \ --socket-path="$virtiofs_sock" \ --shared-dir="$MOUNT_SOURCE" \ diff --git a/lib/functions.bash b/lib/functions.bash index 4d45a5b..f195fb9 100644 --- a/lib/functions.bash +++ b/lib/functions.bash @@ -11,6 +11,27 @@ die() { printf '\r%s\n' "ERROR: $*" >&2; exit 1; } log() { printf '\r%s\n' "$*"; } log_sub() { printf '\r %s\n' "$*"; } +# --------------------------------------------------------------------------- +# Process limits +# --------------------------------------------------------------------------- + +# Raise the current shell's NOFILE soft+hard limit to $1 (default 524288). +# virtiofsd with --cache=auto accumulates backing-file FDs and pins hot-cache +# shares at the ceiling (#18). If the session's hard limit is below target +# (e.g. locked-down CI runners), sudo prlimit raises the kernel limit first +# so bash's ulimit can then set the soft limit. Children inherit both. +raise_nofile() { + local target="${1:-524288}" + if ulimit -n "$target" 2>/dev/null; then + return 0 + fi + log "==> Raising NOFILE hard limit to $target (requires sudo)..." 
+ sudo prlimit --pid $$ --nofile="$target:$target" \ + || die "Failed to raise NOFILE hard limit to $target" + ulimit -n "$target" \ + || die "ulimit -n $target failed after raising hard limit" +} + # --------------------------------------------------------------------------- # Network derivation (pure — depends only on slot + name) # --------------------------------------------------------------------------- From 857a8b1aece28ae9b23bd50e73b070aa7454cf8e Mon Sep 17 00:00:00 2001 From: Razvan Laurus Date: Fri, 24 Apr 2026 13:23:23 +0200 Subject: [PATCH 3/3] refactor(virtiofs): address Copilot review on raise_nofile - Don't lower an already-higher hard limit: ulimit -n sets both soft and hard, so target < current_hard would silently downgrade. Read current limits and only raise what's below target; use -Sn for soft-only paths. - Use $BASHPID instead of $$ in sudo prlimit --pid: $$ yields the top-level shell PID even inside subshells, which would target the wrong process. $BASHPID always reflects the current bash. - Reword helper comment: prlimit adjusts per-process rlimits, not a kernel-wide limit like fs.nr_open. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/functions.bash | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/lib/functions.bash b/lib/functions.bash index f195fb9..a3f845b 100644 --- a/lib/functions.bash +++ b/lib/functions.bash @@ -15,21 +15,34 @@ log_sub() { printf '\r %s\n' "$*"; } # Process limits # --------------------------------------------------------------------------- -# Raise the current shell's NOFILE soft+hard limit to $1 (default 524288). -# virtiofsd with --cache=auto accumulates backing-file FDs and pins hot-cache -# shares at the ceiling (#18). If the session's hard limit is below target -# (e.g. locked-down CI runners), sudo prlimit raises the kernel limit first -# so bash's ulimit can then set the soft limit. Children inherit both. 
+# Raise the current shell's NOFILE soft limit to $1 (default 524288) without +# lowering an already-higher hard limit. virtiofsd with --cache=auto +# accumulates backing-file FDs and pins hot-cache shares at the ceiling (#18). +# If the session's hard limit is below target (e.g. locked-down CI runners), +# sudo prlimit raises this shell process's NOFILE hard+soft limits so bash's +# ulimit can then succeed. Children inherit both. raise_nofile() { local target="${1:-524288}" - if ulimit -n "$target" 2>/dev/null; then + local current_soft current_hard + + current_soft=$(ulimit -Sn) || die "Failed to read NOFILE soft limit" + current_hard=$(ulimit -Hn) || die "Failed to read NOFILE hard limit" + + if [ "$current_soft" = "unlimited" ] || [ "$current_soft" -ge "$target" ]; then + return 0 + fi + + if [ "$current_hard" = "unlimited" ] || [ "$current_hard" -ge "$target" ]; then + ulimit -Sn "$target" \ + || die "Failed to raise NOFILE soft limit to $target" return 0 fi + log "==> Raising NOFILE hard limit to $target (requires sudo)..." - sudo prlimit --pid $$ --nofile="$target:$target" \ + sudo prlimit --pid "$BASHPID" --nofile="$target:$target" \ || die "Failed to raise NOFILE hard limit to $target" - ulimit -n "$target" \ - || die "ulimit -n $target failed after raising hard limit" + ulimit -Sn "$target" \ + || die "Failed to raise NOFILE soft limit to $target after raising hard limit" } # ---------------------------------------------------------------------------